From 8e1ebf29b21b0cb62216fdb7694beeb26dfd4e14 Mon Sep 17 00:00:00 2001 From: 1600822305 <1600822305@qq.com> Date: Fri, 11 Apr 2025 00:43:13 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=20TTS=20=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E6=9C=8D=E5=8A=A1=E5=B9=B6=E6=9B=B4=E6=96=B0=E4=BA=86?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 1 + packages/shared/IpcChannel.ts | 4 + src/main/index.ts | 4 + src/main/ipc.ts | 9 + src/main/services/FileService.ts | 9 +- src/main/services/MsEdgeTTSService.ts | 93 ++ src/main/services/MsTTSIpcHandler.ts | 18 + src/main/services/MsTTSService.ts | 236 ++++ src/preload/index.ts | 7 +- src/renderer/src/components/TTSButton.tsx | 1 + src/renderer/src/i18n/locales/en-us.json | 28 +- src/renderer/src/i18n/locales/zh-cn.json | 30 +- .../pages/home/Messages/MessageMenubar.tsx | 5 +- .../src/pages/home/Messages/TTSStopButton.tsx | 2 +- .../settings/TTSSettings/ASRSettings.tsx | 1 - .../settings/TTSSettings/TTSSettings.tsx | 405 ++++-- src/renderer/src/services/TTSService.ts | 1197 +---------------- .../src/services/tts/EdgeTTSService.ts | 270 ++++ src/renderer/src/services/tts/MsTTSService.ts | 58 + .../src/services/tts/OpenAITTSService.ts | 92 ++ .../src/services/tts/SiliconflowTTSService.ts | 116 ++ src/renderer/src/services/tts/TTSService.ts | 240 ++++ .../src/services/tts/TTSServiceFactory.ts | 70 + .../src/services/tts/TTSServiceInterface.ts | 12 + .../src/services/tts/TTSTextFilter.ts | 148 ++ src/renderer/src/services/tts/index.ts | 7 + src/renderer/src/store/settings.ts | 56 +- yarn.lock | 16 +- 28 files changed, 1859 insertions(+), 1276 deletions(-) create mode 100644 src/main/services/MsEdgeTTSService.ts create mode 100644 src/main/services/MsTTSIpcHandler.ts create mode 100644 src/main/services/MsTTSService.ts create mode 100644 src/renderer/src/services/tts/EdgeTTSService.ts create mode 100644 src/renderer/src/services/tts/MsTTSService.ts create mode 100644 src/renderer/src/services/tts/OpenAITTSService.ts create mode 100644 src/renderer/src/services/tts/SiliconflowTTSService.ts create mode 100644 src/renderer/src/services/tts/TTSService.ts create mode 100644 src/renderer/src/services/tts/TTSServiceFactory.ts create mode 100644 src/renderer/src/services/tts/TTSServiceInterface.ts create mode 100644 src/renderer/src/services/tts/TTSTextFilter.ts create mode 100644 src/renderer/src/services/tts/index.ts diff --git a/package.json b/package.json index 36002abd87..406960c2c1 100644 --- a/package.json +++ b/package.json @@ -87,6 +87,7 @@ "got-scraping": "^4.1.1", "jsdom": "^26.0.0", "markdown-it": "^14.1.0", + "node-edge-tts": "^1.2.8", "officeparser": "^4.1.1", "proxy-agent": "^6.5.0", "tar": "^7.4.3", diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts index 0946491bb6..5975737b0c 100644 --- a/packages/shared/IpcChannel.ts +++ b/packages/shared/IpcChannel.ts @@ -22,6 +22,10 @@ export enum IpcChannel { Asr_StartServer = 'start-asr-server', Asr_StopServer = 'stop-asr-server', + // MsTTS + MsTTS_GetVoices = 'mstts:get-voices', + MsTTS_Synthesize = 'mstts:synthesize', + // Open Open_Path = 'open:path', Open_Website = 'open:website', diff --git a/src/main/index.ts b/src/main/index.ts index 816695bb2d..127518b648 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -7,6 +7,7 @@ import installExtension, { REACT_DEVELOPER_TOOLS, REDUX_DEVTOOLS } from 'electro import { registerIpc } from './ipc' import { configManager } from 
'./services/ConfigManager' import { CHERRY_STUDIO_PROTOCOL, handleProtocolUrl, registerProtocolClient } from './services/ProtocolClient' +import { registerMsTTSIpcHandlers } from './services/MsTTSIpcHandler' import { registerShortcuts } from './services/ShortcutService' import { TrayService } from './services/TrayService' import { windowService } from './services/WindowService' @@ -46,6 +47,9 @@ if (!app.requestSingleInstanceLock()) { registerIpc(mainWindow, app) + // 注册MsTTS IPC处理程序 + registerMsTTSIpcHandlers() + replaceDevtoolsFont(mainWindow) if (process.env.NODE_ENV === 'development') { diff --git a/src/main/ipc.ts b/src/main/ipc.ts index 1bd7318dbd..f2df60fe0e 100644 --- a/src/main/ipc.ts +++ b/src/main/ipc.ts @@ -23,6 +23,7 @@ import ObsidianVaultService from './services/ObsidianVaultService' import { ProxyConfig, proxyManager } from './services/ProxyManager' import { asrServerService } from './services/ASRServerService' import { searchService } from './services/SearchService' +import * as MsTTSService from './services/MsTTSService' import { registerShortcuts, unregisterAllShortcuts } from './services/ShortcutService' import { TrayService } from './services/TrayService' import { windowService } from './services/WindowService' @@ -309,4 +310,12 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { // 注册ASR服务器IPC处理程序 asrServerService.registerIpcHandlers() + + // 注册MsTTS IPC处理程序 + ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices) + ipcMain.handle( + IpcChannel.MsTTS_Synthesize, + (_, text: string, voice: string, outputFormat: string) => + MsTTSService.synthesize(text, voice, outputFormat) + ) } diff --git a/src/main/services/FileService.ts b/src/main/services/FileService.ts index 39255e15f7..01837fe382 100644 --- a/src/main/services/FileService.ts +++ b/src/main/services/FileService.ts @@ -1,7 +1,12 @@ import fs from 'node:fs' export default class FileService { - public static async readFile(_: Electron.IpcMainInvokeEvent, path: string) { - return fs.readFileSync(path, 'utf8') + public static async readFile(_: Electron.IpcMainInvokeEvent, path: string, encoding?: BufferEncoding) { + // 如果指定了编码,则返回字符串,否则返回二进制数据 + if (encoding) { + return fs.readFileSync(path, encoding) + } else { + return fs.readFileSync(path) + } } } diff --git a/src/main/services/MsEdgeTTSService.ts b/src/main/services/MsEdgeTTSService.ts new file mode 100644 index 0000000000..fdf31e2edb --- /dev/null +++ b/src/main/services/MsEdgeTTSService.ts @@ -0,0 +1,93 @@ +import { EdgeTTS } from 'node-edge-tts'; +import fs from 'node:fs'; +import path from 'node:path'; +import { app } from 'electron'; +import log from 'electron-log'; + +/** + * Microsoft Edge TTS服务 + * 使用Microsoft Edge的在线TTS服务,不需要API密钥 + */ +class MsEdgeTTSService { + private static instance: MsEdgeTTSService; + private tts: EdgeTTS; + private tempDir: string; + + private constructor() { + this.tts = new EdgeTTS(); + this.tempDir = path.join(app.getPath('temp'), 'cherry-tts'); + + // 确保临时目录存在 + if (!fs.existsSync(this.tempDir)) { + fs.mkdirSync(this.tempDir, { recursive: true }); + } + } + + /** + * 获取单例实例 + */ + public static getInstance(): MsEdgeTTSService { + if (!MsEdgeTTSService.instance) { + MsEdgeTTSService.instance = new MsEdgeTTSService(); + } + return MsEdgeTTSService.instance; + } + + /** + * 获取可用的语音列表 + * @returns 语音列表 + */ + public async getVoices(): Promise { + try { + // 返回预定义的中文语音列表 + return [ + { name: 'zh-CN-XiaoxiaoNeural', locale: 'zh-CN', gender: 'Female' }, + { name: 'zh-CN-YunxiNeural', locale: 
'zh-CN', gender: 'Male' }, + { name: 'zh-CN-YunyangNeural', locale: 'zh-CN', gender: 'Male' }, + { name: 'zh-CN-XiaohanNeural', locale: 'zh-CN', gender: 'Female' }, + { name: 'zh-CN-XiaomoNeural', locale: 'zh-CN', gender: 'Female' }, + { name: 'zh-CN-XiaoxuanNeural', locale: 'zh-CN', gender: 'Female' }, + { name: 'zh-CN-XiaoruiNeural', locale: 'zh-CN', gender: 'Female' }, + { name: 'zh-CN-YunfengNeural', locale: 'zh-CN', gender: 'Male' }, + ]; + } catch (error) { + log.error('获取Microsoft Edge TTS语音列表失败:', error); + throw error; + } + } + + /** + * 合成语音 + * @param text 要合成的文本 + * @param voice 语音 + * @param outputFormat 输出格式 + * @returns 音频文件路径 + */ + public async synthesize(text: string, voice: string, outputFormat: string): Promise { + try { + // 设置TTS参数 + await this.tts.setMetadata(voice, outputFormat); + + // 生成临时文件路径 + const timestamp = Date.now(); + const outputPath = path.join(this.tempDir, `tts_${timestamp}.mp3`); + + // 合成语音 + await this.tts.toFile(outputPath, text); + + return outputPath; + } catch (error) { + log.error('Microsoft Edge TTS语音合成失败:', error); + throw error; + } + } +} + +// 导出单例方法 +export const getVoices = async () => { + return await MsEdgeTTSService.getInstance().getVoices(); +}; + +export const synthesize = async (text: string, voice: string, outputFormat: string) => { + return await MsEdgeTTSService.getInstance().synthesize(text, voice, outputFormat); +}; diff --git a/src/main/services/MsTTSIpcHandler.ts b/src/main/services/MsTTSIpcHandler.ts new file mode 100644 index 0000000000..24a31f8e23 --- /dev/null +++ b/src/main/services/MsTTSIpcHandler.ts @@ -0,0 +1,18 @@ +import { IpcChannel } from '@shared/IpcChannel'; +import { ipcMain } from 'electron'; +import * as MsTTSService from './MsTTSService'; + +/** + * 注册MsTTS相关的IPC处理程序 + */ +export function registerMsTTSIpcHandlers(): void { + // 获取可用的语音列表 + ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices); + + // 合成语音 + ipcMain.handle( + IpcChannel.MsTTS_Synthesize, + (_, text: string, voice: string, outputFormat: string) => + MsTTSService.synthesize(text, voice, outputFormat) + ); +} diff --git a/src/main/services/MsTTSService.ts b/src/main/services/MsTTSService.ts new file mode 100644 index 0000000000..1d7fcf42f3 --- /dev/null +++ b/src/main/services/MsTTSService.ts @@ -0,0 +1,236 @@ +import { EdgeTTS } from 'node-edge-tts'; // listVoices is no longer needed here +import fs from 'node:fs'; +import path from 'node:path'; +import { app } from 'electron'; +import log from 'electron-log'; + +// --- START OF HARDCODED VOICE LIST --- +// WARNING: This list is static and may become outdated. +// It's generally recommended to use listVoices() for the most up-to-date list. 
+const hardcodedVoices = [ + { Name: 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)', ShortName: 'af-ZA-AdriNeural', Gender: 'Female', Locale: 'af-ZA' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)', ShortName: 'am-ET-MekdesNeural', Gender: 'Female', Locale: 'am-ET' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)', ShortName: 'ar-AE-FatimaNeural', Gender: 'Female', Locale: 'ar-AE' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)', ShortName: 'ar-AE-HamdanNeural', Gender: 'Male', Locale: 'ar-AE' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)', ShortName: 'ar-BH-AliNeural', Gender: 'Male', Locale: 'ar-BH' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)', ShortName: 'ar-BH-LailaNeural', Gender: 'Female', Locale: 'ar-BH' }, + // ... (Many other Arabic locales/voices) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)', ShortName: 'ar-SA-ZariyahNeural', Gender: 'Female', Locale: 'ar-SA' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)', ShortName: 'az-AZ-BabekNeural', Gender: 'Male', Locale: 'az-AZ' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)', ShortName: 'az-AZ-BanuNeural', Gender: 'Female', Locale: 'az-AZ' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)', ShortName: 'bg-BG-BorislavNeural', Gender: 'Male', Locale: 'bg-BG' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)', ShortName: 'bg-BG-KalinaNeural', Gender: 'Female', Locale: 'bg-BG' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)', ShortName: 'bn-BD-NabanitaNeural', Gender: 'Female', Locale: 'bn-BD' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)', ShortName: 'bn-BD-PradeepNeural', Gender: 'Male', Locale: 'bn-BD' }, + // ... (Catalan, Czech, Welsh, Danish, German, Greek, English variants) ... 
+ { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)', ShortName: 'en-AU-NatashaNeural', Gender: 'Female', Locale: 'en-AU' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)', ShortName: 'en-AU-WilliamNeural', Gender: 'Male', Locale: 'en-AU' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)', ShortName: 'en-CA-ClaraNeural', Gender: 'Female', Locale: 'en-CA' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)', ShortName: 'en-CA-LiamNeural', Gender: 'Male', Locale: 'en-CA' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)', ShortName: 'en-GB-LibbyNeural', Gender: 'Female', Locale: 'en-GB' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)', ShortName: 'en-GB-MaisieNeural', Gender: 'Female', Locale: 'en-GB' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)', ShortName: 'en-GB-RyanNeural', Gender: 'Male', Locale: 'en-GB' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)', ShortName: 'en-GB-SoniaNeural', Gender: 'Female', Locale: 'en-GB' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)', ShortName: 'en-GB-ThomasNeural', Gender: 'Male', Locale: 'en-GB' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)', ShortName: 'en-HK-SamNeural', Gender: 'Male', Locale: 'en-HK' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)', ShortName: 'en-HK-YanNeural', Gender: 'Female', Locale: 'en-HK' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)', ShortName: 'en-IE-ConnorNeural', Gender: 'Male', Locale: 'en-IE' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)', ShortName: 'en-IE-EmilyNeural', Gender: 'Female', Locale: 'en-IE' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)', ShortName: 'en-IN-NeerjaNeural', Gender: 'Female', Locale: 'en-IN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)', ShortName: 'en-IN-PrabhatNeural', Gender: 'Male', Locale: 'en-IN' }, + // ... (Many more English variants: KE, NG, NZ, PH, SG, TZ, US, ZA) ... 
+ { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)', ShortName: 'en-US-AriaNeural', Gender: 'Female', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)', ShortName: 'en-US-AnaNeural', Gender: 'Female', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)', ShortName: 'en-US-ChristopherNeural', Gender: 'Male', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)', ShortName: 'en-US-EricNeural', Gender: 'Male', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)', ShortName: 'en-US-GuyNeural', Gender: 'Male', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)', ShortName: 'en-US-JennyNeural', Gender: 'Female', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)', ShortName: 'en-US-MichelleNeural', Gender: 'Female', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)', ShortName: 'en-US-RogerNeural', Gender: 'Male', Locale: 'en-US' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)', ShortName: 'en-US-SteffanNeural', Gender: 'Male', Locale: 'en-US' }, + // ... (Spanish variants) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)', ShortName: 'es-MX-DaliaNeural', Gender: 'Female', Locale: 'es-MX' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)', ShortName: 'es-MX-JorgeNeural', Gender: 'Male', Locale: 'es-MX' }, + // ... (Estonian, Basque, Persian, Finnish, Filipino, French, Irish, Galician, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Indonesian, Icelandic, Italian, Japanese) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)', ShortName: 'ja-JP-KeitaNeural', Gender: 'Male', Locale: 'ja-JP' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)', ShortName: 'ja-JP-NanamiNeural', Gender: 'Female', Locale: 'ja-JP' }, + // ... (Javanese, Georgian, Kazakh, Khmer, Kannada, Korean) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)', ShortName: 'ko-KR-InJoonNeural', Gender: 'Male', Locale: 'ko-KR' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)', ShortName: 'ko-KR-SunHiNeural', Gender: 'Female', Locale: 'ko-KR' }, + // ... (Lao, Lithuanian, Latvian, Macedonian, Malayalam, Mongolian, Marathi, Malay, Maltese, Burmese, Norwegian, Dutch, Polish, Pashto, Portuguese) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)', ShortName: 'pt-BR-AntonioNeural', Gender: 'Male', Locale: 'pt-BR' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)', ShortName: 'pt-BR-FranciscaNeural', Gender: 'Female', Locale: 'pt-BR' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)', ShortName: 'pt-PT-DuarteNeural', Gender: 'Male', Locale: 'pt-PT' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)', ShortName: 'pt-PT-RaquelNeural', Gender: 'Female', Locale: 'pt-PT' }, + // ... (Romanian, Russian, Sinhala, Slovak, Slovenian, Somali, Albanian, Serbian, Sundanese, Swedish, Swahili, Tamil, Telugu, Thai) ... 
+ { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)', ShortName: 'th-TH-NiwatNeural', Gender: 'Male', Locale: 'th-TH' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)', ShortName: 'th-TH-PremwadeeNeural', Gender: 'Female', Locale: 'th-TH' }, + // ... (Turkish, Ukrainian, Urdu, Uzbek, Vietnamese) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)', ShortName: 'vi-VN-HoaiMyNeural', Gender: 'Female', Locale: 'vi-VN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)', ShortName: 'vi-VN-NamMinhNeural', Gender: 'Male', Locale: 'vi-VN' }, + // ... (Chinese variants) ... + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)', ShortName: 'zh-CN-XiaoxiaoNeural', Gender: 'Female', Locale: 'zh-CN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)', ShortName: 'zh-CN-YunxiNeural', Gender: 'Male', Locale: 'zh-CN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)', ShortName: 'zh-CN-YunjianNeural', Gender: 'Male', Locale: 'zh-CN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)', ShortName: 'zh-CN-YunxiaNeural', Gender: 'Male', Locale: 'zh-CN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)', ShortName: 'zh-CN-YunyangNeural', Gender: 'Male', Locale: 'zh-CN' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)', ShortName: 'zh-CN-liaoning-XiaobeiNeural', Gender: 'Female', Locale: 'zh-CN-liaoning' }, + // { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)', ShortName: 'zh-CN-shaanxi-XiaoniNeural', Gender: 'Female', Locale: 'zh-CN-shaanxi' }, // Example regional voice + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)', ShortName: 'zh-HK-HiuGaaiNeural', Gender: 'Female', Locale: 'zh-HK' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)', ShortName: 'zh-HK-HiuMaanNeural', Gender: 'Female', Locale: 'zh-HK' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)', ShortName: 'zh-HK-WanLungNeural', Gender: 'Male', Locale: 'zh-HK' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)', ShortName: 'zh-TW-HsiaoChenNeural', Gender: 'Female', Locale: 'zh-TW' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)', ShortName: 'zh-TW-HsiaoYuNeural', Gender: 'Female', Locale: 'zh-TW' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)', ShortName: 'zh-TW-YunJheNeural', Gender: 'Male', Locale: 'zh-TW' }, + // ... (Zulu) ... 
+ { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)', ShortName: 'zu-ZA-ThandoNeural', Gender: 'Female', Locale: 'zu-ZA' }, + { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)', ShortName: 'zu-ZA-ThembaNeural', Gender: 'Male', Locale: 'zu-ZA' }, +]; +// --- END OF HARDCODED VOICE LIST --- + + +/** + * 免费在线TTS服务 + * 使用免费的在线TTS服务,不需要API密钥 + */ +class MsTTSService { + private static instance: MsTTSService; + private tempDir: string; + + private constructor() { + this.tempDir = path.join(app.getPath('temp'), 'cherry-tts'); + if (!fs.existsSync(this.tempDir)) { + fs.mkdirSync(this.tempDir, { recursive: true }); + } + log.info('初始化免费在线TTS服务 (使用硬编码语音列表)'); + } + + public static getInstance(): MsTTSService { + if (!MsTTSService.instance) { + MsTTSService.instance = new MsTTSService(); + } + return MsTTSService.instance; + } + + /** + * 获取可用的语音列表 (返回硬编码列表) + * @returns 语音列表 + */ + public async getVoices(): Promise { + try { + log.info(`返回硬编码的 ${hardcodedVoices.length} 个语音列表`); + // 直接返回硬编码的列表 + // 注意:保持 async 是为了接口兼容性,虽然这里没有实际的异步操作 + return hardcodedVoices; + } catch (error) { + // 这个 try/catch 在这里意义不大了,因为返回静态数据不会出错 + // 但保留结构以防未来改动 + log.error('获取硬编码语音列表时出错 (理论上不应发生):', error); + return []; // 返回空列表以防万一 + } + } + + /** + * 合成语音 + * @param text 要合成的文本 + * @param voice 语音的 ShortName (例如 'zh-CN-XiaoxiaoNeural') + * @param outputFormat 输出格式 (例如 'audio-24khz-48kbitrate-mono-mp3') + * @returns 音频文件路径 + */ + public async synthesize(text: string, voice: string, outputFormat: string): Promise { + try { + // 记录详细的请求信息 + log.info(`微软在线TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`); + + // 验证输入参数 + if (!text || text.trim() === '') { + throw new Error('要合成的文本不能为空'); + } + + if (!voice || voice.trim() === '') { + throw new Error('语音名称不能为空'); + } + + // 创建一个新的EdgeTTS实例,并设置参数 + // 添加超时设置,默认为30秒 + const tts = new EdgeTTS({ + voice: voice, + outputFormat: outputFormat, + timeout: 30000, // 30秒超时 + rate: '+0%', // 正常语速 + pitch: '+0Hz', // 正常音调 + volume: '+0%' // 正常音量 + }); + + // 生成临时文件路径 + const timestamp = Date.now(); + const fileExtension = outputFormat.includes('mp3') ? 
'mp3' : outputFormat.split('-').pop() || 'audio'; + const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`); + + log.info(`开始生成语音文件: ${outputPath}`); + + // 使用ttsPromise方法生成文件 + await tts.ttsPromise(text, outputPath); + + // 验证生成的文件是否存在且大小大于0 + if (!fs.existsSync(outputPath)) { + throw new Error(`生成的语音文件不存在: ${outputPath}`); + } + + const stats = fs.statSync(outputPath); + if (stats.size === 0) { + throw new Error(`生成的语音文件大小为0: ${outputPath}`); + } + + log.info(`微软在线TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`); + return outputPath; + } catch (error: any) { + // 记录详细的错误信息 + log.error(`微软在线TTS语音合成失败 (语音=${voice}):`, error); + + // 尝试提供更有用的错误信息 + if (error.message && typeof error.message === 'string') { + if (error.message.includes('Timed out')) { + throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`); + } else if (error.message.includes('ENOTFOUND')) { + throw new Error(`无法连接到微软语音服务,请检查网络连接`); + } else if (error.message.includes('ECONNREFUSED')) { + throw new Error(`连接被拒绝,请检查网络设置或代理配置`); + } + } + + throw error; + } + } + + /** + * (可选) 清理临时文件目录 + */ + public async cleanupTempDir(): Promise { + // (Cleanup method remains the same) + try { + const files = await fs.promises.readdir(this.tempDir); + for (const file of files) { + if (file.startsWith('tts_')) { + await fs.promises.unlink(path.join(this.tempDir, file)); + } + } + log.info('TTS 临时文件已清理'); + } catch (error) { + log.error('清理 TTS 临时文件失败:', error); + } + } +} + +// 导出单例方法 (保持不变) +export const getVoices = async () => { + return await MsTTSService.getInstance().getVoices(); +}; + +export const synthesize = async (text: string, voice: string, outputFormat: string) => { + return await MsTTSService.getInstance().synthesize(text, voice, outputFormat); +}; + +export const cleanupTtsTempFiles = async () => { + await MsTTSService.getInstance().cleanupTempDir(); +}; \ No newline at end of file diff --git a/src/preload/index.ts b/src/preload/index.ts index b2b72f5b71..50b922c917 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -64,7 +64,7 @@ const api = { binaryFile: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryFile, fileId) }, fs: { - read: (path: string) => ipcRenderer.invoke(IpcChannel.Fs_Read, path) + read: (path: string, encoding?: BufferEncoding) => ipcRenderer.invoke(IpcChannel.Fs_Read, path, encoding) }, export: { toWord: (markdown: string, fileName: string) => ipcRenderer.invoke(IpcChannel.Export_Word, markdown, fileName) @@ -119,6 +119,11 @@ const api = { toggle: () => ipcRenderer.invoke(IpcChannel.MiniWindow_Toggle), setPin: (isPinned: boolean) => ipcRenderer.invoke(IpcChannel.MiniWindow_SetPin, isPinned) }, + msTTS: { + getVoices: () => ipcRenderer.invoke(IpcChannel.MsTTS_GetVoices), + synthesize: (text: string, voice: string, outputFormat: string) => + ipcRenderer.invoke(IpcChannel.MsTTS_Synthesize, text, voice, outputFormat) + }, aes: { encrypt: (text: string, secretKey: string, iv: string) => ipcRenderer.invoke(IpcChannel.Aes_Encrypt, text, secretKey, iv), diff --git a/src/renderer/src/components/TTSButton.tsx b/src/renderer/src/components/TTSButton.tsx index 58e4c2039a..aa6592f372 100644 --- a/src/renderer/src/components/TTSButton.tsx +++ b/src/renderer/src/components/TTSButton.tsx @@ -23,6 +23,7 @@ const TTSButton: React.FC = ({ message, className }) => { setIsSpeaking(true) try { + console.log('点击TTS按钮,开始播放消息') await TTSService.speakFromMessage(message) // 监听播放结束 diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 
c2f6aeb6c8..aa2686a43a 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -1350,8 +1350,21 @@ "service_type": "Service Type", "service_type.openai": "OpenAI", "service_type.edge": "Browser TTS", + "service_type.siliconflow": "SiliconFlow", "service_type.refresh": "Refresh TTS service type settings", "service_type.refreshed": "TTS service type settings refreshed", + "siliconflow_api_key": "SiliconFlow API Key", + "siliconflow_api_key.placeholder": "Enter SiliconFlow API key", + "siliconflow_api_url": "SiliconFlow API URL", + "siliconflow_api_url.placeholder": "Example: https://api.siliconflow.cn/v1/audio/speech", + "siliconflow_voice": "SiliconFlow Voice", + "siliconflow_voice.placeholder": "Select a voice", + "siliconflow_model": "SiliconFlow Model", + "siliconflow_model.placeholder": "Select a model", + "siliconflow_response_format": "Response Format", + "siliconflow_response_format.placeholder": "Default is mp3", + "siliconflow_speed": "Speech Speed", + "siliconflow_speed.placeholder": "Default is 1.0", "api_key": "API Key", "api_key.placeholder": "Enter OpenAI API key", "api_url": "API URL", @@ -1381,10 +1394,17 @@ "learn_more": "Learn more", "tab_title": "[to be translated]:语音合成", "error": { - "not_enabled": "[to be translated]:语音合成功能未启用", - "no_api_key": "[to be translated]:未设置API密钥", - "no_edge_voice": "[to be translated]:未选择浏览器 TTS音色", - "browser_not_support": "[to be translated]:浏览器不支持语音合成" + "not_enabled": "Text-to-speech feature is not enabled", + "no_api_key": "API key is not set", + "no_voice": "Voice is not selected", + "no_model": "Model is not selected", + "no_edge_voice": "Browser TTS voice is not selected", + "browser_not_support": "Browser does not support speech synthesis", + "synthesis_failed": "Speech synthesis failed", + "play_failed": "Speech playback failed", + "empty_text": "Text is empty", + "general": "An error occurred during speech synthesis", + "unsupported_service_type": "Unsupported service type: {{serviceType}}" } }, "asr": { diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 160dcc55c9..08cb46fd05 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -1356,8 +1356,22 @@ "service_type": "服务类型", "service_type.openai": "OpenAI", "service_type.edge": "浏览器 TTS", + "service_type.siliconflow": "硅基流动", + "service_type.mstts": "免费在线 TTS", "service_type.refresh": "刷新TTS服务类型设置", "service_type.refreshed": "已刷新TTS服务类型设置", + "siliconflow_api_key": "硅基流动API密钥", + "siliconflow_api_key.placeholder": "请输入硅基流动API密钥", + "siliconflow_api_url": "硅基流动API地址", + "siliconflow_api_url.placeholder": "例如:https://api.siliconflow.cn/v1/audio/speech", + "siliconflow_voice": "硅基流动音色", + "siliconflow_voice.placeholder": "请选择音色", + "siliconflow_model": "硅基流动模型", + "siliconflow_model.placeholder": "请选择模型", + "siliconflow_response_format": "响应格式", + "siliconflow_response_format.placeholder": "默认为mp3", + "siliconflow_speed": "语速", + "siliconflow_speed.placeholder": "默认为1.0", "api_key": "API密钥", "api_key.placeholder": "请输入OpenAI API密钥", "api_url": "API地址", @@ -1366,6 +1380,13 @@ "edge_voice.loading": "加载中...", "edge_voice.refresh": "刷新可用音色列表", "edge_voice.not_found": "未找到匹配的音色", + "edge_voice.available_count": "可用语音: {{count}}个", + "edge_voice.refreshing": "正在刷新语音列表...", + "edge_voice.refreshed": "语音列表已刷新", + "mstts.voice": "免费在线 TTS音色", + "mstts.output_format": "输出格式", + "mstts.info": "免费在线TTS服务不需要API密钥,完全免费使用。", + "error.no_mstts_voice": 
"未设置免费在线 TTS音色", "voice": "音色", "voice.placeholder": "请选择音色", "voice_input_placeholder": "输入音色", @@ -1388,8 +1409,15 @@ "error": { "not_enabled": "语音合成功能未启用", "no_api_key": "未设置API密钥", + "no_voice": "未选择音色", + "no_model": "未选择模型", "no_edge_voice": "未选择浏览器 TTS音色", - "browser_not_support": "浏览器不支持语音合成" + "browser_not_support": "浏览器不支持语音合成", + "synthesis_failed": "语音合成失败", + "play_failed": "语音播放失败", + "empty_text": "文本为空", + "general": "语音合成出现错误", + "unsupported_service_type": "不支持的服务类型: {{serviceType}}" } }, "asr": { diff --git a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx index e5189732da..006a1d495a 100644 --- a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx +++ b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx @@ -407,7 +407,10 @@ const MessageMenubar: FC = (props) => { )} {isAssistantMessage && ttsEnabled && ( - TTSService.speakFromMessage(message)}> + { + console.log('点击MessageMenubar中的TTS按钮,开始播放消息') + TTSService.speakFromMessage(message) + }}> diff --git a/src/renderer/src/pages/home/Messages/TTSStopButton.tsx b/src/renderer/src/pages/home/Messages/TTSStopButton.tsx index 82b6f921eb..f5065b2821 100644 --- a/src/renderer/src/pages/home/Messages/TTSStopButton.tsx +++ b/src/renderer/src/pages/home/Messages/TTSStopButton.tsx @@ -57,7 +57,7 @@ const TTSStopButton: React.FC = () => { const StopButtonContainer = styled.div` position: fixed; - bottom: 100px; + bottom: 150px; /* 从100px改为150px,向上移动50px */ right: 20px; z-index: 1000; ` diff --git a/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx b/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx index dc44dbb3e8..eb3e7f7e51 100644 --- a/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx +++ b/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx @@ -34,7 +34,6 @@ const ASRSettings: FC = () => { // 服务类型选项 const serviceTypeOptions = [ { label: 'OpenAI', value: 'openai' }, - { label: t('settings.asr.service_type.browser'), value: 'browser' }, { label: t('settings.asr.service_type.local'), value: 'local' } ] diff --git a/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx b/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx index 3e105bcf4f..4fea657369 100644 --- a/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx +++ b/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx @@ -15,10 +15,18 @@ import { setTtsFilterOptions, setTtsModel, setTtsServiceType, - setTtsVoice + setTtsVoice, + setTtsSiliconflowApiKey, + setTtsSiliconflowApiUrl, + setTtsSiliconflowVoice, + setTtsSiliconflowModel, + setTtsSiliconflowResponseFormat, + setTtsSiliconflowSpeed, + setTtsMsVoice, + setTtsMsOutputFormat } from '@renderer/store/settings' -import { Button, Form, Input, message, Select, Space, Switch, Tabs, Tag } from 'antd' -import { FC, useEffect, useState } from 'react' +import { Button, Form, Input, InputNumber, message, Select, Space, Switch, Tabs, Tag } from 'antd' +import { FC, useEffect, useState, useCallback } from 'react' import { useTranslation } from 'react-i18next' import { useSelector } from 'react-redux' import styled from 'styled-components' @@ -34,6 +42,36 @@ import { } from '..' 
import ASRSettings from './ASRSettings' +// 预定义的浏览器 TTS音色列表 +const PREDEFINED_VOICES = [ + { label: '小晓 (女声, 中文)', value: 'zh-CN-XiaoxiaoNeural' }, + { label: '云扬 (男声, 中文)', value: 'zh-CN-YunyangNeural' }, + { label: '晓晓 (女声, 中文)', value: 'zh-CN-XiaoxiaoNeural' }, + { label: '晓涵 (女声, 中文)', value: 'zh-CN-XiaohanNeural' }, + { label: '晓诗 (女声, 中文)', value: 'zh-CN-XiaoshuangNeural' }, + { label: '晓瑞 (女声, 中文)', value: 'zh-CN-XiaoruiNeural' }, + { label: '晓墨 (女声, 中文)', value: 'zh-CN-XiaomoNeural' }, + { label: '晓然 (男声, 中文)', value: 'zh-CN-XiaoranNeural' }, + { label: '晓坤 (男声, 中文)', value: 'zh-CN-XiaokunNeural' }, + { label: 'Aria (Female, English)', value: 'en-US-AriaNeural' }, + { label: 'Guy (Male, English)', value: 'en-US-GuyNeural' }, + { label: 'Jenny (Female, English)', value: 'en-US-JennyNeural' }, + { label: 'Ana (Female, Spanish)', value: 'es-ES-ElviraNeural' }, + { label: 'Ichiro (Male, Japanese)', value: 'ja-JP-KeitaNeural' }, + { label: 'Nanami (Female, Japanese)', value: 'ja-JP-NanamiNeural' }, + // 添加更多常用的语音 + { label: 'Microsoft David (en-US)', value: 'Microsoft David Desktop - English (United States)' }, + { label: 'Microsoft Zira (en-US)', value: 'Microsoft Zira Desktop - English (United States)' }, + { label: 'Microsoft Mark (en-US)', value: 'Microsoft Mark Online (Natural) - English (United States)' }, + { label: 'Microsoft Aria (en-US)', value: 'Microsoft Aria Online (Natural) - English (United States)' }, + { label: 'Google US English', value: 'Google US English' }, + { label: 'Google UK English Female', value: 'Google UK English Female' }, + { label: 'Google UK English Male', value: 'Google UK English Male' }, + { label: 'Google 日本語', value: 'Google 日本語' }, + { label: 'Google 普通话(中国大陆)', value: 'Google 普通话(中国大陆)' }, + { label: 'Google 粤語(香港)', value: 'Google 粤語(香港)' } +] + const CustomVoiceInput = styled.div` display: flex; flex-direction: column; @@ -81,6 +119,12 @@ const LoadingText = styled.div` color: #999; ` +const InfoText = styled.div` + margin-top: 8px; + font-size: 12px; + color: #888; +` + const VoiceSelectContainer = styled.div` display: flex; gap: 8px; @@ -93,25 +137,34 @@ const TTSSettings: FC = () => { const dispatch = useAppDispatch() // 从Redux获取TTS设置 - const ttsEnabled = useSelector((state: any) => state.settings.ttsEnabled) - const ttsServiceType = useSelector((state: any) => state.settings.ttsServiceType || 'openai') - const ttsApiKey = useSelector((state: any) => state.settings.ttsApiKey) - const ttsApiUrl = useSelector((state: any) => state.settings.ttsApiUrl) - const ttsVoice = useSelector((state: any) => state.settings.ttsVoice) - const ttsModel = useSelector((state: any) => state.settings.ttsModel) - const ttsEdgeVoice = useSelector((state: any) => state.settings.ttsEdgeVoice || 'zh-CN-XiaoxiaoNeural') - const ttsCustomVoices = useSelector((state: any) => state.settings.ttsCustomVoices || []) - const ttsCustomModels = useSelector((state: any) => state.settings.ttsCustomModels || []) - const ttsFilterOptions = useSelector( - (state: any) => - state.settings.ttsFilterOptions || { - filterThinkingProcess: true, - filterMarkdown: true, - filterCodeBlocks: true, - filterHtmlTags: true, - maxTextLength: 4000 - } - ) + const settings = useSelector((state: any) => state.settings) + const ttsEnabled = settings.ttsEnabled + const ttsServiceType = settings.ttsServiceType || 'openai' + const ttsApiKey = settings.ttsApiKey + const ttsApiUrl = settings.ttsApiUrl + const ttsVoice = settings.ttsVoice + const ttsModel = settings.ttsModel + const ttsEdgeVoice = 
settings.ttsEdgeVoice || 'zh-CN-XiaoxiaoNeural' + const ttsCustomVoices = settings.ttsCustomVoices || [] + const ttsCustomModels = settings.ttsCustomModels || [] + // 免费在线TTS设置 + const ttsMsVoice = settings.ttsMsVoice || 'zh-CN-XiaoxiaoNeural' + const ttsMsOutputFormat = settings.ttsMsOutputFormat || 'audio-24khz-48kbitrate-mono-mp3' + const ttsFilterOptions = settings.ttsFilterOptions || { + filterThinkingProcess: true, + filterMarkdown: true, + filterCodeBlocks: true, + filterHtmlTags: true, + maxTextLength: 4000 + } + + // 硅基流动TTS设置 + const ttsSiliconflowApiKey = settings.ttsSiliconflowApiKey + const ttsSiliconflowApiUrl = settings.ttsSiliconflowApiUrl + const ttsSiliconflowVoice = settings.ttsSiliconflowVoice + const ttsSiliconflowModel = settings.ttsSiliconflowModel + const ttsSiliconflowResponseFormat = settings.ttsSiliconflowResponseFormat + const ttsSiliconflowSpeed = settings.ttsSiliconflowSpeed // 新增自定义音色和模型的状态 const [newVoice, setNewVoice] = useState('') @@ -120,38 +173,51 @@ const TTSSettings: FC = () => { // 浏览器可用的语音列表 const [availableVoices, setAvailableVoices] = useState<{ label: string; value: string }[]>([]) - // 预定义的浏览器 TTS音色列表 - const predefinedVoices = [ - { label: '小晓 (女声, 中文)', value: 'zh-CN-XiaoxiaoNeural' }, - { label: '云扬 (男声, 中文)', value: 'zh-CN-YunyangNeural' }, - { label: '晓晓 (女声, 中文)', value: 'zh-CN-XiaoxiaoNeural' }, - { label: '晓涵 (女声, 中文)', value: 'zh-CN-XiaohanNeural' }, - { label: '晓诗 (女声, 中文)', value: 'zh-CN-XiaoshuangNeural' }, - { label: '晓瑞 (女声, 中文)', value: 'zh-CN-XiaoruiNeural' }, - { label: '晓墨 (女声, 中文)', value: 'zh-CN-XiaomoNeural' }, - { label: '晓然 (男声, 中文)', value: 'zh-CN-XiaoranNeural' }, - { label: '晓坤 (男声, 中文)', value: 'zh-CN-XiaokunNeural' }, - { label: 'Aria (Female, English)', value: 'en-US-AriaNeural' }, - { label: 'Guy (Male, English)', value: 'en-US-GuyNeural' }, - { label: 'Jenny (Female, English)', value: 'en-US-JennyNeural' }, - { label: 'Ana (Female, Spanish)', value: 'es-ES-ElviraNeural' }, - { label: 'Ichiro (Male, Japanese)', value: 'ja-JP-KeitaNeural' }, - { label: 'Nanami (Female, Japanese)', value: 'ja-JP-NanamiNeural' }, - // 添加更多常用的语音 - { label: 'Microsoft David (en-US)', value: 'Microsoft David Desktop - English (United States)' }, - { label: 'Microsoft Zira (en-US)', value: 'Microsoft Zira Desktop - English (United States)' }, - { label: 'Microsoft Mark (en-US)', value: 'Microsoft Mark Online (Natural) - English (United States)' }, - { label: 'Microsoft Aria (en-US)', value: 'Microsoft Aria Online (Natural) - English (United States)' }, - { label: 'Google US English', value: 'Google US English' }, - { label: 'Google UK English Female', value: 'Google UK English Female' }, - { label: 'Google UK English Male', value: 'Google UK English Male' }, - { label: 'Google 日本語', value: 'Google 日本語' }, - { label: 'Google 普通话(中国大陆)', value: 'Google 普通话(中国大陆)' }, - { label: 'Google 粤語(香港)', value: 'Google 粤語(香港)' } - ] + // 免费在线TTS可用的语音列表 + const [msTtsVoices, setMsTtsVoices] = useState<{ label: string; value: string }[]>([]) + + + + // 获取免费在线TTS可用的语音列表 + const getMsTtsVoices = useCallback(async () => { + try { + // 调用API获取免费在线TTS语音列表 + const response = await window.api.msTTS.getVoices(); + console.log('获取到的免费在线TTS语音列表:', response); + + // 转换为选项格式 + const voices = response.map((voice: any) => ({ + label: `${voice.ShortName} (${voice.Gender === 'Female' ? 
'女声' : '男声'})`, + value: voice.ShortName + })); + + // 按语言和性别排序 + voices.sort((a: any, b: any) => { + const localeA = a.value.split('-')[0] + a.value.split('-')[1]; + const localeB = b.value.split('-')[0] + b.value.split('-')[1]; + if (localeA !== localeB) return localeA.localeCompare(localeB); + return a.label.localeCompare(b.label); + }); + + setMsTtsVoices(voices); + } catch (error) { + console.error('获取免费在线TTS语音列表失败:', error); + // 如果获取失败,设置一些默认的中文语音 + setMsTtsVoices([ + { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' }, + { label: 'zh-CN-YunxiNeural (男声)', value: 'zh-CN-YunxiNeural' }, + { label: 'zh-CN-YunyangNeural (男声)', value: 'zh-CN-YunyangNeural' }, + { label: 'zh-CN-XiaohanNeural (女声)', value: 'zh-CN-XiaohanNeural' }, + { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' }, + { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' }, + { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' }, + { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' }, + ]); + } + }, []); // 获取浏览器可用的语音列表 - const getVoices = () => { + const getVoices = useCallback(() => { if (typeof window !== 'undefined' && 'speechSynthesis' in window) { // 先触发一下语音合成引擎,确保它已经初始化 window.speechSynthesis.cancel() @@ -170,18 +236,22 @@ const TTSSettings: FC = () => { })) // 添加语言信息到预定义语音 - const enhancedPredefinedVoices = predefinedVoices.map((voice) => ({ + const enhancedPredefinedVoices = PREDEFINED_VOICES.map((voice) => ({ ...voice, lang: voice.value.split('-').slice(0, 2).join('-'), isNative: false // 标记为非浏览器原生语音 })) // 合并所有语音列表 + // 只使用浏览器原生语音,因为预定义语音实际不可用 let allVoices = [...browserVoices] - // 如果浏览器语音少于5个,添加预定义语音 - if (browserVoices.length < 5) { - allVoices = [...browserVoices, ...enhancedPredefinedVoices] + // 如果浏览器没有可用语音,才使用预定义语音 + if (browserVoices.length === 0) { + allVoices = [...enhancedPredefinedVoices] + console.log('浏览器没有可用语音,使用预定义语音') + } else { + console.log('使用浏览器原生语音,共' + browserVoices.length + '个') } // 去除重复项,优先保留浏览器原生语音 @@ -210,12 +280,12 @@ const TTSSettings: FC = () => { } else { // 如果浏览器不支持Web Speech API,使用预定义的语音列表 console.log('浏览器不支持Web Speech API,使用预定义的语音列表') - setAvailableVoices(predefinedVoices) + setAvailableVoices(PREDEFINED_VOICES) } - } + }, []) // 刷新语音列表 - const refreshVoices = () => { + const refreshVoices = useCallback(() => { console.log('手动刷新语音列表') message.loading({ content: t('settings.tts.edge_voice.refreshing', { defaultValue: '正在刷新语音列表...' 
}), @@ -242,13 +312,19 @@ const TTSSettings: FC = () => { }, 500) } else { // 如果浏览器不支持Web Speech API,使用预定义的语音列表 - setAvailableVoices(predefinedVoices) + setAvailableVoices(PREDEFINED_VOICES) message.success({ content: t('settings.tts.edge_voice.refreshed', { defaultValue: '语音列表已刷新' }), key: 'refresh-voices' }) } - } + }, [getVoices, t]) + + // 获取免费在线TTS语音列表 + useEffect(() => { + // 获取免费在线TTS语音列表 + getMsTtsVoices(); + }, [getMsTtsVoices]); useEffect(() => { // 初始化语音合成引擎 @@ -283,10 +359,10 @@ const TTSSettings: FC = () => { } } else { // 如果浏览器不支持Web Speech API,使用预定义的语音列表 - setAvailableVoices(predefinedVoices) + setAvailableVoices(PREDEFINED_VOICES) return () => {} } - }, [getVoices, predefinedVoices]) + }, [getVoices]) // 测试TTS功能 const testTTS = async () => { @@ -295,6 +371,11 @@ const TTSSettings: FC = () => { return } + // 强制刷新状态,确保使用最新的设置 + // 先获取当前的服务类型 + const currentType = store.getState().settings.ttsServiceType || 'openai' + console.log('测试前当前的TTS服务类型:', currentType) + // 获取最新的服务类型设置 const latestSettings = store.getState().settings const currentServiceType = latestSettings.ttsServiceType || 'openai' @@ -305,7 +386,12 @@ const TTSSettings: FC = () => { ttsApiKey: latestSettings.ttsApiKey ? '已设置' : '未设置', ttsVoice: latestSettings.ttsVoice, ttsModel: latestSettings.ttsModel, - ttsEdgeVoice: latestSettings.ttsEdgeVoice + ttsEdgeVoice: latestSettings.ttsEdgeVoice, + ttsSiliconflowApiKey: latestSettings.ttsSiliconflowApiKey ? '已设置' : '未设置', + ttsSiliconflowVoice: latestSettings.ttsSiliconflowVoice, + ttsSiliconflowModel: latestSettings.ttsSiliconflowModel, + ttsSiliconflowResponseFormat: latestSettings.ttsSiliconflowResponseFormat, + ttsSiliconflowSpeed: latestSettings.ttsSiliconflowSpeed }) // 根据服务类型检查必要的参数 @@ -329,6 +415,25 @@ const TTSSettings: FC = () => { window.message.error({ content: t('settings.tts.error.no_edge_voice'), key: 'tts-test' }) return } + } else if (currentServiceType === 'siliconflow') { + const ttsSiliconflowApiKey = latestSettings.ttsSiliconflowApiKey + const ttsSiliconflowVoice = latestSettings.ttsSiliconflowVoice + const ttsSiliconflowModel = latestSettings.ttsSiliconflowModel + + if (!ttsSiliconflowApiKey) { + window.message.error({ content: t('settings.tts.error.no_api_key'), key: 'tts-test' }) + return + } + + if (!ttsSiliconflowVoice) { + window.message.error({ content: t('settings.tts.error.no_voice'), key: 'tts-test' }) + return + } + + if (!ttsSiliconflowModel) { + window.message.error({ content: t('settings.tts.error.no_model'), key: 'tts-test' }) + return + } } await TTSService.speak('这是一段测试语音,用于测试TTS功能是否正常工作。') @@ -430,25 +535,14 @@ const TTSSettings: FC = () => { value={ttsServiceType} onChange={(value: string) => { console.log('切换TTS服务类型为:', value) - // 先将新的服务类型写入Redux状态 + // 直接将新的服务类型写入Redux状态 dispatch(setTtsServiceType(value)) - - // 等待一下,确保状态已更新 - setTimeout(() => { - // 验证状态是否正确更新 - const currentType = store.getState().settings.ttsServiceType - console.log('更新后的TTS服务类型:', currentType) - - // 如果状态没有正确更新,再次尝试 - if (currentType !== value) { - console.log('状态未正确更新,再次尝试') - dispatch(setTtsServiceType(value)) - } - }, 100) }} options={[ { label: t('settings.tts.service_type.openai'), value: 'openai' }, - { label: t('settings.tts.service_type.edge'), value: 'edge' } + { label: t('settings.tts.service_type.edge'), value: 'edge' }, + { label: t('settings.tts.service_type.siliconflow'), value: 'siliconflow' }, + { label: t('settings.tts.service_type.mstts'), value: 'mstts' } ]} disabled={!ttsEnabled} style={{ flex: 1 }} @@ -495,6 +589,92 @@ const 
TTSSettings: FC = () => { )} + {/* 硅基流动 TTS设置 */} + {ttsServiceType === 'siliconflow' && ( + <> + + dispatch(setTtsSiliconflowApiKey(e.target.value))} + placeholder={t('settings.tts.siliconflow_api_key.placeholder')} + disabled={!ttsEnabled} + /> + + + dispatch(setTtsSiliconflowApiUrl(e.target.value))} + placeholder={t('settings.tts.siliconflow_api_url.placeholder')} + disabled={!ttsEnabled} + /> + + + dispatch(setTtsSiliconflowModel(value))} + options={[ + { label: 'FunAudioLLM/CosyVoice2-0.5B', value: 'FunAudioLLM/CosyVoice2-0.5B' } + ]} + disabled={!ttsEnabled} + style={{ width: '100%' }} + placeholder={t('settings.tts.siliconflow_model.placeholder')} + showSearch + optionFilterProp="label" + allowClear + /> + + + dispatch(setTtsMsVoice(value))} + disabled={!ttsEnabled} + style={{ width: '100%' }} + options={msTtsVoices.length > 0 ? msTtsVoices : [ + { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' }, + { label: 'zh-CN-YunxiNeural (男声)', value: 'zh-CN-YunxiNeural' }, + { label: 'zh-CN-YunyangNeural (男声)', value: 'zh-CN-YunyangNeural' }, + { label: 'zh-CN-XiaohanNeural (女声)', value: 'zh-CN-XiaohanNeural' }, + { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' }, + { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' }, + { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' }, + { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' }, + ]} + showSearch + optionFilterProp="label" + placeholder={t('settings.tts.voice.placeholder', { defaultValue: '请选择音色' })} + notFoundContent={t('settings.tts.voice.not_found', { defaultValue: '未找到音色' })} + /> +
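
---

Reviewer note — a minimal, hypothetical renderer-side sketch (not part of the patch) showing how the pieces added above are meant to fit together: the `window.api.msTTS` bridge exposed in `src/preload/index.ts`, the temp-file path returned by `MsTTSService.synthesize` in the main process, and the binary read path enabled by the `FileService.readFile` encoding change. The helper name `playWithMsTts` and the chosen voice/output-format values are illustrative assumptions; only the bridge calls themselves (`msTTS.synthesize`, `fs.read`) mirror the patch.

```ts
// Hypothetical sketch, assuming the preload bridge added in this patch.
const api = (window as any).api // exposed by src/preload/index.ts

async function playWithMsTts(text: string): Promise<HTMLAudioElement> {
  // Main process synthesizes via node-edge-tts and returns a temp-file path
  // (see MsTTSService.synthesize above).
  const filePath: string = await api.msTTS.synthesize(
    text,
    'zh-CN-XiaoxiaoNeural',            // a ShortName from api.msTTS.getVoices()
    'audio-24khz-48kbitrate-mono-mp3'  // outputFormat passed through to EdgeTTS
  )

  // Omitting the encoding argument makes FileService.readFile take the
  // Buffer branch added in this patch, so raw bytes come back instead of
  // a UTF-8 string.
  const bytes: Uint8Array = await api.fs.read(filePath)

  // Wrap the bytes in a Blob and hand them to an audio element for playback.
  const blob = new Blob([bytes], { type: 'audio/mpeg' })
  const audio = new Audio(URL.createObjectURL(blob))
  await audio.play()
  return audio
}
```

Presumably a flow along these lines sits behind `src/renderer/src/services/tts/MsTTSService.ts`, whose hunk is not included in this excerpt.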