cherry-studio/src/main/services/MsTTSService.ts
2025-04-11 00:43:13 +08:00

236 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { EdgeTTS } from 'node-edge-tts'; // listVoices is no longer needed here
import fs from 'node:fs';
import path from 'node:path';
import { app } from 'electron';
import log from 'electron-log';
// --- START OF HARDCODED VOICE LIST ---
// WARNING: This list is static and may become outdated.
// It's generally recommended to use listVoices() for the most up-to-date list.
const hardcodedVoices = [
{ Name: 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)', ShortName: 'af-ZA-AdriNeural', Gender: 'Female', Locale: 'af-ZA' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)', ShortName: 'am-ET-MekdesNeural', Gender: 'Female', Locale: 'am-ET' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)', ShortName: 'ar-AE-FatimaNeural', Gender: 'Female', Locale: 'ar-AE' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)', ShortName: 'ar-AE-HamdanNeural', Gender: 'Male', Locale: 'ar-AE' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)', ShortName: 'ar-BH-AliNeural', Gender: 'Male', Locale: 'ar-BH' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)', ShortName: 'ar-BH-LailaNeural', Gender: 'Female', Locale: 'ar-BH' },
// ... (Many other Arabic locales/voices) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)', ShortName: 'ar-SA-ZariyahNeural', Gender: 'Female', Locale: 'ar-SA' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)', ShortName: 'az-AZ-BabekNeural', Gender: 'Male', Locale: 'az-AZ' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)', ShortName: 'az-AZ-BanuNeural', Gender: 'Female', Locale: 'az-AZ' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)', ShortName: 'bg-BG-BorislavNeural', Gender: 'Male', Locale: 'bg-BG' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)', ShortName: 'bg-BG-KalinaNeural', Gender: 'Female', Locale: 'bg-BG' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)', ShortName: 'bn-BD-NabanitaNeural', Gender: 'Female', Locale: 'bn-BD' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)', ShortName: 'bn-BD-PradeepNeural', Gender: 'Male', Locale: 'bn-BD' },
// ... (Catalan, Czech, Welsh, Danish, German, Greek, English variants) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)', ShortName: 'en-AU-NatashaNeural', Gender: 'Female', Locale: 'en-AU' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)', ShortName: 'en-AU-WilliamNeural', Gender: 'Male', Locale: 'en-AU' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)', ShortName: 'en-CA-ClaraNeural', Gender: 'Female', Locale: 'en-CA' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)', ShortName: 'en-CA-LiamNeural', Gender: 'Male', Locale: 'en-CA' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)', ShortName: 'en-GB-LibbyNeural', Gender: 'Female', Locale: 'en-GB' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)', ShortName: 'en-GB-MaisieNeural', Gender: 'Female', Locale: 'en-GB' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)', ShortName: 'en-GB-RyanNeural', Gender: 'Male', Locale: 'en-GB' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)', ShortName: 'en-GB-SoniaNeural', Gender: 'Female', Locale: 'en-GB' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)', ShortName: 'en-GB-ThomasNeural', Gender: 'Male', Locale: 'en-GB' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)', ShortName: 'en-HK-SamNeural', Gender: 'Male', Locale: 'en-HK' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)', ShortName: 'en-HK-YanNeural', Gender: 'Female', Locale: 'en-HK' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)', ShortName: 'en-IE-ConnorNeural', Gender: 'Male', Locale: 'en-IE' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)', ShortName: 'en-IE-EmilyNeural', Gender: 'Female', Locale: 'en-IE' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)', ShortName: 'en-IN-NeerjaNeural', Gender: 'Female', Locale: 'en-IN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)', ShortName: 'en-IN-PrabhatNeural', Gender: 'Male', Locale: 'en-IN' },
// ... (Many more English variants: KE, NG, NZ, PH, SG, TZ, US, ZA) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)', ShortName: 'en-US-AriaNeural', Gender: 'Female', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)', ShortName: 'en-US-AnaNeural', Gender: 'Female', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)', ShortName: 'en-US-ChristopherNeural', Gender: 'Male', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)', ShortName: 'en-US-EricNeural', Gender: 'Male', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)', ShortName: 'en-US-GuyNeural', Gender: 'Male', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)', ShortName: 'en-US-JennyNeural', Gender: 'Female', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)', ShortName: 'en-US-MichelleNeural', Gender: 'Female', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)', ShortName: 'en-US-RogerNeural', Gender: 'Male', Locale: 'en-US' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)', ShortName: 'en-US-SteffanNeural', Gender: 'Male', Locale: 'en-US' },
// ... (Spanish variants) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)', ShortName: 'es-MX-DaliaNeural', Gender: 'Female', Locale: 'es-MX' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)', ShortName: 'es-MX-JorgeNeural', Gender: 'Male', Locale: 'es-MX' },
// ... (Estonian, Basque, Persian, Finnish, Filipino, French, Irish, Galician, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Indonesian, Icelandic, Italian, Japanese) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)', ShortName: 'ja-JP-KeitaNeural', Gender: 'Male', Locale: 'ja-JP' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)', ShortName: 'ja-JP-NanamiNeural', Gender: 'Female', Locale: 'ja-JP' },
// ... (Javanese, Georgian, Kazakh, Khmer, Kannada, Korean) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)', ShortName: 'ko-KR-InJoonNeural', Gender: 'Male', Locale: 'ko-KR' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)', ShortName: 'ko-KR-SunHiNeural', Gender: 'Female', Locale: 'ko-KR' },
// ... (Lao, Lithuanian, Latvian, Macedonian, Malayalam, Mongolian, Marathi, Malay, Maltese, Burmese, Norwegian, Dutch, Polish, Pashto, Portuguese) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)', ShortName: 'pt-BR-AntonioNeural', Gender: 'Male', Locale: 'pt-BR' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)', ShortName: 'pt-BR-FranciscaNeural', Gender: 'Female', Locale: 'pt-BR' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)', ShortName: 'pt-PT-DuarteNeural', Gender: 'Male', Locale: 'pt-PT' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)', ShortName: 'pt-PT-RaquelNeural', Gender: 'Female', Locale: 'pt-PT' },
// ... (Romanian, Russian, Sinhala, Slovak, Slovenian, Somali, Albanian, Serbian, Sundanese, Swedish, Swahili, Tamil, Telugu, Thai) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)', ShortName: 'th-TH-NiwatNeural', Gender: 'Male', Locale: 'th-TH' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)', ShortName: 'th-TH-PremwadeeNeural', Gender: 'Female', Locale: 'th-TH' },
// ... (Turkish, Ukrainian, Urdu, Uzbek, Vietnamese) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)', ShortName: 'vi-VN-HoaiMyNeural', Gender: 'Female', Locale: 'vi-VN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)', ShortName: 'vi-VN-NamMinhNeural', Gender: 'Male', Locale: 'vi-VN' },
// ... (Chinese variants) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)', ShortName: 'zh-CN-XiaoxiaoNeural', Gender: 'Female', Locale: 'zh-CN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)', ShortName: 'zh-CN-YunxiNeural', Gender: 'Male', Locale: 'zh-CN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)', ShortName: 'zh-CN-YunjianNeural', Gender: 'Male', Locale: 'zh-CN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)', ShortName: 'zh-CN-YunxiaNeural', Gender: 'Male', Locale: 'zh-CN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)', ShortName: 'zh-CN-YunyangNeural', Gender: 'Male', Locale: 'zh-CN' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)', ShortName: 'zh-CN-liaoning-XiaobeiNeural', Gender: 'Female', Locale: 'zh-CN-liaoning' },
// { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)', ShortName: 'zh-CN-shaanxi-XiaoniNeural', Gender: 'Female', Locale: 'zh-CN-shaanxi' }, // Example regional voice
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)', ShortName: 'zh-HK-HiuGaaiNeural', Gender: 'Female', Locale: 'zh-HK' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)', ShortName: 'zh-HK-HiuMaanNeural', Gender: 'Female', Locale: 'zh-HK' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)', ShortName: 'zh-HK-WanLungNeural', Gender: 'Male', Locale: 'zh-HK' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)', ShortName: 'zh-TW-HsiaoChenNeural', Gender: 'Female', Locale: 'zh-TW' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)', ShortName: 'zh-TW-HsiaoYuNeural', Gender: 'Female', Locale: 'zh-TW' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)', ShortName: 'zh-TW-YunJheNeural', Gender: 'Male', Locale: 'zh-TW' },
// ... (Zulu) ...
{ Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)', ShortName: 'zu-ZA-ThandoNeural', Gender: 'Female', Locale: 'zu-ZA' },
{ Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)', ShortName: 'zu-ZA-ThembaNeural', Gender: 'Male', Locale: 'zu-ZA' },
];
// --- END OF HARDCODED VOICE LIST ---
/**
* 免费在线TTS服务
* 使用免费的在线TTS服务不需要API密钥
*/
class MsTTSService {
private static instance: MsTTSService;
private tempDir: string;
private constructor() {
this.tempDir = path.join(app.getPath('temp'), 'cherry-tts');
if (!fs.existsSync(this.tempDir)) {
fs.mkdirSync(this.tempDir, { recursive: true });
}
log.info('初始化免费在线TTS服务 (使用硬编码语音列表)');
}
public static getInstance(): MsTTSService {
if (!MsTTSService.instance) {
MsTTSService.instance = new MsTTSService();
}
return MsTTSService.instance;
}
/**
* 获取可用的语音列表 (返回硬编码列表)
* @returns 语音列表
*/
public async getVoices(): Promise<any[]> {
try {
log.info(`返回硬编码的 ${hardcodedVoices.length} 个语音列表`);
// 直接返回硬编码的列表
// 注意:保持 async 是为了接口兼容性,虽然这里没有实际的异步操作
return hardcodedVoices;
} catch (error) {
// 这个 try/catch 在这里意义不大了,因为返回静态数据不会出错
// 但保留结构以防未来改动
log.error('获取硬编码语音列表时出错 (理论上不应发生):', error);
return []; // 返回空列表以防万一
}
}
/**
* 合成语音
* @param text 要合成的文本
* @param voice 语音的 ShortName (例如 'zh-CN-XiaoxiaoNeural')
* @param outputFormat 输出格式 (例如 'audio-24khz-48kbitrate-mono-mp3')
* @returns 音频文件路径
*/
public async synthesize(text: string, voice: string, outputFormat: string): Promise<string> {
try {
// 记录详细的请求信息
log.info(`微软在线TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`);
// 验证输入参数
if (!text || text.trim() === '') {
throw new Error('要合成的文本不能为空');
}
if (!voice || voice.trim() === '') {
throw new Error('语音名称不能为空');
}
// 创建一个新的EdgeTTS实例并设置参数
// 添加超时设置默认为30秒
const tts = new EdgeTTS({
voice: voice,
outputFormat: outputFormat,
timeout: 30000, // 30秒超时
rate: '+0%', // 正常语速
pitch: '+0Hz', // 正常音调
volume: '+0%' // 正常音量
});
// 生成临时文件路径
const timestamp = Date.now();
const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio';
const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`);
log.info(`开始生成语音文件: ${outputPath}`);
// 使用ttsPromise方法生成文件
await tts.ttsPromise(text, outputPath);
// 验证生成的文件是否存在且大小大于0
if (!fs.existsSync(outputPath)) {
throw new Error(`生成的语音文件不存在: ${outputPath}`);
}
const stats = fs.statSync(outputPath);
if (stats.size === 0) {
throw new Error(`生成的语音文件大小为0: ${outputPath}`);
}
log.info(`微软在线TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`);
return outputPath;
} catch (error: any) {
// 记录详细的错误信息
log.error(`微软在线TTS语音合成失败 (语音=${voice}):`, error);
// 尝试提供更有用的错误信息
if (error.message && typeof error.message === 'string') {
if (error.message.includes('Timed out')) {
throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`);
} else if (error.message.includes('ENOTFOUND')) {
throw new Error(`无法连接到微软语音服务,请检查网络连接`);
} else if (error.message.includes('ECONNREFUSED')) {
throw new Error(`连接被拒绝,请检查网络设置或代理配置`);
}
}
throw error;
}
}
/**
* (可选) 清理临时文件目录
*/
public async cleanupTempDir(): Promise<void> {
// (Cleanup method remains the same)
try {
const files = await fs.promises.readdir(this.tempDir);
for (const file of files) {
if (file.startsWith('tts_')) {
await fs.promises.unlink(path.join(this.tempDir, file));
}
}
log.info('TTS 临时文件已清理');
} catch (error) {
log.error('清理 TTS 临时文件失败:', error);
}
}
}
// 导出单例方法 (保持不变)
export const getVoices = async () => {
return await MsTTSService.getInstance().getVoices();
};
export const synthesize = async (text: string, voice: string, outputFormat: string) => {
return await MsTTSService.getInstance().synthesize(text, voice, outputFormat);
};
export const cleanupTtsTempFiles = async () => {
await MsTTSService.getInstance().cleanupTempDir();
};