Fix some issues

1600822305 2025-04-11 03:50:12 +08:00
parent ff95670f25
commit 1f967765e4
26 changed files with 1321 additions and 962 deletions

View File

@@ -6,8 +6,8 @@ import installExtension, { REACT_DEVELOPER_TOOLS, REDUX_DEVTOOLS } from 'electro
import { registerIpc } from './ipc'
import { configManager } from './services/ConfigManager'
import { registerMsTTSIpcHandlers } from './services/MsTTSIpcHandler'
import { CHERRY_STUDIO_PROTOCOL, handleProtocolUrl, registerProtocolClient } from './services/ProtocolClient'
import { registerShortcuts } from './services/ShortcutService'
import { TrayService } from './services/TrayService'
import { windowService } from './services/WindowService'

View File

@@ -19,11 +19,11 @@ import FileStorage from './services/FileStorage'
import { GeminiService } from './services/GeminiService'
import KnowledgeService from './services/KnowledgeService'
import mcpService from './services/MCPService'
import * as MsTTSService from './services/MsTTSService'
import * as NutstoreService from './services/NutstoreService'
import ObsidianVaultService from './services/ObsidianVaultService'
import { ProxyConfig, proxyManager } from './services/ProxyManager'
import { searchService } from './services/SearchService'
import { registerShortcuts, unregisterAllShortcuts } from './services/ShortcutService'
import { TrayService } from './services/TrayService'
import { windowService } from './services/WindowService'
@@ -307,9 +307,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
  // Register the MsTTS IPC handlers
  ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices)
  ipcMain.handle(IpcChannel.MsTTS_Synthesize, (_, text: string, voice: string, outputFormat: string) =>
    MsTTSService.synthesize(text, voice, outputFormat)
  )
}
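For orientation, a minimal sketch of how the renderer side could invoke the channels registered above. The channel names come from this commit; calling ipcRenderer directly (rather than through whatever preload bridge the app actually exposes) and the output-format string are assumptions.

import { IpcChannel } from '@shared/IpcChannel'
import { ipcRenderer } from 'electron'

// Sketch: fetch the voice list, then synthesize a short phrase to a temp file.
export async function demoSynthesize(text: string): Promise<string> {
  const voices = await ipcRenderer.invoke(IpcChannel.MsTTS_GetVoices)
  const voice = voices[0]?.ShortName ?? 'zh-CN-XiaoxiaoNeural'
  // Resolves to the temp-file path returned by MsTTSService.synthesize in the main process
  return ipcRenderer.invoke(IpcChannel.MsTTS_Synthesize, text, voice, 'audio-24khz-48kbitrate-mono-mp3')
}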

View File

@@ -27,7 +27,7 @@ class ASRServerService {
   * Start the ASR server
   * @returns Promise<{success: boolean, pid?: number, error?: string}>
   */
  private async startServer(): Promise<{ success: boolean; pid?: number; error?: string }> {
    try {
      if (this.asrServerProcess) {
        return { success: true, pid: this.asrServerProcess.pid }
@@ -90,7 +90,7 @@ class ASRServerService {
      })

      // Wait briefly to make sure the server has started
      await new Promise((resolve) => setTimeout(resolve, 1000))

      return { success: true, pid: this.asrServerProcess.pid }
    } catch (error) {
@@ -105,7 +105,10 @@ class ASRServerService {
   * @param pid The process ID
   * @returns Promise<{success: boolean, error?: string}>
   */
  private async stopServer(
    _event: Electron.IpcMainInvokeEvent,
    pid?: number
  ): Promise<{ success: boolean; error?: string }> {
    try {
      if (!this.asrServerProcess) {
        return { success: true }
@@ -120,7 +123,7 @@ class ASRServerService {
      this.asrServerProcess.kill()

      // Wait briefly to make sure the process has exited
      await new Promise((resolve) => setTimeout(resolve, 500))

      this.asrServerProcess = null
      return { success: true }
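startServer above waits a fixed 1000 ms after spawning and hopes the server is up. A hedged alternative sketch (the helper name, port, and retry cadence are assumptions, not from this commit) is to poll until the server's port actually accepts connections:

import net from 'node:net'

// Sketch: resolve once `port` accepts a TCP connection, or reject after `timeoutMs`.
function waitForPort(port: number, timeoutMs = 5000): Promise<void> {
  const deadline = Date.now() + timeoutMs
  return new Promise((resolve, reject) => {
    const attempt = () => {
      const socket = net.connect(port, '127.0.0.1')
      socket.once('connect', () => {
        socket.end()
        resolve()
      })
      socket.once('error', () => {
        socket.destroy()
        if (Date.now() > deadline) reject(new Error(`port ${port} not ready`))
        else setTimeout(attempt, 100) // retry every 100 ms until the deadline
      })
    }
    attempt()
  })
}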

View File

@@ -1,23 +1,24 @@
import fs from 'node:fs'
import path from 'node:path'

import { app } from 'electron'
import log from 'electron-log'
import { EdgeTTS } from 'node-edge-tts'

/**
 * Microsoft Edge TTS service
 * Uses Microsoft Edge's online TTS service; no API key required
 */
class MsEdgeTTSService {
  private static instance: MsEdgeTTSService
  private tempDir: string

  private constructor() {
    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts')

    // Make sure the temp directory exists
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true })
    }
  }
@@ -26,9 +27,9 @@ class MsEdgeTTSService {
   */
  public static getInstance(): MsEdgeTTSService {
    if (!MsEdgeTTSService.instance) {
      MsEdgeTTSService.instance = new MsEdgeTTSService()
    }
    return MsEdgeTTSService.instance
  }

  /**
@@ -46,11 +47,11 @@ class MsEdgeTTSService {
        { name: 'zh-CN-XiaomoNeural', locale: 'zh-CN', gender: 'Female' },
        { name: 'zh-CN-XiaoxuanNeural', locale: 'zh-CN', gender: 'Female' },
        { name: 'zh-CN-XiaoruiNeural', locale: 'zh-CN', gender: 'Female' },
        { name: 'zh-CN-YunfengNeural', locale: 'zh-CN', gender: 'Male' }
      ]
    } catch (error) {
      log.error('获取Microsoft Edge TTS语音列表失败:', error)
      throw error
    }
  }
@@ -63,15 +64,15 @@ class MsEdgeTTSService {
   */
  public async synthesize(text: string, voice: string, outputFormat: string): Promise<string> {
    try {
      log.info(`Microsoft Edge TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`)

      // Validate the input parameters
      if (!text || text.trim() === '') {
        throw new Error('要合成的文本不能为空')
      }

      if (!voice || voice.trim() === '') {
        throw new Error('语音名称不能为空')
      }

      // Create a new EdgeTTS instance and set its options
@@ -79,58 +80,58 @@ class MsEdgeTTSService {
        voice: voice,
        outputFormat: outputFormat,
        timeout: 30000, // 30-second timeout
        rate: '+0%', // normal speaking rate
        pitch: '+0Hz', // normal pitch
        volume: '+0%' // normal volume
      })

      // Build the temporary output file path
      const timestamp = Date.now()
      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio'
      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`)

      log.info(`开始生成语音文件: ${outputPath}`)

      // Generate the file with ttsPromise
      await tts.ttsPromise(text, outputPath)

      // Verify the generated file exists and is non-empty
      if (!fs.existsSync(outputPath)) {
        throw new Error(`生成的语音文件不存在: ${outputPath}`)
      }

      const stats = fs.statSync(outputPath)
      if (stats.size === 0) {
        throw new Error(`生成的语音文件大小为0: ${outputPath}`)
      }

      log.info(`Microsoft Edge TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`)
      return outputPath
    } catch (error: any) {
      // Log detailed error information
      log.error(`Microsoft Edge TTS语音合成失败 (语音=${voice}):`, error)

      // Try to surface a more useful error message
      if (error.message && typeof error.message === 'string') {
        if (error.message.includes('Timed out')) {
          throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`)
        } else if (error.message.includes('ENOTFOUND')) {
          throw new Error(`无法连接到Microsoft语音服务，请检查网络连接`)
        } else if (error.message.includes('ECONNREFUSED')) {
          throw new Error(`连接被拒绝,请检查网络设置或代理配置`)
        }
      }

      throw error
    }
  }
}

// Export singleton helpers
export const getVoices = async () => {
  return await MsEdgeTTSService.getInstance().getVoices()
}

export const synthesize = async (text: string, voice: string, outputFormat: string) => {
  return await MsEdgeTTSService.getInstance().synthesize(text, voice, outputFormat)
}
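A minimal usage sketch of the exported helpers, assuming main-process context; the format string is an assumption (any outputFormat accepted by node-edge-tts should work):

import { getVoices, synthesize } from './MsEdgeTTSService'

async function demo(): Promise<void> {
  const voices = await getVoices() // the short zh-CN list above
  const file = await synthesize('你好，世界', voices[0].name, 'audio-24khz-48kbitrate-mono-mp3')
  console.log('audio written to', file)
}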

View File

@@ -1,18 +1,17 @@
import { IpcChannel } from '@shared/IpcChannel'
import { ipcMain } from 'electron'

import * as MsTTSService from './MsTTSService'

/**
 * IPC handlers for MsTTS
 */
export function registerMsTTSIpcHandlers(): void {
  // Get the list of available voices
  ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices)

  // Synthesize speech
  ipcMain.handle(IpcChannel.MsTTS_Synthesize, (_, text: string, voice: string, outputFormat: string) =>
    MsTTSService.synthesize(text, voice, outputFormat)
  )
}

View File

@@ -1,115 +1,440 @@
import fs from 'node:fs'
import path from 'node:path'

import { app } from 'electron'
import log from 'electron-log'
import { EdgeTTS } from 'node-edge-tts' // listVoices is no longer needed here

// --- START OF HARDCODED VOICE LIST ---
// WARNING: This list is static and may become outdated.
// It's generally recommended to use listVoices() for the most up-to-date list.
const hardcodedVoices = [
  { Name: 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)', ShortName: 'af-ZA-AdriNeural', Gender: 'Female', Locale: 'af-ZA' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)', ShortName: 'am-ET-MekdesNeural', Gender: 'Female', Locale: 'am-ET' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)', ShortName: 'ar-AE-FatimaNeural', Gender: 'Female', Locale: 'ar-AE' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)', ShortName: 'ar-AE-HamdanNeural', Gender: 'Male', Locale: 'ar-AE' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)', ShortName: 'ar-BH-AliNeural', Gender: 'Male', Locale: 'ar-BH' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)', ShortName: 'ar-BH-LailaNeural', Gender: 'Female', Locale: 'ar-BH' },
  // ... (Many other Arabic locales/voices) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)', ShortName: 'ar-SA-ZariyahNeural', Gender: 'Female', Locale: 'ar-SA' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)', ShortName: 'az-AZ-BabekNeural', Gender: 'Male', Locale: 'az-AZ' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)', ShortName: 'az-AZ-BanuNeural', Gender: 'Female', Locale: 'az-AZ' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)', ShortName: 'bg-BG-BorislavNeural', Gender: 'Male', Locale: 'bg-BG' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)', ShortName: 'bg-BG-KalinaNeural', Gender: 'Female', Locale: 'bg-BG' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)', ShortName: 'bn-BD-NabanitaNeural', Gender: 'Female', Locale: 'bn-BD' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)', ShortName: 'bn-BD-PradeepNeural', Gender: 'Male', Locale: 'bn-BD' },
  // ... (Catalan, Czech, Welsh, Danish, German, Greek, English variants) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)', ShortName: 'en-AU-NatashaNeural', Gender: 'Female', Locale: 'en-AU' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)', ShortName: 'en-AU-WilliamNeural', Gender: 'Male', Locale: 'en-AU' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)', ShortName: 'en-CA-ClaraNeural', Gender: 'Female', Locale: 'en-CA' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)', ShortName: 'en-CA-LiamNeural', Gender: 'Male', Locale: 'en-CA' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)', ShortName: 'en-GB-LibbyNeural', Gender: 'Female', Locale: 'en-GB' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)', ShortName: 'en-GB-MaisieNeural', Gender: 'Female', Locale: 'en-GB' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)', ShortName: 'en-GB-RyanNeural', Gender: 'Male', Locale: 'en-GB' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)', ShortName: 'en-GB-SoniaNeural', Gender: 'Female', Locale: 'en-GB' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)', ShortName: 'en-GB-ThomasNeural', Gender: 'Male', Locale: 'en-GB' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)', ShortName: 'en-HK-SamNeural', Gender: 'Male', Locale: 'en-HK' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)', ShortName: 'en-HK-YanNeural', Gender: 'Female', Locale: 'en-HK' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)', ShortName: 'en-IE-ConnorNeural', Gender: 'Male', Locale: 'en-IE' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)', ShortName: 'en-IE-EmilyNeural', Gender: 'Female', Locale: 'en-IE' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)', ShortName: 'en-IN-NeerjaNeural', Gender: 'Female', Locale: 'en-IN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)', ShortName: 'en-IN-PrabhatNeural', Gender: 'Male', Locale: 'en-IN' },
  // ... (Many more English variants: KE, NG, NZ, PH, SG, TZ, US, ZA) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)', ShortName: 'en-US-AriaNeural', Gender: 'Female', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)', ShortName: 'en-US-AnaNeural', Gender: 'Female', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)', ShortName: 'en-US-ChristopherNeural', Gender: 'Male', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)', ShortName: 'en-US-EricNeural', Gender: 'Male', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)', ShortName: 'en-US-GuyNeural', Gender: 'Male', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)', ShortName: 'en-US-JennyNeural', Gender: 'Female', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)', ShortName: 'en-US-MichelleNeural', Gender: 'Female', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)', ShortName: 'en-US-RogerNeural', Gender: 'Male', Locale: 'en-US' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)', ShortName: 'en-US-SteffanNeural', Gender: 'Male', Locale: 'en-US' },
  // ... (Spanish variants) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)', ShortName: 'es-MX-DaliaNeural', Gender: 'Female', Locale: 'es-MX' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)', ShortName: 'es-MX-JorgeNeural', Gender: 'Male', Locale: 'es-MX' },
  // ... (Estonian, Basque, Persian, Finnish, Filipino, French, Irish, Galician, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Indonesian, Icelandic, Italian, Japanese) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)', ShortName: 'ja-JP-KeitaNeural', Gender: 'Male', Locale: 'ja-JP' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)', ShortName: 'ja-JP-NanamiNeural', Gender: 'Female', Locale: 'ja-JP' },
  // ... (Javanese, Georgian, Kazakh, Khmer, Kannada, Korean) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)', ShortName: 'ko-KR-InJoonNeural', Gender: 'Male', Locale: 'ko-KR' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)', ShortName: 'ko-KR-SunHiNeural', Gender: 'Female', Locale: 'ko-KR' },
  // ... (Lao, Lithuanian, Latvian, Macedonian, Malayalam, Mongolian, Marathi, Malay, Maltese, Burmese, Norwegian, Dutch, Polish, Pashto, Portuguese) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)', ShortName: 'pt-BR-AntonioNeural', Gender: 'Male', Locale: 'pt-BR' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)', ShortName: 'pt-BR-FranciscaNeural', Gender: 'Female', Locale: 'pt-BR' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)', ShortName: 'pt-PT-DuarteNeural', Gender: 'Male', Locale: 'pt-PT' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)', ShortName: 'pt-PT-RaquelNeural', Gender: 'Female', Locale: 'pt-PT' },
  // ... (Romanian, Russian, Sinhala, Slovak, Slovenian, Somali, Albanian, Serbian, Sundanese, Swedish, Swahili, Tamil, Telugu, Thai) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)', ShortName: 'th-TH-NiwatNeural', Gender: 'Male', Locale: 'th-TH' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)', ShortName: 'th-TH-PremwadeeNeural', Gender: 'Female', Locale: 'th-TH' },
  // ... (Turkish, Ukrainian, Urdu, Uzbek, Vietnamese) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)', ShortName: 'vi-VN-HoaiMyNeural', Gender: 'Female', Locale: 'vi-VN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)', ShortName: 'vi-VN-NamMinhNeural', Gender: 'Male', Locale: 'vi-VN' },
  // ... (Chinese variants) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)', ShortName: 'zh-CN-XiaoxiaoNeural', Gender: 'Female', Locale: 'zh-CN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)', ShortName: 'zh-CN-YunxiNeural', Gender: 'Male', Locale: 'zh-CN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)', ShortName: 'zh-CN-YunjianNeural', Gender: 'Male', Locale: 'zh-CN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)', ShortName: 'zh-CN-YunxiaNeural', Gender: 'Male', Locale: 'zh-CN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)', ShortName: 'zh-CN-YunyangNeural', Gender: 'Male', Locale: 'zh-CN' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)', ShortName: 'zh-CN-liaoning-XiaobeiNeural', Gender: 'Female', Locale: 'zh-CN-liaoning' },
  // { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)', ShortName: 'zh-CN-shaanxi-XiaoniNeural', Gender: 'Female', Locale: 'zh-CN-shaanxi' }, // Example regional voice
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)', ShortName: 'zh-HK-HiuGaaiNeural', Gender: 'Female', Locale: 'zh-HK' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)', ShortName: 'zh-HK-HiuMaanNeural', Gender: 'Female', Locale: 'zh-HK' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)', ShortName: 'zh-HK-WanLungNeural', Gender: 'Male', Locale: 'zh-HK' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)', ShortName: 'zh-TW-HsiaoChenNeural', Gender: 'Female', Locale: 'zh-TW' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)', ShortName: 'zh-TW-HsiaoYuNeural', Gender: 'Female', Locale: 'zh-TW' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)', ShortName: 'zh-TW-YunJheNeural', Gender: 'Male', Locale: 'zh-TW' },
  // ... (Zulu) ...
  { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)', ShortName: 'zu-ZA-ThandoNeural', Gender: 'Female', Locale: 'zu-ZA' },
  { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)', ShortName: 'zu-ZA-ThembaNeural', Gender: 'Male', Locale: 'zu-ZA' }
]
// --- END OF HARDCODED VOICE LIST ---
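// Sketch (not part of this file): because hardcodedVoices is a flat array,
// narrowing it for a locale-specific settings dropdown is a one-liner, e.g.:
//   const zhVoices = hardcodedVoices.filter((v) => v.Locale.startsWith('zh-'))
//   const options = zhVoices.map((v) => ({ label: v.ShortName, value: v.ShortName }))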
/**
 * Free online TTS service
 * Uses Microsoft's free online TTS service; no API key required
 */
class MsTTSService {
  private static instance: MsTTSService
  private tempDir: string

  private constructor() {
    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts')
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true })
    }
    log.info('初始化免费在线TTS服务 (使用硬编码语音列表)')
  }

  public static getInstance(): MsTTSService {
    if (!MsTTSService.instance) {
      MsTTSService.instance = new MsTTSService()
    }
    return MsTTSService.instance
  }

  /**
@@ -118,15 +443,15 @@ class MsTTSService {
   */
  public async getVoices(): Promise<any[]> {
    try {
      log.info(`返回硬编码的 ${hardcodedVoices.length} 个语音列表`)
      // Return the hardcoded list directly.
      // Note: kept async for interface compatibility, even though nothing here is actually asynchronous.
      return hardcodedVoices
    } catch (error) {
      // This try/catch is largely moot now, since returning static data cannot fail,
      // but the structure is kept in case of future changes.
      log.error('获取硬编码语音列表时出错 (理论上不应发生):', error)
      return [] // return an empty list just in case
    }
  }

@@ -140,15 +465,15 @@ class MsTTSService {
  public async synthesize(text: string, voice: string, outputFormat: string): Promise<string> {
    try {
      // Log the request details
      log.info(`微软在线TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`)

      // Validate the input parameters
      if (!text || text.trim() === '') {
        throw new Error('要合成的文本不能为空')
      }

      if (!voice || voice.trim() === '') {
        throw new Error('语音名称不能为空')
      }

      // Create a new EdgeTTS instance and set its options
@@ -157,80 +482,80 @@ class MsTTSService {
        voice: voice,
        outputFormat: outputFormat,
        timeout: 30000, // 30-second timeout
        rate: '+0%', // normal speaking rate
        pitch: '+0Hz', // normal pitch
        volume: '+0%' // normal volume
      })

      // Build the temporary output file path
      const timestamp = Date.now()
      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio'
      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`)

      log.info(`开始生成语音文件: ${outputPath}`)

      // Generate the file with ttsPromise
      await tts.ttsPromise(text, outputPath)

      // Verify the generated file exists and is non-empty
      if (!fs.existsSync(outputPath)) {
        throw new Error(`生成的语音文件不存在: ${outputPath}`)
      }

      const stats = fs.statSync(outputPath)
      if (stats.size === 0) {
        throw new Error(`生成的语音文件大小为0: ${outputPath}`)
      }

      log.info(`微软在线TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`)
      return outputPath
    } catch (error: any) {
      // Log detailed error information
      log.error(`微软在线TTS语音合成失败 (语音=${voice}):`, error)

      // Try to surface a more useful error message
      if (error.message && typeof error.message === 'string') {
        if (error.message.includes('Timed out')) {
          throw new Error(`语音合成超时，请检查网络连接或尝试其他语音`)
        } else if (error.message.includes('ENOTFOUND')) {
          throw new Error(`无法连接到微软语音服务，请检查网络连接`)
        } else if (error.message.includes('ECONNREFUSED')) {
          throw new Error(`连接被拒绝，请检查网络设置或代理配置`)
        }
      }

      throw error
    }
  }

  /**
   * Clean up the temp directory (remove generated tts_* files)
   */
  public async cleanupTempDir(): Promise<void> {
    // (Cleanup method remains the same)
    try {
      const files = await fs.promises.readdir(this.tempDir)
      for (const file of files) {
        if (file.startsWith('tts_')) {
          await fs.promises.unlink(path.join(this.tempDir, file))
        }
      }
      log.info('TTS 临时文件已清理')
    } catch (error) {
      log.error('清理 TTS 临时文件失败:', error)
    }
  }
}

// Export singleton helpers (unchanged)
export const getVoices = async () => {
  return await MsTTSService.getInstance().getVoices()
}

export const synthesize = async (text: string, voice: string, outputFormat: string) => {
  return await MsTTSService.getInstance().synthesize(text, voice, outputFormat)
}

export const cleanupTtsTempFiles = async () => {
  await MsTTSService.getInstance().cleanupTempDir()
}
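A plausible call site for cleanupTtsTempFiles is the app shutdown path; this commit does not show where it is actually invoked, so the wiring below is hypothetical:

import { app } from 'electron'
import { cleanupTtsTempFiles } from './services/MsTTSService'

// Hypothetical wiring: remove stale tts_* files when the app quits.
app.on('before-quit', () => {
  cleanupTtsTempFiles().catch((err) => console.error('TTS cleanup failed:', err))
})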

View File

@@ -1,35 +1,36 @@
import { LoadingOutlined, PhoneOutlined } from '@ant-design/icons'
import { Button, Tooltip } from 'antd'
import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'

import { VoiceCallService } from '../services/VoiceCallService'
import VoiceCallModal from './VoiceCallModal'

interface Props {
  disabled?: boolean
  style?: React.CSSProperties
}

const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
  const { t } = useTranslation()
  const [isModalVisible, setIsModalVisible] = useState(false)
  const [isLoading, setIsLoading] = useState(false)

  const handleClick = async () => {
    if (disabled || isLoading) return

    setIsLoading(true)
    try {
      // Initialize the voice services
      await VoiceCallService.initialize()
      setIsModalVisible(true)
    } catch (error) {
      console.error('Failed to initialize voice call:', error)
      window.message.error(t('voice_call.initialization_failed'))
    } finally {
      setIsLoading(false)
    }
  }

  return (
    <>
@@ -42,14 +43,9 @@ const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
          style={style}
        />
      </Tooltip>
      {isModalVisible && <VoiceCallModal visible={isModalVisible} onClose={() => setIsModalVisible(false)} />}
    </>
  )
}

export default VoiceCallButton
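A hypothetical usage sketch; where the button actually mounts in the app is not shown in this commit:

import React from 'react'
import VoiceCallButton from './VoiceCallButton'

// Hypothetical parent component: disable the call button while the chat is busy.
const ChatToolbar: React.FC<{ busy: boolean }> = ({ busy }) => (
  <div style={{ display: 'flex', gap: 8 }}>
    <VoiceCallButton disabled={busy} />
  </div>
)

export default ChatToolbar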

View File

@@ -1,5 +1,3 @@
import {
  AudioMutedOutlined,
  AudioOutlined,
@@ -7,125 +5,128 @@ import {
  PauseCircleOutlined,
  PlayCircleOutlined,
  SoundOutlined
} from '@ant-design/icons'
import { Button, Modal, Space, Tooltip } from 'antd'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'

import { VoiceCallService } from '../services/VoiceCallService'
import VoiceVisualizer from './VoiceVisualizer'

interface Props {
  visible: boolean
  onClose: () => void
}

const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
  const { t } = useTranslation()
  const [isMuted, setIsMuted] = useState(false)
  const [isPaused, setIsPaused] = useState(false)
  const [transcript, setTranscript] = useState('')
  const [response, setResponse] = useState('')
  const [isListening, setIsListening] = useState(false)
  const [isSpeaking, setIsSpeaking] = useState(false)
  const [isRecording, setIsRecording] = useState(false)
  const [isProcessing, setIsProcessing] = useState(false)

  const handleClose = useCallback(() => {
    VoiceCallService.endCall()
    onClose()
  }, [onClose])

  useEffect(() => {
    const startVoiceCall = async () => {
      try {
        await VoiceCallService.startCall({
          onTranscript: (text: string) => setTranscript(text),
          onResponse: (text: string) => setResponse(text),
          onListeningStateChange: setIsListening,
          onSpeakingStateChange: setIsSpeaking
        })
      } catch (error) {
        console.error('Voice call error:', error)
        window.message.error(t('voice_call.error'))
        handleClose()
      }
    }

    if (visible) {
      startVoiceCall()
    }

    return () => {
      VoiceCallService.endCall()
    }
  }, [visible, t, handleClose])

  const toggleMute = () => {
    const newMuteState = !isMuted
    setIsMuted(newMuteState)
    VoiceCallService.setMuted(newMuteState)
  }

  const togglePause = () => {
    const newPauseState = !isPaused
    setIsPaused(newPauseState)
    VoiceCallService.setPaused(newPauseState)
  }

  // Press-and-hold-to-talk handlers
  const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault() // suppress the default touch behavior
    if (isProcessing || isPaused) return

    setIsRecording(true)
    await VoiceCallService.startRecording()
  }

  const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault() // suppress the default touch behavior
    if (!isRecording) return

    // Update the UI state immediately
    setIsRecording(false)
    setIsProcessing(true)

    // Make sure recording has fully stopped
    try {
      await VoiceCallService.stopRecording()
      console.log('录音已停止')
    } catch (error) {
      console.error('停止录音出错:', error)
    }

    // The result comes back through the callbacks, so nothing else to handle here
    setTimeout(() => {
      setIsProcessing(false)
    }, 500) // short delay so the user cannot immediately click again
  }

  // Handle the pointer/touch leaving the button mid-press
  const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault()
    if (isRecording) {
      // Update the UI state immediately
      setIsRecording(false)
      setIsProcessing(true)

      // Cancel the recording without sending it to the AI
      try {
        await VoiceCallService.cancelRecording()
        console.log('录音已取消')
      } catch (error) {
        console.error('取消录音出错:', error)
      }

      setTimeout(() => {
        setIsProcessing(false)
      }, 500)
    }
  }

  return (
    <Modal
@@ -135,8 +136,7 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
      footer={null}
      width={500}
      centered
      maskClosable={false}>
      <Container>
        <VisualizerContainer>
          <VoiceVisualizer isActive={isListening || isRecording} type="input" />
@@ -174,7 +174,7 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
          />
          <Tooltip title={t('voice_call.press_to_talk')}>
            <RecordButton
              type={isRecording ? 'primary' : 'default'}
              icon={<SoundOutlined />}
              onMouseDown={handleRecordStart}
              onMouseUp={handleRecordEnd}
@@ -183,8 +183,7 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
              onTouchEnd={handleRecordEnd}
              onTouchCancel={handleRecordCancel}
              size="large"
              disabled={isProcessing || isPaused}>
              {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
            </RecordButton>
          </Tooltip>
@@ -200,21 +199,21 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
        </ControlsContainer>
      </Container>
    </Modal>
  )
}

const Container = styled.div`
  display: flex;
  flex-direction: column;
  gap: 20px;
  height: 400px;
`

const VisualizerContainer = styled.div`
  display: flex;
  justify-content: space-between;
  height: 100px;
`

const TranscriptContainer = styled.div`
  flex: 1;
@@ -223,33 +222,33 @@ const TranscriptContainer = styled.div`
  border-radius: 8px;
  padding: 16px;
  background-color: var(--color-background-2);
`

const TranscriptText = styled.p`
  margin-bottom: 8px;
  color: var(--color-text-1);
`

const ResponseText = styled.p`
  margin-bottom: 8px;
  color: var(--color-primary);
`

const UserLabel = styled.span`
  font-weight: bold;
  color: var(--color-text-1);
`

const AILabel = styled.span`
  font-weight: bold;
  color: var(--color-primary);
`

const ControlsContainer = styled.div`
  display: flex;
  justify-content: center;
  padding: 10px 0;
`

const RecordButton = styled(Button)`
  min-width: 150px;
@@ -258,6 +257,6 @@ const RecordButton = styled(Button)`
  &:active {
    transform: scale(0.95);
  }
`

export default VoiceCallModal
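The substantive fix in this file: handleClose moved ahead of the effect and into useCallback, so it can be listed in the effect's dependency array with a stable identity instead of being recreated every render. A minimal sketch of the pattern (names here are generic, not from the codebase):

import { useCallback, useEffect } from 'react'

export function useSession(onDone: () => void, active: boolean) {
  // Stable identity across renders as long as onDone itself is stable
  const close = useCallback(() => onDone(), [onDone])

  useEffect(() => {
    if (!active) return
    const timer = setTimeout(close, 60_000) // e.g. auto-close after a minute
    return () => clearTimeout(timer)
  }, [active, close]) // depending on close no longer restarts the effect each render
}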

View File

@@ -1,74 +1,74 @@
import React, { useEffect, useRef } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'

interface Props {
  isActive: boolean
  type: 'input' | 'output'
}

const VoiceVisualizer: React.FC<Props> = ({ isActive, type }) => {
  const { t } = useTranslation()
  const canvasRef = useRef<HTMLCanvasElement>(null)
  const animationRef = useRef<number | undefined>(undefined)

  useEffect(() => {
    const canvas = canvasRef.current
    if (!canvas) return

    const ctx = canvas.getContext('2d')
    if (!ctx) return

    const width = canvas.width
    const height = canvas.height

    const drawVisualizer = () => {
      ctx.clearRect(0, 0, width, height)

      if (!isActive) {
        // Draw a flat line while inactive
        ctx.beginPath()
        ctx.moveTo(0, height / 2)
        ctx.lineTo(width, height / 2)
        ctx.strokeStyle = type === 'input' ? 'var(--color-text-2)' : 'var(--color-primary)'
        ctx.lineWidth = 2
        ctx.stroke()
        return
      }

      // Draw the animated bars
      const barCount = 30
      const barWidth = width / barCount
      const color = type === 'input' ? 'var(--color-text-1)' : 'var(--color-primary)'

      for (let i = 0; i < barCount; i++) {
        const barHeight = Math.random() * (height / 2) + 10
        const x = i * barWidth
        const y = height / 2 - barHeight / 2

        ctx.fillStyle = color
        ctx.fillRect(x, y, barWidth - 2, barHeight)
      }

      animationRef.current = requestAnimationFrame(drawVisualizer)
    }

    drawVisualizer()

    return () => {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current)
      }
    }
  }, [isActive, type])

  return (
    <Container $type={type}>
      <Label>{type === 'input' ? t('voice_call.you') : t('voice_call.ai')}</Label>
      <Canvas ref={canvasRef} width={200} height={50} />
    </Container>
  )
}

const Container = styled.div<{ $type: 'input' | 'output' }>`
  display: flex;
@@ -77,21 +77,17 @@ const Container = styled.div<{ $type: 'input' | 'output' }>`
  width: 45%;
  border-radius: 8px;
  padding: 10px;
  background-color: ${(props) => (props.$type === 'input' ? 'var(--color-background-3)' : 'var(--color-primary-bg)')};
`

const Label = styled.div`
  margin-bottom: 8px;
  font-weight: bold;
`

const Canvas = styled.canvas`
  width: 100%;
  height: 50px;
`

export default VoiceVisualizer
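Note that the bars above are driven by Math.random() rather than real audio levels. A hedged sketch of how actual amplitudes could be sampled with the Web Audio AnalyserNode, not part of this commit:

// Sketch only: feed real microphone levels instead of Math.random().
async function createLevelSampler(): Promise<() => number> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  const ctx = new AudioContext()
  const analyser = ctx.createAnalyser()
  analyser.fftSize = 256
  ctx.createMediaStreamSource(stream).connect(analyser)
  const data = new Uint8Array(analyser.frequencyBinCount)

  // Returns the current average level in [0, 1]
  return () => {
    analyser.getByteFrequencyData(data)
    const avg = data.reduce((sum, v) => sum + v, 0) / data.length
    return avg / 255
  }
}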

View File

@@ -1507,6 +1507,21 @@
    "quit": "終了",
    "show_window": "ウィンドウを表示",
    "visualization": "可視化"
  },
  "voice_call": {
    "title": "[to be translated]:语音通话",
    "start": "[to be translated]:开始语音通话",
    "end": "[to be translated]:结束通话",
    "mute": "[to be translated]:静音",
    "unmute": "[to be translated]:取消静音",
    "pause": "[to be translated]:暂停",
    "resume": "[to be translated]:继续",
    "you": "[to be translated]:您",
    "ai": "[to be translated]:AI",
    "press_to_talk": "[to be translated]:长按说话",
    "release_to_send": "[to be translated]:松开发送",
    "initialization_failed": "[to be translated]:初始化语音通话失败",
    "error": "[to be translated]:语音通话出错"
  }
}
}

View File

@@ -1507,6 +1507,21 @@
      "quit": "Выйти",
      "show_window": "Показать окно",
      "visualization": "Визуализация"
    },
    "voice_call": {
      "title": "[to be translated]:语音通话",
      "start": "[to be translated]:开始语音通话",
      "end": "[to be translated]:结束通话",
      "mute": "[to be translated]:静音",
      "unmute": "[to be translated]:取消静音",
      "pause": "[to be translated]:暂停",
      "resume": "[to be translated]:继续",
      "you": "[to be translated]:您",
      "ai": "[to be translated]:AI",
      "press_to_talk": "[to be translated]:长按说话",
      "release_to_send": "[to be translated]:松开发送",
      "initialization_failed": "[to be translated]:初始化语音通话失败",
      "error": "[to be translated]:语音通话出错"
    }
  }
}

View File

@@ -1507,6 +1507,21 @@
      "quit": "結束",
      "show_window": "顯示視窗",
      "visualization": "視覺化"
    },
    "voice_call": {
      "title": "[to be translated]:语音通话",
      "start": "[to be translated]:开始语音通话",
      "end": "[to be translated]:结束通话",
      "mute": "[to be translated]:静音",
      "unmute": "[to be translated]:取消静音",
      "pause": "[to be translated]:暂停",
      "resume": "[to be translated]:继续",
      "you": "[to be translated]:您",
      "ai": "[to be translated]:AI",
      "press_to_talk": "[to be translated]:长按说话",
      "release_to_send": "[to be translated]:松开发送",
      "initialization_failed": "[to be translated]:初始化语音通话失败",
      "error": "[to be translated]:语音通话出错"
    }
  }
}

View File

@@ -407,10 +407,12 @@ const MessageMenubar: FC<Props> = (props) => {
        )}
        {isAssistantMessage && ttsEnabled && (
          <Tooltip title={t('chat.tts.play')} mouseEnterDelay={0.8}>
            <ActionButton
              className="message-action-button"
              onClick={() => {
                console.log('点击MessageMenubar中的TTS按钮开始播放消息')
                TTSService.speakFromMessage(message)
              }}>
              <SoundOutlined />
            </ActionButton>
          </Tooltip>

View File

@@ -14,19 +14,19 @@ import {
  setTtsEnabled,
  setTtsFilterOptions,
  setTtsModel,
  setTtsMsOutputFormat,
  setTtsMsVoice,
  setTtsServiceType,
  setTtsSiliconflowApiKey,
  setTtsSiliconflowApiUrl,
  setTtsSiliconflowModel,
  setTtsSiliconflowResponseFormat,
  setTtsSiliconflowSpeed,
  setTtsSiliconflowVoice,
  setTtsVoice
} from '@renderer/store/settings'
import { Button, Form, Input, InputNumber, message, Select, Space, Switch, Tabs, Tag } from 'antd'
import { FC, useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
import styled from 'styled-components'
@@ -176,32 +176,30 @@ const TTSSettings: FC = () => {
  // 免费在线TTS可用的语音列表
  const [msTtsVoices, setMsTtsVoices] = useState<{ label: string; value: string }[]>([])

  // 获取免费在线TTS可用的语音列表
  const getMsTtsVoices = useCallback(async () => {
    try {
      // 调用API获取免费在线TTS语音列表
      const response = await window.api.msTTS.getVoices()
      console.log('获取到的免费在线TTS语音列表:', response)
      // 转换为选项格式
      const voices = response.map((voice: any) => ({
        label: `${voice.ShortName} (${voice.Gender === 'Female' ? '女声' : '男声'})`,
        value: voice.ShortName
      }))
      // 按语言和性别排序
      voices.sort((a: any, b: any) => {
        const localeA = a.value.split('-')[0] + a.value.split('-')[1]
        const localeB = b.value.split('-')[0] + b.value.split('-')[1]
        if (localeA !== localeB) return localeA.localeCompare(localeB)
        return a.label.localeCompare(b.label)
      })
      setMsTtsVoices(voices)
    } catch (error) {
      console.error('获取免费在线TTS语音列表失败:', error)
      // 如果获取失败,设置一些默认的中文语音
      setMsTtsVoices([
        { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' },
@@ -211,10 +209,10 @@ const TTSSettings: FC = () => {
        { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' },
        { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' },
        { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' },
        { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' }
      ])
    }
  }, [])
  // 获取浏览器可用的语音列表
  const getVoices = useCallback(() => {
@@ -323,8 +321,8 @@ const TTSSettings: FC = () => {
  // 获取免费在线TTS语音列表
  useEffect(() => {
    // 获取免费在线TTS语音列表
    getMsTtsVoices()
  }, [getMsTtsVoices])

  useEffect(() => {
    // 初始化语音合成引擎
@@ -634,9 +632,7 @@ const TTSSettings: FC = () => {
            <Select
              value={ttsSiliconflowModel}
              onChange={(value) => dispatch(setTtsSiliconflowModel(value))}
              options={[{ label: 'FunAudioLLM/CosyVoice2-0.5B', value: 'FunAudioLLM/CosyVoice2-0.5B' }]}
              disabled={!ttsEnabled}
              style={{ width: '100%' }}
              placeholder={t('settings.tts.siliconflow_model.placeholder')}
@@ -726,24 +722,28 @@ const TTSSettings: FC = () => {
          <Form.Item label={t('settings.tts.mstts.voice')} style={{ marginBottom: 16 }}>
            <VoiceSelectContainer>
              <Select
                value={ttsMsVoice}
                onChange={(value) => dispatch(setTtsMsVoice(value))}
                disabled={!ttsEnabled}
                style={{ width: '100%' }}
                options={
                  msTtsVoices.length > 0
                    ? msTtsVoices
                    : [
                        { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' },
                        { label: 'zh-CN-YunxiNeural (男声)', value: 'zh-CN-YunxiNeural' },
                        { label: 'zh-CN-YunyangNeural (男声)', value: 'zh-CN-YunyangNeural' },
                        { label: 'zh-CN-XiaohanNeural (女声)', value: 'zh-CN-XiaohanNeural' },
                        { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' },
                        { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' },
                        { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' },
                        { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' }
                      ]
                }
                showSearch
                optionFilterProp="label"
                placeholder={t('settings.tts.voice.placeholder', { defaultValue: '请选择音色' })}
                notFoundContent={t('settings.tts.voice.not_found', { defaultValue: '未找到音色' })}
              />
              <Button
                icon={<ReloadOutlined />}
@@ -754,7 +754,10 @@ const TTSSettings: FC = () => {
            </VoiceSelectContainer>
            {msTtsVoices.length > 0 && (
              <InfoText>
                {t('settings.tts.mstts.available_count', {
                  count: msTtsVoices.length,
                  defaultValue: '可用语音: {{count}}个'
                })}
              </InfoText>
            )}
          </Form.Item>
@@ -767,11 +770,15 @@ const TTSSettings: FC = () => {
              options={[
                { label: 'MP3 (24kHz, 48kbps)', value: 'audio-24khz-48kbitrate-mono-mp3' },
                { label: 'MP3 (24kHz, 96kbps)', value: 'audio-24khz-96kbitrate-mono-mp3' },
                { label: 'Webm (24kHz)', value: 'webm-24khz-16bit-mono-opus' }
              ]}
            />
          </Form.Item>
          <InfoText>
            {t('settings.tts.mstts.info', {
              defaultValue: '免费在线TTS服务不需要API密钥完全免费使用。'
            })}
          </InfoText>
        </>
      )}
@@ -960,11 +967,8 @@ const TTSSettings: FC = () => {
            !ttsEnabled ||
            (ttsServiceType === 'openai' && (!ttsApiKey || !ttsVoice || !ttsModel)) ||
            (ttsServiceType === 'edge' && !ttsEdgeVoice) ||
            (ttsServiceType === 'siliconflow' &&
              (!ttsSiliconflowApiKey || !ttsSiliconflowVoice || !ttsSiliconflowModel))
          }>
          {t('settings.tts.test')}
        </Button>
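As a reading aid for the settings code above, here is a sketch of the voice-list handling it performs. `ShortName` and `Gender` are the only response fields the page actually reads; the `MsTtsVoice` interface and the standalone function are illustrative, not part of the codebase, and the response may carry other fields not shown here.

// Sketch under the assumption that each voice entry carries ShortName and Gender,
// as the mapping above implies; the cast on window.api is only for a standalone file.
interface MsTtsVoice {
  ShortName: string
  Gender: 'Female' | 'Male'
}

async function loadMsTtsVoiceOptions(): Promise<{ label: string; value: string }[]> {
  const voices: MsTtsVoice[] = await (window as any).api.msTTS.getVoices()
  const options = voices.map((v) => ({
    label: `${v.ShortName} (${v.Gender === 'Female' ? '女声' : '男声'})`,
    value: v.ShortName
  }))
  // Same ordering as the settings page: by locale first, then by label.
  options.sort((a, b) => {
    const localeA = a.value.split('-')[0] + a.value.split('-')[1]
    const localeB = b.value.split('-')[0] + b.value.split('-')[1]
    return localeA !== localeB ? localeA.localeCompare(localeB) : a.label.localeCompare(b.label)
  })
  return options
}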

View File

@@ -138,9 +138,9 @@ class ASRService {
      // 如果有回调函数,调用一次空字符串,触发按钮状态重置
      if (this.resultCallback && typeof this.resultCallback === 'function') {
        // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置
        const callback = this.resultCallback as (text: string, isFinal?: boolean) => void // 明确指定类型
        setTimeout(() => {
          callback('', false)
        }, 100)
      }
    }
@@ -334,7 +334,7 @@ class ASRService {
          // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置
          // 传递false表示这不是最终结果只是状态更新
          setTimeout(() => {
            onTranscribed('', false)
          }, 100)
        }
      } else {
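Both hunks above rely on the same convention: invoking the result callback with an empty string resets UI state without touching the input box. A minimal consumer compatible with that convention might look like the sketch below; `setRecordingActive` and `appendToInput` are placeholder names, not functions from this codebase.

// Hypothetical consumer of the ASR callback convention above.
const setRecordingActive = (active: boolean) => console.log('recording button active:', active)
const appendToInput = (text: string) => console.log('append to input:', text)

const onTranscribed = (text: string, isFinal?: boolean) => {
  if (text === '') {
    setRecordingActive(false) // empty string only resets button state, never the input
    return
  }
  if (isFinal) appendToInput(text) // final result: commit to the input box
}

onTranscribed('', false) // state reset, as ASRService issues after stopping
onTranscribed('你好', true) // a real final result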

View File

@@ -3,9 +3,10 @@
 *
 */
import { Message } from '@renderer/types'

import { TTSService as NewTTSService } from './tts/index'

/**
 * TTS服务
 * @deprecated 请使用 src/renderer/src/services/tts/TTSService.ts

View File

@@ -1,180 +1,180 @@
import { fetchChatCompletion } from '@renderer/services/ApiService'
import ASRService from '@renderer/services/ASRService'
import { getDefaultAssistant } from '@renderer/services/AssistantService'
import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService'
import TTSService from '@renderer/services/TTSService'
import store from '@renderer/store'
// 导入类型
import type { Message } from '@renderer/types'

interface VoiceCallCallbacks {
  onTranscript: (text: string) => void
  onResponse: (text: string) => void
  onListeningStateChange: (isListening: boolean) => void
  onSpeakingStateChange: (isSpeaking: boolean) => void
}

// 为TypeScript添加SpeechRecognition类型
declare global {
  interface Window {
    SpeechRecognition: any
    webkitSpeechRecognition: any
  }
}
class VoiceCallServiceClass {
  private recognition: any = null
  private isCallActive = false
  private isRecording = false // 新增录音状态
  private isMuted = false
  private isPaused = false
  private callbacks: VoiceCallCallbacks | null = null
  private _currentTranscript = '' // 使用下划线前缀避免未使用警告
  private _accumulatedTranscript = '' // 累积的语音识别结果
  private conversationHistory: { role: string; content: string }[] = []
  private isProcessingResponse = false
  private ttsService = TTSService
  private recordingTimeout: NodeJS.Timeout | null = null // 录音超时定时器
  async initialize() {
    // 检查麦克风权限
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      stream.getTracks().forEach((track) => track.stop())
    } catch (error) {
      console.error('Microphone permission denied:', error)
      throw new Error('Microphone permission denied')
    }
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    // 如果使用浏览器ASR检查浏览器支持
    if (asrServiceType === 'browser') {
      if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
        throw new Error('Speech recognition not supported in this browser')
      }
      // 初始化浏览器语音识别
      const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition
      this.recognition = new SpeechRecognition()
      this.recognition.continuous = true
      this.recognition.interimResults = true
      this.recognition.lang = navigator.language || 'zh-CN'
    } else if (asrServiceType === 'local') {
      // 如果使用本地服务器ASR检查连接
      try {
        // 尝试连接本地ASR服务器
        const connected = await ASRService.connectToWebSocketServer()
        if (!connected) {
          throw new Error('无法连接到语音识别服务')
        }
      } catch (error) {
        console.error('Failed to connect to ASR server:', error)
        throw new Error('Failed to connect to ASR server')
      }
    }
    return true
  }
  async startCall(callbacks: VoiceCallCallbacks) {
    this.callbacks = callbacks
    this.isCallActive = true
    this.conversationHistory = []
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    // 根据不同的ASR服务类型进行初始化
    if (asrServiceType === 'browser') {
      if (!this.recognition) {
        throw new Error('Browser speech recognition not initialized')
      }
      // 设置浏览器语音识别事件处理
      this.recognition.onresult = (event: any) => {
        let interimTranscript = ''
        let finalTranscript = ''
        for (let i = event.resultIndex; i < event.results.length; ++i) {
          if (event.results[i].isFinal) {
            finalTranscript += event.results[i][0].transcript
          } else {
            interimTranscript += event.results[i][0].transcript
          }
        }
        if (interimTranscript) {
          // 更新当前的临时识别结果
          this._currentTranscript = interimTranscript
          // 显示累积结果 + 当前临时结果
          this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + interimTranscript)
        }
        if (finalTranscript) {
          // 将最终结果累积到总结果中
          if (this._accumulatedTranscript) {
            // 如果已经有累积的文本,添加空格再追加
            this._accumulatedTranscript += ' ' + finalTranscript
          } else {
            // 如果是第一段文本,直接设置
            this._accumulatedTranscript = finalTranscript
          }
          // 更新当前的识别结果
          this._currentTranscript = ''
          // 显示累积的完整结果
          this.callbacks?.onTranscript(this._accumulatedTranscript)
          // 在录音过程中只更新transcript不触发handleUserSpeech
          // 松开按钮后才会处理完整的录音内容
        }
      }
      this.recognition.onstart = () => {
        this.isRecording = true
        this.callbacks?.onListeningStateChange(true)
      }
      this.recognition.onend = () => {
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
      }
      this.recognition.onerror = (event: any) => {
        console.error('Speech recognition error', event.error)
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
      }
    }
    // 播放欢迎语音
    const welcomeMessage = '您好我是您的AI助手请长按说话按钮进行对话。'
    this.callbacks?.onResponse(welcomeMessage)
    // 监听TTS状态
    const ttsStateHandler = (isPlaying: boolean) => {
      this.callbacks?.onSpeakingStateChange(isPlaying)
    }
    // 监听TTS播放状态
    window.addEventListener('tts-state-change', (event: any) => {
      ttsStateHandler(event.detail.isPlaying)
    })
    // 播放欢迎语音,并手动设置初始状态
    this.callbacks?.onSpeakingStateChange(true)
    this.ttsService.speak(welcomeMessage)
    // 确保欢迎语音结束后状态正确
    setTimeout(() => {
      if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
        this.callbacks?.onSpeakingStateChange(false)
      }
    }, 5000) // 5秒后检查TTS状态
    return true
  }
  /**
@@ -183,25 +183,24 @@ class VoiceCallServiceClass {
   */
  async startRecording(): Promise<boolean> {
    if (!this.isCallActive || this.isPaused || this.isProcessingResponse || this.isRecording) {
      return false
    }
    // 重置累积的文本
    this._accumulatedTranscript = ''
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    try {
      if (asrServiceType === 'browser') {
        // 浏览器ASR
        if (!this.recognition) {
          throw new Error('Browser speech recognition not initialized')
        }
        this.recognition.start()
        this.isRecording = true
      } else if (asrServiceType === 'local') {
        // 本地服务器ASR
        await ASRService.startRecording((text, isFinal) => {
@@ -210,51 +209,50 @@ class VoiceCallServiceClass {
            // 如果是最终结果,累积到总结果中
            if (this._accumulatedTranscript) {
              // 如果已经有累积的文本,添加空格再追加
              this._accumulatedTranscript += ' ' + text
            } else {
              // 如果是第一段文本,直接设置
              this._accumulatedTranscript = text
            }
            // 更新当前的识别结果
            this._currentTranscript = ''
            // 显示累积的完整结果
            this.callbacks?.onTranscript(this._accumulatedTranscript)
          } else {
            // 如果是临时结果,更新当前的识别结果
            this._currentTranscript = text
            // 显示累积结果 + 当前临时结果
            this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text)
          }
          // 在录音过程中只更新transcript不触发handleUserSpeech
          // 松开按钮后才会处理完整的录音内容
        })
        this.isRecording = true
        this.callbacks?.onListeningStateChange(true)
      } else if (asrServiceType === 'openai') {
        // OpenAI ASR
        await ASRService.startRecording()
        this.isRecording = true
        this.callbacks?.onListeningStateChange(true)
      }
      // 设置最长录音时间,防止用户忘记松开
      this.recordingTimeout = setTimeout(() => {
        if (this.isRecording) {
          this.stopRecording()
        }
      }, 60000) // 60秒最长录音时间
      return true
    } catch (error) {
      console.error('Failed to start recording:', error)
      this.isRecording = false
      this.callbacks?.onListeningStateChange(false)
      return false
    }
  }
@@ -264,143 +262,141 @@ class VoiceCallServiceClass {
   */
  async stopRecording(): Promise<boolean> {
    if (!this.isCallActive || !this.isRecording) {
      return false
    }
    // 清除录音超时定时器
    if (this.recordingTimeout) {
      clearTimeout(this.recordingTimeout)
      this.recordingTimeout = null
    }
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    try {
      // 存储当前的语音识别结果用于松开按钮后发送给AI
      const currentTranscript = this._currentTranscript
      // 存储累积的语音识别结果
      const accumulatedTranscript = this._accumulatedTranscript
      if (asrServiceType === 'browser') {
        // 浏览器ASR
        if (!this.recognition) {
          throw new Error('Browser speech recognition not initialized')
        }
        this.recognition.stop()
        // onend事件将设置isRecording = false
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
        // 优先使用累积的文本,如果有的话
        if (accumulatedTranscript) {
          console.log('发送累积的语音识别结果给AI:', accumulatedTranscript)
          this.handleUserSpeech(accumulatedTranscript)
        } else if (currentTranscript) {
          // 如果没有累积结果,使用当前结果
          console.log('没有累积结果,使用当前结果:', currentTranscript)
          this.handleUserSpeech(currentTranscript)
        }
      } else if (asrServiceType === 'local') {
        // 本地服务器ASR
        // 创建一个承诺,等待最终结果
        const finalResultPromise = new Promise<string>((resolve) => {
          // 设置一个超时器,确保不会无限等待
          const timeoutId = setTimeout(() => {
            console.log('等待最终结果超时,使用当前结果')
            resolve(this._currentTranscript)
          }, 1500) // 1.5秒超时
          // 设置回调函数来接收最终结果
          const resultCallback = (text: string) => {
            // 如果是空字符串,表示只是重置状态,不处理
            if (text === '') return
            if (text) {
              clearTimeout(timeoutId)
              console.log('收到最终语音识别结果:', text)
              this._currentTranscript = text
              this.callbacks?.onTranscript(text)
              resolve(text)
            }
          }
          // 停止录音,但不取消,以获取最终结果
          ASRService.stopRecording(resultCallback)
          this.isRecording = false
          this.callbacks?.onListeningStateChange(false)
          // 添加额外的安全措施,在停止后立即发送重置命令
          setTimeout(() => {
            // 发送重置命令,确保浏览器不会继续发送结果
            ASRService.cancelRecording()
          }, 2000) // 2秒后强制取消作为安全措施
        })
        // 等待最终结果
        const finalText = await finalResultPromise
        // 优先使用累积的文本,如果有的话
        if (accumulatedTranscript) {
          console.log('发送累积的语音识别结果给AI:', accumulatedTranscript)
          this.handleUserSpeech(accumulatedTranscript)
        } else if (finalText) {
          // 如果没有累积结果,使用最终结果
          console.log('发送最终语音识别结果给AI:', finalText)
          this.handleUserSpeech(finalText)
        } else if (currentTranscript) {
          // 如果没有最终结果,使用当前结果
          console.log('没有最终结果,使用当前结果:', currentTranscript)
          this.handleUserSpeech(currentTranscript)
        }
      } else if (asrServiceType === 'openai') {
        // OpenAI ASR
        await ASRService.stopRecording((text) => {
          // 更新最终的语音识别结果
          if (text) {
            this._currentTranscript = text
            this.callbacks?.onTranscript(text)
          }
        })
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
        // 使用最新的语音识别结果
        const finalTranscript = this._currentTranscript
        if (finalTranscript) {
          this.handleUserSpeech(finalTranscript)
        }
      }
      return true
    } catch (error) {
      console.error('Failed to stop recording:', error)
      this.isRecording = false
      this.callbacks?.onListeningStateChange(false)
      return false
    }
  }
  async handleUserSpeech(text: string) {
    if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return
    // 暂停语音识别避免在AI回复时继续识别
    const { asrServiceType } = store.getState().settings
    if (asrServiceType === 'browser') {
      this.recognition?.stop()
    } else if (asrServiceType === 'local' || asrServiceType === 'openai') {
      ASRService.cancelRecording()
    }
    this.isProcessingResponse = true
    try {
      // 获取当前助手
      const assistant = getDefaultAssistant()
      // 创建一个简单的Topic对象
      const topic = {
@@ -410,7 +406,7 @@ class VoiceCallServiceClass {
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
        messages: []
      }
      // 创建用户消息
      const userMessage = getUserMessage({
@@ -418,35 +414,35 @@ class VoiceCallServiceClass {
        topic,
        type: 'text',
        content: text
      })
      // 创建助手消息
      const assistantMessage = getAssistantMessage({
        assistant,
        topic
      })
      // 更新对话历史
      this.conversationHistory.push({ role: 'user', content: text })
      // 构建消息列表
      // 将历史消息转换为正确的Message对象
      const historyMessages = this.conversationHistory.map((msg) => {
        if (msg.role === 'user') {
          return getUserMessage({
            assistant,
            topic,
            type: 'text',
            content: msg.content
          })
        } else {
          const assistantMsg = getAssistantMessage({
            assistant,
            topic
          })
          return { ...assistantMsg, content: msg.content, status: 'success' }
        }
      })
      // 修改用户消息,添加语音通话提示
      const voiceCallPrompt = `当前是语音通话模式。请注意:
@@ -457,7 +453,7 @@ class VoiceCallServiceClass {
5. 便
6. 使
7. 使
8. 使`
      // 创建系统指令消息
      const systemMessage = getUserMessage({
@@ -465,17 +461,17 @@ class VoiceCallServiceClass {
        topic,
        type: 'text',
        content: voiceCallPrompt
      })
      // 修改用户消息的内容
      userMessage.content = text
      // 构建最终消息列表
      // 使用类型断言解决类型问题
      const messages = [systemMessage, ...historyMessages, userMessage] as Message[]
      // 流式响应处理
      let fullResponse = ''
      try {
        // 调用真实的LLM API
@@ -485,60 +481,58 @@ class VoiceCallServiceClass {
          assistant,
          onResponse: async (msg) => {
            if (msg.content && msg.content !== fullResponse) {
              fullResponse = msg.content
              // 更新UI
              this.callbacks?.onResponse(fullResponse)
              // 如果TTS正在播放停止它
              if (this.ttsService.isCurrentlyPlaying()) {
                this.ttsService.stop()
              }
            }
          }
        })
        // 播放完整响应
        if (!this.isMuted && this.isCallActive) {
          // 手动设置语音状态
          this.callbacks?.onSpeakingStateChange(true)
          this.ttsService.speak(fullResponse)
          // 确保语音结束后状态正确
          setTimeout(() => {
            if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
              this.callbacks?.onSpeakingStateChange(false)
            }
          }, 1000) // 1秒后检查TTS状态
        }
        // 更新对话历史
        this.conversationHistory.push({ role: 'assistant', content: fullResponse })
      } catch (innerError) {
        console.error('Error generating response:', innerError)
        // 如果出错,使用一个简单的回复
        fullResponse = `抱歉,处理您的请求时出错了。`
        this.callbacks?.onResponse(fullResponse)
        if (!this.isMuted && this.isCallActive) {
          // 手动设置语音状态
          this.callbacks?.onSpeakingStateChange(true)
          this.ttsService.speak(fullResponse)
          // 确保语音结束后状态正确
          setTimeout(() => {
            if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
              this.callbacks?.onSpeakingStateChange(false)
            }
          }, 1000) // 1秒后检查TTS状态
        }
      }
    } catch (error) {
      console.error('Error processing voice response:', error)
    } finally {
      this.isProcessingResponse = false
      // 不自动恢复语音识别,等待用户长按按钮
      // 长按说话模式下,我们不需要自动恢复语音识别
@@ -551,106 +545,104 @@ class VoiceCallServiceClass {
   */
  async cancelRecording(): Promise<boolean> {
    if (!this.isCallActive || !this.isRecording) {
      return false
    }
    // 清除录音超时定时器
    if (this.recordingTimeout) {
      clearTimeout(this.recordingTimeout)
      this.recordingTimeout = null
    }
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    try {
      if (asrServiceType === 'browser') {
        // 浏览器ASR
        if (!this.recognition) {
          throw new Error('Browser speech recognition not initialized')
        }
        this.recognition.stop()
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
      } else if (asrServiceType === 'local') {
        // 本地服务器ASR
        ASRService.cancelRecording()
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
      } else if (asrServiceType === 'openai') {
        // OpenAI ASR
        ASRService.cancelRecording()
        this.isRecording = false
        this.callbacks?.onListeningStateChange(false)
      }
      // 清除当前识别结果
      this._currentTranscript = ''
      this.callbacks?.onTranscript('')
      return true
    } catch (error) {
      console.error('Failed to cancel recording:', error)
      this.isRecording = false
      this.callbacks?.onListeningStateChange(false)
      return false
    }
  }

  setMuted(muted: boolean) {
    this.isMuted = muted
    // 如果设置为静音停止当前TTS播放
    if (muted && this.ttsService.isCurrentlyPlaying()) {
      this.ttsService.stop()
    }
  }

  setPaused(paused: boolean) {
    this.isPaused = paused
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    if (paused) {
      // 暂停语音识别
      if (asrServiceType === 'browser') {
        this.recognition?.stop()
      } else if (asrServiceType === 'local' || asrServiceType === 'openai') {
        ASRService.cancelRecording()
      }
      // 暂停TTS
      if (this.ttsService.isCurrentlyPlaying()) {
        this.ttsService.stop()
      }
    }
    // 不自动恢复语音识别,等待用户长按按钮
  }

  endCall() {
    this.isCallActive = false
    // 获取当前ASR服务类型
    const { asrServiceType } = store.getState().settings
    // 停止语音识别
    if (asrServiceType === 'browser') {
      this.recognition?.stop()
    } else if (asrServiceType === 'local' || asrServiceType === 'openai') {
      ASRService.cancelRecording()
    }
    // 停止TTS
    if (this.ttsService.isCurrentlyPlaying()) {
      this.ttsService.stop()
    }
    this.callbacks = null
  }
}

export const VoiceCallService = new VoiceCallServiceClass()
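A usage sketch of the service above, showing the push-to-talk flow its methods implement. The method and callback names all come from the class itself; the import path and the way a UI would drive it are assumptions.

import { VoiceCallService } from './VoiceCallService' // path assumed

async function demoPushToTalk() {
  await VoiceCallService.initialize() // throws on missing mic permission or unreachable ASR server
  await VoiceCallService.startCall({
    onTranscript: (text) => console.log('transcript:', text),
    onResponse: (text) => console.log('assistant:', text),
    onListeningStateChange: (listening) => console.log('listening:', listening),
    onSpeakingStateChange: (speaking) => console.log('speaking:', speaking)
  })
  // Button pressed: start accumulating speech.
  await VoiceCallService.startRecording()
  // Button released: stopRecording waits for the final ASR result,
  // then hands the accumulated transcript to handleUserSpeech.
  await VoiceCallService.stopRecording()
  // Hang up: stops ASR and TTS and clears the callbacks.
  VoiceCallService.endCall()
}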

View File

@@ -1,22 +1,23 @@
import i18n from '@renderer/i18n'

import { TTSServiceInterface } from './TTSServiceInterface'

// 全局变量来跟踪当前正在播放的语音
let currentUtterance: SpeechSynthesisUtterance | null = null

/**
 * Edge TTS服务实现类
 */
export class EdgeTTSService implements TTSServiceInterface {
  private edgeVoice: string

  /**
   *
   * @param edgeVoice Edge语音
   */
  constructor(edgeVoice: string) {
    this.edgeVoice = edgeVoice
    console.log('初始化EdgeTTSService语音:', edgeVoice)
  }

  /**
@@ -25,7 +26,7 @@ export class EdgeTTSService implements TTSServiceInterface {
   */
  private validateParams(): void {
    if (!this.edgeVoice) {
      throw new Error(i18n.t('settings.tts.error.no_edge_voice'))
    }
  }
@@ -37,79 +38,79 @@ export class EdgeTTSService implements TTSServiceInterface {
  private playDirectly(text: string): boolean {
    try {
      // 验证参数
      this.validateParams()
      // 使用Web Speech API
      if (!('speechSynthesis' in window)) {
        throw new Error(i18n.t('settings.tts.error.browser_not_support'))
      }
      // 停止当前正在播放的语音
      window.speechSynthesis.cancel()
      if (currentUtterance) {
        currentUtterance = null
      }
      // 创建语音合成器实例
      const utterance = new SpeechSynthesisUtterance(text)
      currentUtterance = utterance
      // 获取可用的语音合成声音
      const voices = window.speechSynthesis.getVoices()
      console.log('可用的语音合成声音:', voices)
      // 查找指定的语音
      let selectedVoice = voices.find((v) => v.name === this.edgeVoice)
      // 如果没有找到指定的语音,尝试使用中文语音
      if (!selectedVoice) {
        console.warn('未找到指定的语音:', this.edgeVoice)
        // 尝试找中文语音
        selectedVoice = voices.find((v) => v.lang === 'zh-CN')
        if (selectedVoice) {
          console.log('使用替代中文语音:', selectedVoice.name)
        } else {
          // 如果没有中文语音,使用第一个可用的语音
          if (voices.length > 0) {
            selectedVoice = voices[0]
            console.log('使用第一个可用的语音:', selectedVoice.name)
          } else {
            console.warn('没有可用的语音')
            return false
          }
        }
      } else {
        console.log('已选择语音:', selectedVoice.name)
      }
      // 设置语音
      if (selectedVoice) {
        utterance.voice = selectedVoice
      }
      // 设置事件处理程序
      utterance.onend = () => {
        console.log('语音合成已结束')
        currentUtterance = null
        // 分发一个自定义事件,通知语音合成已结束
        // 这样TTSService可以监听这个事件并重置播放状态
        const event = new CustomEvent('edgeTTSComplete', { detail: { text } })
        document.dispatchEvent(event)
      }
      utterance.onerror = (event) => {
        console.error('语音合成错误:', event)
        currentUtterance = null
      }
      // 开始语音合成
      window.speechSynthesis.speak(utterance)
      return true
    } catch (error) {
      console.error('直接播放语音失败:', error)
      return false
    }
  }
@@ -120,151 +121,151 @@ export class EdgeTTSService implements TTSServiceInterface {
   */
  async synthesize(text: string): Promise<Blob> {
    // 验证参数
    this.validateParams()
    // 先尝试直接播放
    const playResult = this.playDirectly(text)
    if (playResult) {
      // 如果直接播放成功返回一个有效的音频Blob
      // 创建一个简单的音频文件,包含一个短暂停
      // 这个文件可以被浏览器正常播放,但实际上不会发出声音
      // 因为我们已经使用Web Speech API直接播放了语音
      const silentAudioBase64 = 'UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA='
      const silentAudioBuffer = Uint8Array.from(atob(silentAudioBase64), (c) => c.charCodeAt(0))
      return new Blob([silentAudioBuffer], { type: 'audio/wav' })
    }
    // 如果直接播放失败,尝试录制方法
    console.log('直接播放失败,尝试录制方法')
    try {
      console.log('使用浏览器TTS生成语音音色:', this.edgeVoice)
      // 使用Web Speech API
      if (!('speechSynthesis' in window)) {
        throw new Error(i18n.t('settings.tts.error.browser_not_support'))
      }
      // 停止当前正在播放的语音
      window.speechSynthesis.cancel()
      // 创建语音合成器实例
      const utterance = new SpeechSynthesisUtterance(text)
      // 获取可用的语音合成声音
      const voices = window.speechSynthesis.getVoices()
      console.log('初始可用的语音合成声音:', voices)
      // 如果没有可用的声音,等待声音加载
      if (voices.length === 0) {
        try {
          await new Promise<void>((resolve) => {
            const voicesChangedHandler = () => {
              window.speechSynthesis.onvoiceschanged = null
              resolve()
            }
            window.speechSynthesis.onvoiceschanged = voicesChangedHandler
            // 设置超时,防止无限等待
            setTimeout(() => {
              window.speechSynthesis.onvoiceschanged = null
              resolve()
            }, 5000)
          })
        } catch (error) {
          console.error('等待语音加载超时:', error)
        }
      }
      // 重新获取可用的语音合成声音
      const updatedVoices = window.speechSynthesis.getVoices()
      console.log('更新后可用的语音合成声音:', updatedVoices)
      // 查找指定的语音
      let selectedVoice = updatedVoices.find((v) => v.name === this.edgeVoice)
      // 如果没有找到指定的语音,尝试使用中文语音
      if (!selectedVoice) {
        console.warn('未找到指定的语音:', this.edgeVoice)
        // 尝试找中文语音
        selectedVoice = updatedVoices.find((v) => v.lang === 'zh-CN')
        if (selectedVoice) {
          console.log('使用替代中文语音:', selectedVoice.name)
        } else {
          // 如果没有中文语音,使用第一个可用的语音
          if (updatedVoices.length > 0) {
            selectedVoice = updatedVoices[0]
            console.log('使用第一个可用的语音:', selectedVoice.name)
          } else {
            console.warn('没有可用的语音')
          }
        }
      } else {
        console.log('已选择语音:', selectedVoice.name)
      }
      // 设置语音
      if (selectedVoice) {
        utterance.voice = selectedVoice
      }
      // 创建一个Promise来等待语音合成完成
      return await new Promise<Blob>((resolve, reject) => {
        try {
          // 使用AudioContext捕获语音合成的音频
          const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
          const audioDestination = audioContext.createMediaStreamDestination()
          const mediaRecorder = new MediaRecorder(audioDestination.stream)
          const audioChunks: BlobPart[] = []
          mediaRecorder.ondataavailable = (event) => {
            if (event.data.size > 0) {
              audioChunks.push(event.data)
            }
          }
          mediaRecorder.onstop = () => {
            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' })
            resolve(audioBlob)
          }
          // 开始录制
          mediaRecorder.start()
          // 设置语音合成事件
          utterance.onend = () => {
            // 语音合成结束后停止录制
            setTimeout(() => {
              mediaRecorder.stop()
            }, 500) // 等待一下,确保所有音频都被捕获
          }
          utterance.onerror = (event) => {
            console.error('语音合成错误:', event)
            mediaRecorder.stop()
            reject(new Error('语音合成错误'))
          }
          // 开始语音合成
          window.speechSynthesis.speak(utterance)
          // 设置超时,防止无限等待
          setTimeout(() => {
            if (mediaRecorder.state === 'recording') {
              console.warn('语音合成超时,强制停止')
              mediaRecorder.stop()
            }
          }, 10000) // 10秒超时
        } catch (error: any) {
          console.error('浏览器TTS语音合成失败:', error)
          reject(new Error(`浏览器TTS语音合成失败: ${error?.message || '未知错误'}`))
        }
      })
    } catch (error: any) {
      console.error('浏览器TTS语音合成失败:', error)
      // 即使失败也返回一个空的Blob而不是抛出异常
      // 这样可以避免在UI上显示错误消息
      return new Blob([], { type: 'audio/wav' })
    }
  }
}
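A brief usage sketch of the class above. Note the dual behavior of synthesize(): when direct playback through speechSynthesis succeeds it resolves with the silent placeholder WAV (the audio has already been spoken), and only the recording fallback returns real audio data. The import path and voice name below are examples, not values from the diff.

import { EdgeTTSService } from './EdgeTTSService' // path assumed

async function demoEdgeTts() {
  const tts = new EdgeTTSService('Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)') // example name
  const blob = await tts.synthesize('你好,世界')
  // Only meaningful when the recording fallback ran; after a successful
  // direct playback this blob is the silent placeholder described above.
  await new Audio(URL.createObjectURL(blob)).play()
}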

View File

@@ -1,13 +1,14 @@
import i18n from '@renderer/i18n'

import { TTSServiceInterface } from './TTSServiceInterface'

/**
 * 免费在线TTS服务实现类
 * 使用免费在线TTS服务不需要API密钥
 */
export class MsTTSService implements TTSServiceInterface {
  private voice: string
  private outputFormat: string

  /**
   *
@@ -15,9 +16,9 @@ export class MsTTSService implements TTSServiceInterface {
   * @param outputFormat
   */
  constructor(voice: string, outputFormat: string) {
    this.voice = voice
    this.outputFormat = outputFormat
    console.log('初始化MsTTSService语音:', voice, '输出格式:', outputFormat)
  }

  /**
@@ -26,7 +27,7 @@ export class MsTTSService implements TTSServiceInterface {
   */
  private validateParams(): void {
    if (!this.voice) {
      throw new Error(i18n.t('settings.tts.error.no_mstts_voice'))
    }
  }

@@ -37,22 +38,22 @@ export class MsTTSService implements TTSServiceInterface {
   */
  async synthesize(text: string): Promise<Blob> {
    // 验证参数
    this.validateParams()
    try {
      console.log('使用免费在线TTS生成语音音色:', this.voice)
      // 通过IPC调用主进程的MsTTSService
      const outputPath = await window.api.msTTS.synthesize(text, this.voice, this.outputFormat)
      // 读取生成的音频文件
      const audioData = await window.api.fs.read(outputPath)
      // 将Buffer转换为Blob
      return new Blob([audioData], { type: 'audio/mp3' })
    } catch (error: any) {
      console.error('免费在线TTS语音合成失败:', error)
      throw new Error(`免费在线TTS语音合成失败: ${error?.message || '未知错误'}`)
    }
  }
}
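For reference, the renderer-side call path above can be exercised as follows. The voice and output-format strings are taken from the options in the settings page earlier in this commit; the import path is assumed.

import { MsTTSService } from './MsTTSService' // path assumed

async function demoFreeOnlineTts(text: string) {
  const service = new MsTTSService('zh-CN-XiaoxiaoNeural', 'audio-24khz-48kbitrate-mono-mp3')
  const blob = await service.synthesize(text) // IPC to the main process, then the file is read back
  await new Audio(URL.createObjectURL(blob)).play()
}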

View File

@ -1,14 +1,15 @@
import { TTSServiceInterface } from './TTSServiceInterface'; import i18n from '@renderer/i18n'
import i18n from '@renderer/i18n';
import { TTSServiceInterface } from './TTSServiceInterface'
/** /**
* OpenAI TTS服务实现类 * OpenAI TTS服务实现类
*/ */
export class OpenAITTSService implements TTSServiceInterface { export class OpenAITTSService implements TTSServiceInterface {
private apiKey: string; private apiKey: string
private apiUrl: string; private apiUrl: string
private voice: string; private voice: string
private model: string; private model: string
/** /**
* *
@ -18,10 +19,10 @@ export class OpenAITTSService implements TTSServiceInterface {
* @param model * @param model
*/ */
constructor(apiKey: string, apiUrl: string, voice: string, model: string) { constructor(apiKey: string, apiUrl: string, voice: string, model: string) {
this.apiKey = apiKey; this.apiKey = apiKey
this.apiUrl = apiUrl; this.apiUrl = apiUrl
this.voice = voice; this.voice = voice
this.model = model; this.model = model
} }
/** /**
@ -30,19 +31,19 @@ export class OpenAITTSService implements TTSServiceInterface {
*/ */
private validateParams(): void { private validateParams(): void {
if (!this.apiKey) { if (!this.apiKey) {
throw new Error(i18n.t('settings.tts.error.no_api_key')); throw new Error(i18n.t('settings.tts.error.no_api_key'))
} }
if (!this.apiUrl) { if (!this.apiUrl) {
throw new Error(i18n.t('settings.tts.error.no_api_url')); throw new Error(i18n.t('settings.tts.error.no_api_url'))
} }
if (!this.voice) { if (!this.voice) {
throw new Error(i18n.t('settings.tts.error.no_voice')); throw new Error(i18n.t('settings.tts.error.no_voice'))
} }
if (!this.model) { if (!this.model) {
throw new Error(i18n.t('settings.tts.error.no_model')); throw new Error(i18n.t('settings.tts.error.no_model'))
} }
} }
@ -53,24 +54,24 @@ export class OpenAITTSService implements TTSServiceInterface {
*/ */
async synthesize(text: string): Promise<Blob> { async synthesize(text: string): Promise<Blob> {
// 验证参数 // 验证参数
this.validateParams(); this.validateParams()
// 准备OpenAI TTS请求体 // 准备OpenAI TTS请求体
const requestBody: any = { const requestBody: any = {
input: text input: text
}; }
// 只有当模型和音色不为空时才添加到请求体中 // 只有当模型和音色不为空时才添加到请求体中
if (this.model) { if (this.model) {
requestBody.model = this.model; requestBody.model = this.model
} }
if (this.voice) { if (this.voice) {
requestBody.voice = this.voice; requestBody.voice = this.voice
} }
// 调用OpenAI TTS API // 调用OpenAI TTS API
console.log('调用OpenAI TTS API开始合成语音'); console.log('调用OpenAI TTS API开始合成语音')
const response = await fetch(this.apiUrl, { const response = await fetch(this.apiUrl, {
method: 'POST', method: 'POST',
headers: { headers: {
@@ -78,15 +79,15 @@ export class OpenAITTSService implements TTSServiceInterface {
Authorization: `Bearer ${this.apiKey}` Authorization: `Bearer ${this.apiKey}`
}, },
body: JSON.stringify(requestBody) body: JSON.stringify(requestBody)
}); })
if (!response.ok) { if (!response.ok) {
const errorData = await response.json(); const errorData = await response.json()
throw new Error(errorData.error?.message || 'OpenAI语音合成失败'); throw new Error(errorData.error?.message || 'OpenAI语音合成失败')
} }
// 获取音频数据 // 获取音频数据
console.log('获取到OpenAI TTS响应开始处理音频数据'); console.log('获取到OpenAI TTS响应开始处理音频数据')
return await response.blob(); return await response.blob()
} }
} }
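
A usage sketch for the class above. The key is a placeholder and the URL is the standard OpenAI speech endpoint; in the app both come from settings, so treat them as assumptions:

async function demoOpenAITTS(): Promise<void> {
  const tts = new OpenAITTSService(
    'sk-xxxx', // placeholder, never hard-code a real key
    'https://api.openai.com/v1/audio/speech',
    'alloy',
    'tts-1'
  )
  const blob = await tts.synthesize('Hello from the TTS settings page')
  // Play directly; TTSService below does the same via a persistent <audio> element.
  await new Audio(URL.createObjectURL(blob)).play()
}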

View File: SiliconflowTTSService.ts

@@ -1,16 +1,17 @@
import { TTSServiceInterface } from './TTSServiceInterface'; import i18n from '@renderer/i18n'
import i18n from '@renderer/i18n';
import { TTSServiceInterface } from './TTSServiceInterface'
/** /**
* TTS服务实现类 * TTS服务实现类
*/ */
export class SiliconflowTTSService implements TTSServiceInterface { export class SiliconflowTTSService implements TTSServiceInterface {
private apiKey: string; private apiKey: string
private apiUrl: string; private apiUrl: string
private voice: string; private voice: string
private model: string; private model: string
private responseFormat: string; private responseFormat: string
private speed: number; private speed: number
/** /**
* *
@@ -29,12 +30,12 @@ export class SiliconflowTTSService implements TTSServiceInterface {
responseFormat: string = 'mp3', responseFormat: string = 'mp3',
speed: number = 1.0 speed: number = 1.0
) { ) {
this.apiKey = apiKey; this.apiKey = apiKey
this.apiUrl = apiUrl || 'https://api.siliconflow.cn/v1/audio/speech'; this.apiUrl = apiUrl || 'https://api.siliconflow.cn/v1/audio/speech'
this.voice = voice; this.voice = voice
this.model = model; this.model = model
this.responseFormat = responseFormat; this.responseFormat = responseFormat
this.speed = speed; this.speed = speed
} }
/** /**
@@ -43,15 +44,15 @@ export class SiliconflowTTSService implements TTSServiceInterface {
*/ */
private validateParams(): void { private validateParams(): void {
if (!this.apiKey) { if (!this.apiKey) {
throw new Error(i18n.t('settings.tts.error.no_api_key')); throw new Error(i18n.t('settings.tts.error.no_api_key'))
} }
if (!this.voice) { if (!this.voice) {
throw new Error(i18n.t('settings.tts.error.no_voice')); throw new Error(i18n.t('settings.tts.error.no_voice'))
} }
if (!this.model) { if (!this.model) {
throw new Error(i18n.t('settings.tts.error.no_model')); throw new Error(i18n.t('settings.tts.error.no_model'))
} }
} }
@@ -62,7 +63,7 @@ export class SiliconflowTTSService implements TTSServiceInterface {
*/ */
async synthesize(text: string): Promise<Blob> { async synthesize(text: string): Promise<Blob> {
// 验证参数 // 验证参数
this.validateParams(); this.validateParams()
// 准备硅基流动TTS请求体 // 准备硅基流动TTS请求体
const requestBody: any = { const requestBody: any = {
@@ -73,44 +74,44 @@ export class SiliconflowTTSService implements TTSServiceInterface {
response_format: this.responseFormat, response_format: this.responseFormat,
stream: false, stream: false,
speed: this.speed speed: this.speed
}; }
console.log('硅基流动TTS请求参数:', { console.log('硅基流动TTS请求参数:', {
model: this.model, model: this.model,
voice: this.voice, voice: this.voice,
response_format: 'mp3', response_format: 'mp3',
speed: this.speed speed: this.speed
}); })
// 调用硅基流动TTS API // 调用硅基流动TTS API
console.log('调用硅基流动TTS API开始合成语音'); console.log('调用硅基流动TTS API开始合成语音')
const response = await fetch(this.apiUrl, { const response = await fetch(this.apiUrl, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}` Authorization: `Bearer ${this.apiKey}`
}, },
body: JSON.stringify(requestBody) body: JSON.stringify(requestBody)
}); })
if (!response.ok) { if (!response.ok) {
let errorMessage = '硅基流动语音合成失败'; let errorMessage = '硅基流动语音合成失败'
try { try {
const errorData = await response.json(); const errorData = await response.json()
errorMessage = errorData.error?.message || errorMessage; errorMessage = errorData.error?.message || errorMessage
} catch (e) { } catch (e) {
// 如果无法解析JSON使用默认错误消息 // 如果无法解析JSON使用默认错误消息
} }
throw new Error(errorMessage); throw new Error(errorMessage)
} }
// 获取音频数据 // 获取音频数据
console.log('获取到硅基流动TTS响应开始处理音频数据'); console.log('获取到硅基流动TTS响应开始处理音频数据')
// 获取原始Blob // 获取原始Blob
const originalBlob = await response.blob(); const originalBlob = await response.blob()
// 创建一个新的Blob并指定正确的MIME类型 // 创建一个新的Blob并指定正确的MIME类型
return new Blob([originalBlob], { type: 'audio/mpeg' }); return new Blob([originalBlob], { type: 'audio/mpeg' })
} }
} }
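
For reference, the request body assembled above has roughly this shape. The input, model and voice fields sit in the elided top of the hunk, so this typing is an inference from validateParams() and the log call, not a copy of the source:

// Inferred shape of the POST body sent to the Siliconflow speech endpoint
interface SiliconflowSpeechRequest {
  model: string // required by validateParams()
  input: string // the text passed to synthesize()
  voice: string // required by validateParams()
  response_format: string // 'mp3' by default per the constructor
  stream: boolean // false: one complete audio payload is expected
  speed: number // 1.0 by default
}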

View File: TTSService.ts

@@ -1,21 +1,22 @@
import store from '@renderer/store'; import i18n from '@renderer/i18n'
import i18n from '@renderer/i18n'; import store from '@renderer/store'
import { TTSServiceFactory } from './TTSServiceFactory'; import { Message } from '@renderer/types'
import { TTSTextFilter } from './TTSTextFilter';
import { Message } from '@renderer/types'; import { TTSServiceFactory } from './TTSServiceFactory'
import { TTSTextFilter } from './TTSTextFilter'
/** /**
* TTS服务类 * TTS服务类
* *
*/ */
export class TTSService { export class TTSService {
private static instance: TTSService; private static instance: TTSService
private audioElement: HTMLAudioElement | null = null; private audioElement: HTMLAudioElement | null = null
private isPlaying = false; private isPlaying = false
// 错误消息节流控制 // 错误消息节流控制
private lastErrorTime = 0; private lastErrorTime = 0
private errorThrottleTime = 2000; // 2秒内不重复显示相同错误 private errorThrottleTime = 2000 // 2秒内不重复显示相同错误
/** /**
* *
@@ -25,8 +26,8 @@ export class TTSService {
// 每次调用时强制重新创建实例,确保使用最新的设置 // 每次调用时强制重新创建实例,确保使用最新的设置
// 注意:这会导致每次调用时都创建新的音频元素,可能会有内存泄漏风险 // 注意:这会导致每次调用时都创建新的音频元素,可能会有内存泄漏风险
// 但在当前情况下这是解决TTS服务类型切换问题的最简单方法 // 但在当前情况下这是解决TTS服务类型切换问题的最简单方法
TTSService.instance = new TTSService(); TTSService.instance = new TTSService()
return TTSService.instance; return TTSService.instance
} }
/** /**
@@ -34,21 +35,21 @@ export class TTSService {
*/ */
private constructor() { private constructor() {
// 创建音频元素 // 创建音频元素
this.audioElement = document.createElement('audio'); this.audioElement = document.createElement('audio')
this.audioElement.style.display = 'none'; this.audioElement.style.display = 'none'
document.body.appendChild(this.audioElement); document.body.appendChild(this.audioElement)
// 监听音频播放结束事件 // 监听音频播放结束事件
this.audioElement.addEventListener('ended', () => { this.audioElement.addEventListener('ended', () => {
this.isPlaying = false; this.isPlaying = false
console.log('TTS播放结束'); console.log('TTS播放结束')
}); })
// 监听浏览器TTS直接播放结束的自定义事件 // 监听浏览器TTS直接播放结束的自定义事件
document.addEventListener('edgeTTSComplete', () => { document.addEventListener('edgeTTSComplete', () => {
console.log('收到浏览器TTS直接播放结束事件'); console.log('收到浏览器TTS直接播放结束事件')
this.isPlaying = false; this.isPlaying = false
}); })
} }
/** /**
@@ -58,21 +59,21 @@ export class TTSService {
*/ */
public async speakFromMessage(message: Message): Promise<boolean> { public async speakFromMessage(message: Message): Promise<boolean> {
// 获取最新的TTS过滤选项 // 获取最新的TTS过滤选项
const settings = store.getState().settings; const settings = store.getState().settings
const ttsFilterOptions = settings.ttsFilterOptions || { const ttsFilterOptions = settings.ttsFilterOptions || {
filterThinkingProcess: true, filterThinkingProcess: true,
filterMarkdown: true, filterMarkdown: true,
filterCodeBlocks: true, filterCodeBlocks: true,
filterHtmlTags: true, filterHtmlTags: true,
maxTextLength: 4000 maxTextLength: 4000
}; }
// 应用过滤 // 应用过滤
const filteredText = TTSTextFilter.filterText(message.content, ttsFilterOptions); const filteredText = TTSTextFilter.filterText(message.content, ttsFilterOptions)
console.log('TTS过滤前文本长度:', message.content.length, '过滤后:', filteredText.length); console.log('TTS过滤前文本长度:', message.content.length, '过滤后:', filteredText.length)
// 播放过滤后的文本 // 播放过滤后的文本
return this.speak(filteredText); return this.speak(filteredText)
} }
/** /**
@@ -83,30 +84,30 @@ export class TTSService {
public async speak(text: string): Promise<boolean> { public async speak(text: string): Promise<boolean> {
try { try {
// 检查TTS是否启用 // 检查TTS是否启用
const settings = store.getState().settings; const settings = store.getState().settings
const ttsEnabled = settings.ttsEnabled; const ttsEnabled = settings.ttsEnabled
if (!ttsEnabled) { if (!ttsEnabled) {
this.showErrorMessage(i18n.t('settings.tts.error.not_enabled')); this.showErrorMessage(i18n.t('settings.tts.error.not_enabled'))
return false; return false
} }
// 如果正在播放,先停止 // 如果正在播放,先停止
if (this.isPlaying) { if (this.isPlaying) {
this.stop(); this.stop()
} }
// 确保文本不为空 // 确保文本不为空
if (!text || text.trim() === '') { if (!text || text.trim() === '') {
this.showErrorMessage(i18n.t('settings.tts.error.empty_text')); this.showErrorMessage(i18n.t('settings.tts.error.empty_text'))
return false; return false
} }
// 获取最新的设置 // 获取最新的设置
// 强制刷新状态对象,确保获取最新的设置 // 强制刷新状态对象,确保获取最新的设置
const latestSettings = store.getState().settings; const latestSettings = store.getState().settings
const serviceType = latestSettings.ttsServiceType || 'openai'; const serviceType = latestSettings.ttsServiceType || 'openai'
console.log('使用的TTS服务类型:', serviceType); console.log('使用的TTS服务类型:', serviceType)
console.log('当前TTS设置详情:', { console.log('当前TTS设置详情:', {
ttsServiceType: serviceType, ttsServiceType: serviceType,
ttsEdgeVoice: latestSettings.ttsEdgeVoice, ttsEdgeVoice: latestSettings.ttsEdgeVoice,
@@ -115,18 +116,18 @@ export class TTSService {
ttsSiliconflowModel: latestSettings.ttsSiliconflowModel, ttsSiliconflowModel: latestSettings.ttsSiliconflowModel,
ttsSiliconflowResponseFormat: latestSettings.ttsSiliconflowResponseFormat, ttsSiliconflowResponseFormat: latestSettings.ttsSiliconflowResponseFormat,
ttsSiliconflowSpeed: latestSettings.ttsSiliconflowSpeed ttsSiliconflowSpeed: latestSettings.ttsSiliconflowSpeed
}); })
try { try {
// 使用工厂创建TTS服务 // 使用工厂创建TTS服务
const ttsService = TTSServiceFactory.createService(serviceType, latestSettings); const ttsService = TTSServiceFactory.createService(serviceType, latestSettings)
// 合成语音 // 合成语音
const audioBlob = await ttsService.synthesize(text); const audioBlob = await ttsService.synthesize(text)
// 播放音频 // 播放音频
if (audioBlob) { if (audioBlob) {
const audioUrl = URL.createObjectURL(audioBlob); const audioUrl = URL.createObjectURL(audioBlob)
if (this.audioElement) { if (this.audioElement) {
// 打印音频Blob信息帮助调试 // 打印音频Blob信息帮助调试
@@ -134,36 +135,36 @@ export class TTSService {
size: audioBlob.size, size: audioBlob.size,
type: audioBlob.type, type: audioBlob.type,
serviceType: serviceType serviceType: serviceType
}); })
this.audioElement.src = audioUrl; this.audioElement.src = audioUrl
this.audioElement.play().catch((error) => { this.audioElement.play().catch((error) => {
// 检查是否是浏览器TTS直接播放的情况 // 检查是否是浏览器TTS直接播放的情况
// 如果是浏览器TTS且音频大小很小则不显示错误消息 // 如果是浏览器TTS且音频大小很小则不显示错误消息
const isEdgeTTS = serviceType === 'edge'; const isEdgeTTS = serviceType === 'edge'
const isSmallBlob = audioBlob.size < 100; // 小于100字节的音频文件可能是我们的静音文件 const isSmallBlob = audioBlob.size < 100 // 小于100字节的音频文件可能是我们的静音文件
if (isEdgeTTS && isSmallBlob) { if (isEdgeTTS && isSmallBlob) {
console.log('浏览器TTS直接播放中忽略音频元素错误'); console.log('浏览器TTS直接播放中忽略音频元素错误')
} else { } else {
console.error('播放TTS音频失败:', error); console.error('播放TTS音频失败:', error)
console.error('音频URL:', audioUrl); console.error('音频URL:', audioUrl)
console.error('音频Blob类型:', audioBlob.type); console.error('音频Blob类型:', audioBlob.type)
console.error('音频Blob大小:', audioBlob.size); console.error('音频Blob大小:', audioBlob.size)
this.showErrorMessage(i18n.t('settings.tts.error.play_failed')); this.showErrorMessage(i18n.t('settings.tts.error.play_failed'))
} }
}); })
this.isPlaying = true; this.isPlaying = true
console.log('开始播放TTS音频'); console.log('开始播放TTS音频')
// 释放URL对象 // 释放URL对象
this.audioElement.onended = () => { this.audioElement.onended = () => {
URL.revokeObjectURL(audioUrl); URL.revokeObjectURL(audioUrl)
// 检查是否是浏览器TTS直接播放的情况 // 检查是否是浏览器TTS直接播放的情况
const isEdgeTTS = serviceType === 'edge'; const isEdgeTTS = serviceType === 'edge'
const isSmallBlob = audioBlob.size < 100; const isSmallBlob = audioBlob.size < 100
// 如果是浏览器TTS直接播放则等待当前语音合成结束 // 如果是浏览器TTS直接播放则等待当前语音合成结束
if (isEdgeTTS && isSmallBlob) { if (isEdgeTTS && isSmallBlob) {
@@ -171,33 +172,33 @@ export class TTSService {
// 如果还在播放,则不重置播放状态 // 如果还在播放,则不重置播放状态
// 注意:这里我们无法直接访问 EdgeTTSService 中的 currentUtterance // 注意:这里我们无法直接访问 EdgeTTSService 中的 currentUtterance
// 所以我们使用定时器来检查语音合成是否完成 // 所以我们使用定时器来检查语音合成是否完成
console.log('浏览器TTS直接播放中等待语音合成结束'); console.log('浏览器TTS直接播放中等待语音合成结束')
// 保持播放状态,直到语音合成结束 // 保持播放状态,直到语音合成结束
// 使用定时器来检查语音合成是否完成 // 使用定时器来检查语音合成是否完成
// 大多数语音合成应该在几秒内完成 // 大多数语音合成应该在几秒内完成
setTimeout(() => { setTimeout(() => {
this.isPlaying = false; this.isPlaying = false
console.log('浏览器TTS直接播放完成'); console.log('浏览器TTS直接播放完成')
}, 10000); // 10秒后自动重置状态 }, 10000) // 10秒后自动重置状态
} else { } else {
this.isPlaying = false; this.isPlaying = false
} }
}; }
return true; return true
} }
} }
return false; return false
} catch (error: any) { } catch (error: any) {
console.error('TTS合成失败:', error); console.error('TTS合成失败:', error)
this.showErrorMessage(error?.message || i18n.t('settings.tts.error.synthesis_failed')); this.showErrorMessage(error?.message || i18n.t('settings.tts.error.synthesis_failed'))
return false; return false
} }
} catch (error) { } catch (error) {
console.error('TTS播放失败:', error); console.error('TTS播放失败:', error)
this.showErrorMessage(i18n.t('settings.tts.error.general')); this.showErrorMessage(i18n.t('settings.tts.error.general'))
return false; return false
} }
} }
@@ -206,10 +207,10 @@ export class TTSService {
*/ */
public stop(): void { public stop(): void {
if (this.audioElement && this.isPlaying) { if (this.audioElement && this.isPlaying) {
this.audioElement.pause(); this.audioElement.pause()
this.audioElement.currentTime = 0; this.audioElement.currentTime = 0
this.isPlaying = false; this.isPlaying = false
console.log('停止TTS播放'); console.log('停止TTS播放')
} }
} }
@@ -218,7 +219,7 @@ export class TTSService {
* @returns * @returns
*/ */
public isCurrentlyPlaying(): boolean { public isCurrentlyPlaying(): boolean {
return this.isPlaying; return this.isPlaying
} }
/** /**
@@ -226,15 +227,15 @@ export class TTSService {
* @param message * @param message
*/ */
private showErrorMessage(message: string): void { private showErrorMessage(message: string): void {
const now = Date.now(); const now = Date.now()
// 如果距离上次错误消息的时间小于节流时间,则不显示 // 如果距离上次错误消息的时间小于节流时间,则不显示
if (now - this.lastErrorTime < this.errorThrottleTime) { if (now - this.lastErrorTime < this.errorThrottleTime) {
console.log('错误消息被节流:', message); console.log('错误消息被节流:', message)
return; return
} }
// 更新上次错误消息时间 // 更新上次错误消息时间
this.lastErrorTime = now; this.lastErrorTime = now
window.message.error({ content: message, key: 'tts-error' }); window.message.error({ content: message, key: 'tts-error' })
} }
} }
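
A sketch of the intended call path from UI code, using only the methods shown above (the Message type is the one imported at the top of this file):

async function readMessageAloud(message: Message): Promise<boolean> {
  const tts = TTSService.getInstance() // note: currently returns a fresh instance on every call
  if (tts.isCurrentlyPlaying()) {
    tts.stop() // speak() also stops, but stopping here keeps UI state predictable
  }
  // Applies the ttsFilterOptions from the store before synthesis
  return tts.speakFromMessage(message)
}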

View File: TTSServiceFactory.ts

@@ -1,9 +1,10 @@
import { TTSServiceInterface } from './TTSServiceInterface'; import i18n from '@renderer/i18n'
import { OpenAITTSService } from './OpenAITTSService';
import { EdgeTTSService } from './EdgeTTSService'; import { EdgeTTSService } from './EdgeTTSService'
import { SiliconflowTTSService } from './SiliconflowTTSService'; import { MsTTSService } from './MsTTSService'
import { MsTTSService } from './MsTTSService'; import { OpenAITTSService } from './OpenAITTSService'
import i18n from '@renderer/i18n'; import { SiliconflowTTSService } from './SiliconflowTTSService'
import { TTSServiceInterface } from './TTSServiceInterface'
/** /**
* TTS服务工厂类 * TTS服务工厂类
@@ -17,24 +18,19 @@ export class TTSServiceFactory {
* @returns TTS服务实例 * @returns TTS服务实例
*/ */
static createService(serviceType: string, settings: any): TTSServiceInterface { static createService(serviceType: string, settings: any): TTSServiceInterface {
console.log('创建TTS服务实例类型:', serviceType); console.log('创建TTS服务实例类型:', serviceType)
switch (serviceType) { switch (serviceType) {
case 'openai': case 'openai':
console.log('创建OpenAI TTS服务实例'); console.log('创建OpenAI TTS服务实例')
return new OpenAITTSService( return new OpenAITTSService(settings.ttsApiKey, settings.ttsApiUrl, settings.ttsVoice, settings.ttsModel)
settings.ttsApiKey,
settings.ttsApiUrl,
settings.ttsVoice,
settings.ttsModel
);
case 'edge': case 'edge':
console.log('创建Edge TTS服务实例'); console.log('创建Edge TTS服务实例')
return new EdgeTTSService(settings.ttsEdgeVoice); return new EdgeTTSService(settings.ttsEdgeVoice)
case 'siliconflow': case 'siliconflow':
console.log('创建硅基流动 TTS服务实例'); console.log('创建硅基流动 TTS服务实例')
console.log('硅基流动TTS设置:', { console.log('硅基流动TTS设置:', {
apiKey: settings.ttsSiliconflowApiKey ? '已设置' : '未设置', apiKey: settings.ttsSiliconflowApiKey ? '已设置' : '未设置',
apiUrl: settings.ttsSiliconflowApiUrl, apiUrl: settings.ttsSiliconflowApiUrl,
@@ -42,7 +38,7 @@ export class TTSServiceFactory {
model: settings.ttsSiliconflowModel, model: settings.ttsSiliconflowModel,
responseFormat: settings.ttsSiliconflowResponseFormat, responseFormat: settings.ttsSiliconflowResponseFormat,
speed: settings.ttsSiliconflowSpeed speed: settings.ttsSiliconflowSpeed
}); })
return new SiliconflowTTSService( return new SiliconflowTTSService(
settings.ttsSiliconflowApiKey, settings.ttsSiliconflowApiKey,
settings.ttsSiliconflowApiUrl, settings.ttsSiliconflowApiUrl,
@@ -50,21 +46,18 @@ export class TTSServiceFactory {
settings.ttsSiliconflowModel, settings.ttsSiliconflowModel,
settings.ttsSiliconflowResponseFormat, settings.ttsSiliconflowResponseFormat,
settings.ttsSiliconflowSpeed settings.ttsSiliconflowSpeed
); )
case 'mstts': case 'mstts':
console.log('创建免费在线TTS服务实例'); console.log('创建免费在线TTS服务实例')
console.log('免费在线TTS设置:', { console.log('免费在线TTS设置:', {
voice: settings.ttsMsVoice, voice: settings.ttsMsVoice,
outputFormat: settings.ttsMsOutputFormat outputFormat: settings.ttsMsOutputFormat
}); })
return new MsTTSService( return new MsTTSService(settings.ttsMsVoice, settings.ttsMsOutputFormat)
settings.ttsMsVoice,
settings.ttsMsOutputFormat
);
default: default:
throw new Error(i18n.t('settings.tts.error.unsupported_service_type', { serviceType })); throw new Error(i18n.t('settings.tts.error.unsupported_service_type', { serviceType }))
} }
} }
} }
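
The factory keeps callers decoupled from the concrete services, so adding a backend is one more case plus its settings keys; a hypothetical 'mytts' case is sketched in the comments below (MyTTSService and ttsMyApiKey do not exist in this codebase):

// case 'mytts':
//   return new MyTTSService(settings.ttsMyApiKey)
//
// Callers stay unchanged regardless of the backend:
function synthesizeWithSettings(text: string, settings: any): Promise<Blob> {
  const service: TTSServiceInterface = TTSServiceFactory.createService(
    settings.ttsServiceType || 'openai', // same fallback TTSService.speak() uses
    settings
  )
  return service.synthesize(text)
}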

View File: TTSServiceInterface.ts

@@ -8,5 +8,5 @@ export interface TTSServiceInterface {
* @param text * @param text
* @returns Blob对象的Promise * @returns Blob对象的Promise
*/ */
synthesize(text: string): Promise<Blob>; synthesize(text: string): Promise<Blob>
} }
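
Because the contract is a single method, a test double is trivial. The SilentTTSService below is hypothetical, not part of this commit:

// Yields an empty MP3-typed blob so callers can exercise their playback path without sound
class SilentTTSService implements TTSServiceInterface {
  async synthesize(_text: string): Promise<Blob> {
    return new Blob([], { type: 'audio/mpeg' })
  }
}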

View File: TTSTextFilter.ts

@@ -12,43 +12,43 @@ export class TTSTextFilter {
public static filterText( public static filterText(
text: string, text: string,
options: { options: {
filterThinkingProcess: boolean; filterThinkingProcess: boolean
filterMarkdown: boolean; filterMarkdown: boolean
filterCodeBlocks: boolean; filterCodeBlocks: boolean
filterHtmlTags: boolean; filterHtmlTags: boolean
maxTextLength: number; maxTextLength: number
} }
): string { ): string {
if (!text) return ''; if (!text) return ''
let filteredText = text; let filteredText = text
// 过滤思考过程 // 过滤思考过程
if (options.filterThinkingProcess) { if (options.filterThinkingProcess) {
filteredText = this.filterThinkingProcess(filteredText); filteredText = this.filterThinkingProcess(filteredText)
} }
// 过滤Markdown标记 // 过滤Markdown标记
if (options.filterMarkdown) { if (options.filterMarkdown) {
filteredText = this.filterMarkdown(filteredText); filteredText = this.filterMarkdown(filteredText)
} }
// 过滤代码块 // 过滤代码块
if (options.filterCodeBlocks) { if (options.filterCodeBlocks) {
filteredText = this.filterCodeBlocks(filteredText); filteredText = this.filterCodeBlocks(filteredText)
} }
// 过滤HTML标签 // 过滤HTML标签
if (options.filterHtmlTags) { if (options.filterHtmlTags) {
filteredText = this.filterHtmlTags(filteredText); filteredText = this.filterHtmlTags(filteredText)
} }
// 限制文本长度 // 限制文本长度
if (options.maxTextLength > 0 && filteredText.length > options.maxTextLength) { if (options.maxTextLength > 0 && filteredText.length > options.maxTextLength) {
filteredText = filteredText.substring(0, options.maxTextLength); filteredText = filteredText.substring(0, options.maxTextLength)
} }
return filteredText.trim(); return filteredText.trim()
} }
/** /**
@@ -58,27 +58,27 @@ export class TTSTextFilter {
*/ */
private static filterThinkingProcess(text: string): string { private static filterThinkingProcess(text: string): string {
// 过滤<think>标签内容 // 过滤<think>标签内容
text = text.replace(/<think>[\s\S]*?<\/think>/g, ''); text = text.replace(/<think>[\s\S]*?<\/think>/g, '')
// 过滤未闭合的<think>标签 // 过滤未闭合的<think>标签
if (text.includes('<think>')) { if (text.includes('<think>')) {
const parts = text.split('<think>'); const parts = text.split('<think>')
text = parts[0]; text = parts[0]
} }
// 过滤思考过程部分(###Thinking和###Response格式 // 过滤思考过程部分(###Thinking和###Response格式
const thinkingMatch = text.match(/###\s*Thinking[\s\S]*?(?=###\s*Response|$)/); const thinkingMatch = text.match(/###\s*Thinking[\s\S]*?(?=###\s*Response|$)/)
if (thinkingMatch) { if (thinkingMatch) {
text = text.replace(thinkingMatch[0], ''); text = text.replace(thinkingMatch[0], '')
} }
// 如果有Response部分只保留Response部分 // 如果有Response部分只保留Response部分
const responseMatch = text.match(/###\s*Response\s*([\s\S]*?)(?=###|$)/); const responseMatch = text.match(/###\s*Response\s*([\s\S]*?)(?=###|$)/)
if (responseMatch) { if (responseMatch) {
text = responseMatch[1]; text = responseMatch[1]
} }
return text; return text
} }
/** /**
@@ -88,29 +88,29 @@ export class TTSTextFilter {
*/ */
private static filterMarkdown(text: string): string { private static filterMarkdown(text: string): string {
// 过滤标题标记 // 过滤标题标记
text = text.replace(/#{1,6}\s+/g, ''); text = text.replace(/#{1,6}\s+/g, '')
// 过滤粗体和斜体标记 // 过滤粗体和斜体标记
text = text.replace(/(\*\*|__)(.*?)\1/g, '$2'); text = text.replace(/(\*\*|__)(.*?)\1/g, '$2')
text = text.replace(/(\*|_)(.*?)\1/g, '$2'); text = text.replace(/(\*|_)(.*?)\1/g, '$2')
// 过滤链接 // 过滤链接
text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1'); text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1')
// 过滤图片 // 过滤图片
text = text.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, ''); text = text.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '')
// 过滤引用 // 过滤引用
text = text.replace(/^\s*>\s+/gm, ''); text = text.replace(/^\s*>\s+/gm, '')
// 过滤水平线 // 过滤水平线
text = text.replace(/^\s*[-*_]{3,}\s*$/gm, ''); text = text.replace(/^\s*[-*_]{3,}\s*$/gm, '')
// 过滤列表标记 // 过滤列表标记
text = text.replace(/^\s*[-*+]\s+/gm, ''); text = text.replace(/^\s*[-*+]\s+/gm, '')
text = text.replace(/^\s*\d+\.\s+/gm, ''); text = text.replace(/^\s*\d+\.\s+/gm, '')
return text; return text
} }
/** /**
@@ -120,15 +120,15 @@ export class TTSTextFilter {
*/ */
private static filterCodeBlocks(text: string): string { private static filterCodeBlocks(text: string): string {
// 过滤围栏式代码块 // 过滤围栏式代码块
text = text.replace(/```[\s\S]*?```/g, ''); text = text.replace(/```[\s\S]*?```/g, '')
// 过滤缩进式代码块 // 过滤缩进式代码块
text = text.replace(/(?:^|\n)( {4}|\t).*(?:\n|$)/g, '\n'); text = text.replace(/(?:^|\n)( {4}|\t).*(?:\n|$)/g, '\n')
// 过滤行内代码 // 过滤行内代码
text = text.replace(/`([^`]+)`/g, '$1'); text = text.replace(/`([^`]+)`/g, '$1')
return text; return text
} }
/** /**
@@ -138,11 +138,11 @@ export class TTSTextFilter {
*/ */
private static filterHtmlTags(text: string): string { private static filterHtmlTags(text: string): string {
// 过滤HTML标签 // 过滤HTML标签
text = text.replace(/<[^>]*>/g, ''); text = text.replace(/<[^>]*>/g, '')
// 过滤HTML实体 // 过滤HTML实体
text = text.replace(/&[a-zA-Z0-9#]+;/g, ' '); text = text.replace(/&[a-zA-Z0-9#]+;/g, ' ')
return text; return text
} }
} }
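
A quick worked example, tracing the input through the regexes shown above:

const raw = '<think>draft reasoning</think># Title\nRead `hi`, then [the link](https://example.com).'
const spoken = TTSTextFilter.filterText(raw, {
  filterThinkingProcess: true,
  filterMarkdown: true,
  filterCodeBlocks: true,
  filterHtmlTags: true,
  maxTextLength: 4000
})
// spoken === 'Title\nRead hi, then the link.'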

View File: index.ts

@@ -1,7 +1,7 @@
export * from './TTSService'; export * from './EdgeTTSService'
export * from './TTSServiceInterface'; export * from './MsTTSService'
export * from './TTSServiceFactory'; export * from './OpenAITTSService'
export * from './OpenAITTSService'; export * from './SiliconflowTTSService'
export * from './EdgeTTSService'; export * from './TTSService'
export * from './SiliconflowTTSService'; export * from './TTSServiceFactory'
export * from './MsTTSService'; export * from './TTSServiceInterface'
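
The change to the barrel is ordering only; the exported surface is identical before and after. Consumers import from the directory root, as below (the alias path is an assumption about the project's path mapping):

import { TTSService, TTSServiceFactory, TTSTextFilter } from '@renderer/services/tts'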