+ {t('settings.tts.tab_title')}
+
+ ),
+ children: (
+
+
+
+ {t('settings.tts.enable')}
+ dispatch(setTtsEnabled(checked))} />
+
+ {t('settings.tts.enable.help')}
+
- {/* 重置按钮 */}
-
-
- {t('settings.tts.reset_title')}
-
-
- {t('settings.tts.reset_help')}
-
-
- {t('settings.tts.api_settings')}
-
-
-
+
- {/* OpenAI TTS设置 */}
- {ttsServiceType === 'openai' && (
- <>
-
- dispatch(setTtsApiKey(e.target.value))}
- placeholder={t('settings.tts.api_key.placeholder')}
- disabled={!ttsEnabled}
- />
-
-
- dispatch(setTtsApiUrl(e.target.value))}
- placeholder={t('settings.tts.api_url.placeholder')}
- disabled={!ttsEnabled}
- />
-
- >
- )}
+ {/* OpenAI TTS设置 */}
+ {ttsServiceType === 'openai' && (
+ <>
+
+ dispatch(setTtsApiKey(e.target.value))}
+ placeholder={t('settings.tts.api_key.placeholder')}
+ disabled={!ttsEnabled}
+ />
+
+
+ dispatch(setTtsApiUrl(e.target.value))}
+ placeholder={t('settings.tts.api_url.placeholder')}
+ disabled={!ttsEnabled}
+ />
+
+ >
+ )}
- {/* Edge TTS设置 */}
- {ttsServiceType === 'edge' && (
-
-
- dispatch(setTtsEdgeVoice(value))}
- options={
- availableVoices.length > 0
- ? availableVoices
- : [{ label: t('settings.tts.edge_voice.loading'), value: '' }]
- }
- disabled={!ttsEnabled}
- style={{ flex: 1 }}
- showSearch
- optionFilterProp="label"
- placeholder={
- availableVoices.length === 0
- ? t('settings.tts.edge_voice.loading')
- : t('settings.tts.voice.placeholder')
- }
- notFoundContent={
- availableVoices.length === 0
- ? t('settings.tts.edge_voice.loading')
- : t('settings.tts.edge_voice.not_found')
- }
- />
- }
- onClick={refreshVoices}
- disabled={!ttsEnabled}
- title={t('settings.tts.edge_voice.refresh')}
- />
-
- {availableVoices.length === 0 && {t('settings.tts.edge_voice.loading')}}
-
- )}
+ {/* Edge TTS设置 */}
+ {ttsServiceType === 'edge' && (
+
+
+ dispatch(setTtsEdgeVoice(value))}
+ options={
+ availableVoices.length > 0
+ ? availableVoices
+ : [{ label: t('settings.tts.edge_voice.loading'), value: '' }]
+ }
+ disabled={!ttsEnabled}
+ style={{ flex: 1 }}
+ showSearch
+ optionFilterProp="label"
+ placeholder={
+ availableVoices.length === 0
+ ? t('settings.tts.edge_voice.loading')
+ : t('settings.tts.voice.placeholder')
+ }
+ notFoundContent={
+ availableVoices.length === 0
+ ? t('settings.tts.edge_voice.loading')
+ : t('settings.tts.edge_voice.not_found')
+ }
+ />
+ }
+ onClick={refreshVoices}
+ disabled={!ttsEnabled}
+ title={t('settings.tts.edge_voice.refresh')}
+ />
+
+ {availableVoices.length === 0 && {t('settings.tts.edge_voice.loading')}}
+
+ )}
- {/* OpenAI TTS的音色和模型设置 */}
- {ttsServiceType === 'openai' && (
- <>
- {/* 音色选择 */}
-
- dispatch(setTtsVoice(value))}
- options={ttsCustomVoices.map((voice: any) => {
- // 确保voice是字符串
- const voiceStr = typeof voice === 'string' ? voice : String(voice)
- return { label: voiceStr, value: voiceStr }
- })}
- disabled={!ttsEnabled}
- style={{ width: '100%' }}
- placeholder={t('settings.tts.voice.placeholder')}
- showSearch
- optionFilterProp="label"
- allowClear
- />
-
+ {/* OpenAI TTS的音色和模型设置 */}
+ {ttsServiceType === 'openai' && (
+ <>
+ {/* 音色选择 */}
+
+ dispatch(setTtsVoice(value))}
+ options={ttsCustomVoices.map((voice: any) => {
+ // 确保voice是字符串
+ const voiceStr = typeof voice === 'string' ? voice : String(voice)
+ return { label: voiceStr, value: voiceStr }
+ })}
+ disabled={!ttsEnabled}
+ style={{ width: '100%' }}
+ placeholder={t('settings.tts.voice.placeholder')}
+ showSearch
+ optionFilterProp="label"
+ allowClear
+ />
+
- {/* 自定义音色列表 */}
-
- {ttsCustomVoices && ttsCustomVoices.length > 0 ? (
- ttsCustomVoices.map((voice: any, index: number) => {
- // 确保voice是字符串
- const voiceStr = typeof voice === 'string' ? voice : String(voice)
- return (
- handleRemoveVoice(voiceStr)}
- style={{ padding: '4px 8px' }}>
- {voiceStr}
-
- )
- })
- ) : (
- {t('settings.tts.voice_empty')}
- )}
-
+ {/* 自定义音色列表 */}
+
+ {ttsCustomVoices && ttsCustomVoices.length > 0 ? (
+ ttsCustomVoices.map((voice: any, index: number) => {
+ // 确保voice是字符串
+ const voiceStr = typeof voice === 'string' ? voice : String(voice)
+ return (
+ handleRemoveVoice(voiceStr)}
+ style={{ padding: '4px 8px' }}>
+ {voiceStr}
+
+ )
+ })
+ ) : (
+ {t('settings.tts.voice_empty')}
+ )}
+
- {/* 添加自定义音色 */}
-
-
- setNewVoice(e.target.value)}
- disabled={!ttsEnabled}
- style={{ flex: 1 }}
- />
- }
- onClick={handleAddVoice}
- disabled={!ttsEnabled || !newVoice}>
- {t('settings.tts.voice_add')}
-
-
-
+ {/* 添加自定义音色 */}
+
+
+ setNewVoice(e.target.value)}
+ disabled={!ttsEnabled}
+ style={{ flex: 1 }}
+ />
+ }
+ onClick={handleAddVoice}
+ disabled={!ttsEnabled || !newVoice}>
+ {t('settings.tts.voice_add')}
+
+
+
- {/* 模型选择 */}
-
- dispatch(setTtsModel(value))}
- options={ttsCustomModels.map((model: any) => {
- // 确保model是字符串
- const modelStr = typeof model === 'string' ? model : String(model)
- return { label: modelStr, value: modelStr }
- })}
- disabled={!ttsEnabled}
- style={{ width: '100%' }}
- placeholder={t('settings.tts.model.placeholder')}
- showSearch
- optionFilterProp="label"
- allowClear
- />
-
+ {/* 模型选择 */}
+
+ dispatch(setTtsModel(value))}
+ options={ttsCustomModels.map((model: any) => {
+ // 确保model是字符串
+ const modelStr = typeof model === 'string' ? model : String(model)
+ return { label: modelStr, value: modelStr }
+ })}
+ disabled={!ttsEnabled}
+ style={{ width: '100%' }}
+ placeholder={t('settings.tts.model.placeholder')}
+ showSearch
+ optionFilterProp="label"
+ allowClear
+ />
+
- {/* 自定义模型列表 */}
-
- {ttsCustomModels && ttsCustomModels.length > 0 ? (
- ttsCustomModels.map((model: any, index: number) => {
- // 确保model是字符串
- const modelStr = typeof model === 'string' ? model : String(model)
- return (
- handleRemoveModel(modelStr)}
- style={{ padding: '4px 8px' }}>
- {modelStr}
-
- )
- })
- ) : (
- {t('settings.tts.model_empty')}
- )}
-
+ {/* 自定义模型列表 */}
+
+ {ttsCustomModels && ttsCustomModels.length > 0 ? (
+ ttsCustomModels.map((model: any, index: number) => {
+ // 确保model是字符串
+ const modelStr = typeof model === 'string' ? model : String(model)
+ return (
+ handleRemoveModel(modelStr)}
+ style={{ padding: '4px 8px' }}>
+ {modelStr}
+
+ )
+ })
+ ) : (
+ {t('settings.tts.model_empty')}
+ )}
+
- {/* 添加自定义模型 */}
-
-
- setNewModel(e.target.value)}
- disabled={!ttsEnabled}
- style={{ flex: 1 }}
- />
- }
- onClick={handleAddModel}
- disabled={!ttsEnabled || !newModel}>
- {t('settings.tts.model_add')}
-
-
-
- >
- )}
+ {/* 添加自定义模型 */}
+
+
+ setNewModel(e.target.value)}
+ disabled={!ttsEnabled}
+ style={{ flex: 1 }}
+ />
+ }
+ onClick={handleAddModel}
+ disabled={!ttsEnabled || !newModel}>
+ {t('settings.tts.model_add')}
+
+
+
+ >
+ )}
- {/* TTS过滤选项 */}
-
-
- dispatch(setTtsFilterOptions({ filterThinkingProcess: checked }))}
- disabled={!ttsEnabled}
- />{' '}
- {t('settings.tts.filter.thinking_process')}
-
-
- dispatch(setTtsFilterOptions({ filterMarkdown: checked }))}
- disabled={!ttsEnabled}
- />{' '}
- {t('settings.tts.filter.markdown')}
-
-
- dispatch(setTtsFilterOptions({ filterCodeBlocks: checked }))}
- disabled={!ttsEnabled}
- />{' '}
- {t('settings.tts.filter.code_blocks')}
-
-
- dispatch(setTtsFilterOptions({ filterHtmlTags: checked }))}
- disabled={!ttsEnabled}
- />{' '}
- {t('settings.tts.filter.html_tags')}
-
-
- {t('settings.tts.max_text_length')}:
- dispatch(setTtsFilterOptions({ maxTextLength: value }))}
- disabled={!ttsEnabled}
- style={{ width: 120 }}
- options={[
- { label: '1000', value: 1000 },
- { label: '2000', value: 2000 },
- { label: '4000', value: 4000 },
- { label: '8000', value: 8000 },
- { label: '16000', value: 16000 }
- ]}
- />
-
-
+ {/* TTS过滤选项 */}
+
+
+ dispatch(setTtsFilterOptions({ filterThinkingProcess: checked }))}
+ disabled={!ttsEnabled}
+ />{' '}
+ {t('settings.tts.filter.thinking_process')}
+
+
+ dispatch(setTtsFilterOptions({ filterMarkdown: checked }))}
+ disabled={!ttsEnabled}
+ />{' '}
+ {t('settings.tts.filter.markdown')}
+
+
+ dispatch(setTtsFilterOptions({ filterCodeBlocks: checked }))}
+ disabled={!ttsEnabled}
+ />{' '}
+ {t('settings.tts.filter.code_blocks')}
+
+
+ dispatch(setTtsFilterOptions({ filterHtmlTags: checked }))}
+ disabled={!ttsEnabled}
+ />{' '}
+ {t('settings.tts.filter.html_tags')}
+
+
+ {t('settings.tts.max_text_length')}:
+ dispatch(setTtsFilterOptions({ maxTextLength: value }))}
+ disabled={!ttsEnabled}
+ style={{ width: 120 }}
+ options={[
+ { label: '1000', value: 1000 },
+ { label: '2000', value: 2000 },
+ { label: '4000', value: 4000 },
+ { label: '8000', value: 8000 },
+ { label: '16000', value: 16000 }
+ ]}
+ />
+
+
-
-
-
-
-
+
+
+
+
+
+
+ )
+ },
+ {
+ key: 'asr',
+ label: (
+
+ {t('settings.asr.tab_title')}
+
+ ),
+ children:
+ }
+ ]}
+ />
- {t('settings.tts.help')}
-
- {t('settings.tts.learn_more')}
+ {t('settings.voice.help')}
+
+ {t('settings.voice.learn_more')}
diff --git a/src/renderer/src/services/ASRServerService.ts b/src/renderer/src/services/ASRServerService.ts
new file mode 100644
index 0000000000..6c8d77757f
--- /dev/null
+++ b/src/renderer/src/services/ASRServerService.ts
@@ -0,0 +1,129 @@
+import i18n from '@renderer/i18n'
+
+// Use window.electron instead of importing the electron module directly;
+// this avoids the problem of __dirname not being available
+
+class ASRServerService {
+  private serverProcess: number | null = null // PID reported by the main process; null while stopped
+  private isServerRunning = false
+
+  /**
+   * Starts the ASR server by invoking 'start-asr-server' on the main process.
+   * @returns true if the server is running (already or newly started), false if starting failed
+   */
+  startServer = async (): Promise<boolean> => {
+    if (this.isServerRunning) {
+      console.log('[ASRServerService] 服务器已经在运行中')
+      window.message.info({ content: i18n.t('settings.asr.server.already_running'), key: 'asr-server' })
+      return true
+    }
+
+    try {
+      console.log('[ASRServerService] 正在启动ASR服务器...')
+      window.message.loading({ content: i18n.t('settings.asr.server.starting'), key: 'asr-server' })
+
+      // Delegate the actual start to the main process over IPC
+      const result = await window.electron.ipcRenderer.invoke('start-asr-server')
+
+      if (result.success) {
+        this.isServerRunning = true
+        this.serverProcess = result.pid
+        console.log('[ASRServerService] ASR服务器启动成功,PID:', result.pid)
+        window.message.success({ content: i18n.t('settings.asr.server.started'), key: 'asr-server' })
+        return true
+      } else {
+        console.error('[ASRServerService] ASR服务器启动失败:', result.error)
+        window.message.error({
+          content: i18n.t('settings.asr.server.start_failed') + ': ' + result.error,
+          key: 'asr-server'
+        })
+        return false
+      }
+    } catch (error) {
+      console.error('[ASRServerService] 启动ASR服务器时出错:', error)
+      window.message.error({
+        content: i18n.t('settings.asr.server.start_failed') + ': ' + (error instanceof Error ? error.message : String(error)),
+        key: 'asr-server'
+      })
+      return false
+    }
+  }
+
+  /**
+   * Stops the ASR server by invoking 'stop-asr-server' with the stored PID.
+   * @returns true if the server is stopped (or was not running), false if stopping failed
+   */
+  stopServer = async (): Promise<boolean> => {
+    if (!this.isServerRunning || !this.serverProcess) {
+      console.log('[ASRServerService] 服务器未运行')
+      window.message.info({ content: i18n.t('settings.asr.server.not_running'), key: 'asr-server' })
+      return true
+    }
+
+    try {
+      console.log('[ASRServerService] 正在停止ASR服务器...')
+      window.message.loading({ content: i18n.t('settings.asr.server.stopping'), key: 'asr-server' })
+
+      // Delegate the actual stop to the main process over IPC
+      const result = await window.electron.ipcRenderer.invoke('stop-asr-server', this.serverProcess)
+
+      if (result.success) {
+        this.isServerRunning = false
+        this.serverProcess = null
+        console.log('[ASRServerService] ASR服务器已停止')
+        window.message.success({ content: i18n.t('settings.asr.server.stopped'), key: 'asr-server' })
+        return true
+      } else {
+        console.error('[ASRServerService] ASR服务器停止失败:', result.error)
+        window.message.error({
+          content: i18n.t('settings.asr.server.stop_failed') + ': ' + result.error,
+          key: 'asr-server'
+        })
+        return false
+      }
+    } catch (error) {
+      console.error('[ASRServerService] 停止ASR服务器时出错:', error)
+      window.message.error({
+        content: i18n.t('settings.asr.server.stop_failed') + ': ' + (error instanceof Error ? error.message : String(error)),
+        key: 'asr-server'
+      })
+      return false
+    }
+  }
+
+  /**
+   * Reports whether this service believes the ASR server is running.
+   * @returns the locally tracked running flag (not re-checked against the main process)
+   */
+  isRunning = (): boolean => {
+    return this.isServerRunning
+  }
+
+  /**
+   * Returns the URL of the ASR server web page.
+   * @returns the fixed URL http://localhost:8080
+   */
+  getServerUrl = (): string => {
+    return 'http://localhost:8080'
+  }
+
+  /**
+   * Returns the path of the ASR server script.
+   * @returns a relative path that depends on NODE_ENV (development vs. anything else)
+   */
+  getServerFilePath = (): string => {
+    // Use a relative path because window.electron.app.getAppPath() is not available
+    return process.env.NODE_ENV === 'development'
+      ? 'src/renderer/src/assets/asr-server/server.js'
+      : 'public/asr-server/server.js'
+  }
+
+  /**
+   * Opens the ASR server web page in a new window.
+   */
+  openServerPage = (): void => {
+    window.open(this.getServerUrl(), '_blank')
+  }
+}
+
+export default new ASRServerService()
diff --git a/src/renderer/src/services/ASRService.ts b/src/renderer/src/services/ASRService.ts
new file mode 100644
index 0000000000..260725fdf6
--- /dev/null
+++ b/src/renderer/src/services/ASRService.ts
@@ -0,0 +1,560 @@
+import i18n from '@renderer/i18n'
+import store from '@renderer/store'
+
+/**
+ * ASR (automatic speech recognition) service that converts speech to text
+ */
+class ASRService {
+ private mediaRecorder: MediaRecorder | null = null
+ private audioChunks: Blob[] = [] // chunks pushed by MediaRecorder.ondataavailable
+ private isRecording = false
+ private stream: MediaStream | null = null // microphone stream obtained from getUserMedia
+
+ // WebSocket state (used by the 'local' ASR service type)
+ private ws: WebSocket | null = null
+ private wsConnected = false
+ private browserReady = false // updated from 'status' messages received over the WebSocket
+ private reconnectAttempt = 0 // reset to 0 whenever a connection opens
+ private maxReconnectAttempts = 5
+ private reconnectTimeout: NodeJS.Timeout | null = null
+
+  /**
+   * Connects to the local ASR WebSocket server at ws://localhost:8080.
+   *
+   * Reuses a socket that is already open, joins a connection attempt that is
+   * still in flight, and otherwise replaces the old socket with a new one.
+   *
+   * Failures are logged, surfaced through window.message and reported by
+   * resolving false rather than by rejecting.
+   * @returns true once the socket is open, false if the connection failed
+   */
+  connectToWebSocketServer = async (): Promise<boolean> => {
+    return new Promise<boolean>((resolve) => {
+      if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+        console.log('[ASRService] WebSocket已连接')
+        resolve(true)
+        return
+      }
+
+      if (this.ws && this.ws.readyState === WebSocket.CONNECTING) {
+        console.log('[ASRService] WebSocket正在连接中')
+        // Join the attempt already in flight. Listeners are added rather than
+        // assigned so the handlers installed when the socket was created keep
+        // running (identify handshake, toasts, settling the first caller's promise).
+        const pending = this.ws
+        pending.addEventListener('open', () => resolve(true), { once: true })
+        pending.addEventListener('error', () => resolve(false), { once: true })
+        // A socket torn down before it opened may only report 'close'
+        pending.addEventListener('close', () => resolve(false), { once: true })
+        return
+      }
+
+      // Discard the stale socket. Its close handler is detached first: otherwise the
+      // late 'close' event would clear wsConnected/browserReady and schedule a
+      // reconnect after the replacement connection is already up.
+      if (this.ws) {
+        try {
+          this.ws.onclose = null
+          this.ws.close()
+        } catch (e) {
+          console.error('[ASRService] 关闭WebSocket连接失败:', e)
+        }
+      }
+
+      // Open a new connection
+      try {
+        console.log('[ASRService] 正在连接WebSocket服务器...')
+        window.message.loading({ content: '正在连接语音识别服务...', key: 'ws-connect' })
+
+        this.ws = new WebSocket('ws://localhost:8080')
+        this.wsConnected = false
+        this.browserReady = false
+
+        this.ws.onopen = () => {
+          console.log('[ASRService] WebSocket连接成功')
+          window.message.success({ content: '语音识别服务连接成功', key: 'ws-connect' })
+          this.wsConnected = true
+          this.reconnectAttempt = 0
+          this.ws?.send(JSON.stringify({ type: 'identify', role: 'electron' }))
+          resolve(true)
+        }
+
+        this.ws.onclose = () => {
+          console.log('[ASRService] WebSocket连接关闭')
+          this.wsConnected = false
+          this.browserReady = false
+          this.attemptReconnect()
+        }
+
+        this.ws.onerror = (error) => {
+          console.error('[ASRService] WebSocket连接错误:', error)
+          this.wsConnected = false
+          window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' })
+          resolve(false)
+        }
+
+        this.ws.onmessage = this.handleWebSocketMessage
+      } catch (error) {
+        console.error('[ASRService] 创建WebSocket连接失败:', error)
+        window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' })
+        resolve(false)
+      }
+    })
+  }
+
+  /**
+   * Handles a JSON message received over the ASR WebSocket: status updates, recognition results and errors.
+   */
+  private handleWebSocketMessage = (event: MessageEvent) => {
+    try {
+      const data = JSON.parse(event.data)
+      console.log('[ASRService] 收到WebSocket消息:', data)
+
+      if (data.type === 'status') {
+        if (data.message === 'browser_ready' || data.message === 'Browser connected') {
+          console.log('[ASRService] 浏览器已准备好')
+          this.browserReady = true
+          window.message.success({ content: '语音识别浏览器已准备好', key: 'browser-status' })
+        } else if (data.message === 'Browser disconnected' || data.message === 'Browser connection error') {
+          console.log('[ASRService] 浏览器断开连接')
+          this.browserReady = false
+          window.message.error({ content: '语音识别浏览器断开连接', key: 'browser-status' })
+        } else if (data.message === 'stopped') {
+          // Recognition has stopped. This case must live inside the 'status' branch:
+          // as a sibling `else if` it could never be reached.
+          console.log('[ASRService] 语音识别已停止')
+          this.isRecording = false
+          // Show the "completed" message (shown whether or not a final result arrived)
+          window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' })
+
+          // Notify the callback once so the caller can reset its button state
+          if (this.resultCallback && typeof this.resultCallback === 'function') {
+            // An empty string is intended to leave the input box untouched
+            this.resultCallback('')
+          }
+        }
+      } else if (data.type === 'result' && data.data) {
+        // Handle a recognition result
+        console.log('[ASRService] 收到识别结果:', data.data)
+        if (this.resultCallback && typeof this.resultCallback === 'function') {
+          // Only final, non-blank results are forwarded to the callback
+          if (data.data.isFinal && data.data.text && data.data.text.trim()) {
+            console.log('[ASRService] 收到最终结果,调用回调函数,文本:', data.data.text)
+            this.resultCallback(data.data.text)
+            window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
+          } else if (!data.data.isFinal) {
+            // Interim result: log only, do not invoke the callback
+            console.log('[ASRService] 收到中间结果,文本:', data.data.text)
+          } else {
+            console.log('[ASRService] 识别结果为空,不调用回调')
+          }
+        } else {
+          console.warn('[ASRService] 没有设置结果回调函数')
+        }
+      } else if (data.type === 'error') {
+        console.error('[ASRService] 收到错误消息:', data.message || data.data)
+        window.message.error({ content: `语音识别错误: ${data.message || data.data?.error || '未知错误'}`, key: 'asr-error' })
+      }
+    } catch (error) {
+      console.error('[ASRService] 解析WebSocket消息失败:', error, event.data)
+    }
+  }
+
+ /**
+ * Schedules a reconnect with exponential backoff (1s, 2s, 4s, ... capped at 30s); gives up after maxReconnectAttempts
+ */
+ private attemptReconnect = () => {
+ if (this.reconnectTimeout) { // replace any reconnect that is already scheduled
+ clearTimeout(this.reconnectTimeout)
+ this.reconnectTimeout = null
+ }
+
+ if (this.reconnectAttempt >= this.maxReconnectAttempts) {
+ console.log('[ASRService] 达到最大重连次数,停止重连')
+ return
+ }
+
+ const delay = Math.min(1000 * Math.pow(2, this.reconnectAttempt), 30000)
+ console.log(`[ASRService] 将在 ${delay}ms 后尝试重连 (尝试 ${this.reconnectAttempt + 1}/${this.maxReconnectAttempts})`)
+
+ this.reconnectTimeout = setTimeout(() => {
+ this.reconnectAttempt++ // counted when the attempt actually starts; reset to 0 on a successful open
+ this.connectToWebSocketServer().catch(console.error)
+ }, delay)
+ }
+
+ // Callback that receives recognized text (set by startRecording/stopRecording, cleared by cancelRecording)
+ resultCallback: ((text: string) => void) | null = null
+
+  /**
+   * Starts capturing speech with the ASR service selected in settings.
+   *
+   * 'local' sends a start command over the WebSocket connection; any other service
+   * type records the microphone with MediaRecorder until stopRecording is called.
+   * Errors are reported through window.message instead of being thrown.
+   * @param onTranscribed optional result callback, only stored for the 'local' service
+   */
+  startRecording = async (onTranscribed?: (text: string) => void): Promise<void> => {
+    try {
+      const { asrEnabled, asrServiceType } = store.getState().settings
+
+      if (!asrEnabled) {
+        window.message.error({ content: i18n.t('settings.asr.error.not_enabled'), key: 'asr-error' })
+        return
+      }
+
+      // Ignore the request while a recording is already in progress
+      if (this.isRecording) {
+        console.log('已经在录音中,忽略此次请求')
+        return
+      }
+
+      if (asrServiceType === 'local') {
+        const connected = await this.connectToWebSocketServer()
+        if (!connected) {
+          throw new Error('无法连接到语音识别服务')
+        }
+
+        // Poll once per second, up to five times, for the browser page to report ready
+        if (!this.browserReady) {
+          let waitAttempts = 0
+          const maxWaitAttempts = 5
+
+          while (!this.browserReady && waitAttempts < maxWaitAttempts) {
+            window.message.loading({
+              content: `等待浏览器准备就绪 (${waitAttempts + 1}/${maxWaitAttempts})...`,
+              key: 'browser-status'
+            })
+
+            await new Promise(resolve => setTimeout(resolve, 1000))
+            waitAttempts++
+          }
+
+          if (!this.browserReady) {
+            window.message.warning({
+              content: '语音识别浏览器尚未准备好,请确保已打开浏览器页面',
+              key: 'browser-status'
+            })
+            throw new Error('浏览器尚未准备好')
+          }
+        }
+
+        // Remember the callback (when provided) for results pushed over the WebSocket
+        if (onTranscribed && typeof onTranscribed === 'function') {
+          this.resultCallback = onTranscribed
+        }
+
+        if (this.ws && this.wsConnected) {
+          this.ws.send(JSON.stringify({ type: 'start' }))
+          this.isRecording = true
+          console.log('开始语音识别')
+          window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' })
+        } else {
+          throw new Error('WebSocket连接未就绪')
+        }
+        return
+      }
+
+      // MediaRecorder path (OpenAI or browser API): request microphone access first
+      this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+      this.mediaRecorder = new MediaRecorder(this.stream)
+      this.audioChunks = []
+
+      // Collect every non-empty chunk; stopRecording merges them into a single Blob
+      this.mediaRecorder.ondataavailable = (event) => {
+        if (event.data.size > 0) {
+          this.audioChunks.push(event.data)
+        }
+      }
+
+      this.mediaRecorder.start()
+      this.isRecording = true
+      console.log('开始录音')
+      window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' })
+    } catch (error) {
+      console.error('开始录音失败:', error)
+      window.message.error({
+        content: i18n.t('settings.asr.error.start_failed') + ': ' + (error instanceof Error ? error.message : String(error)),
+        key: 'asr-error'
+      })
+      this.isRecording = false
+      // Release the microphone if it was acquired before the failure
+      this.stream?.getTracks().forEach((track) => track.stop())
+      this.stream = null
+      this.mediaRecorder = null
+    }
+  }
+
+ /**
+ * Stops the current recording and has the recorded speech converted to text
+ * @param onTranscribed callback invoked with the transcribed text
+ * @returns Promise that settles once stopping (and, for non-local services, transcription) has finished
+ */
+ stopRecording = async (onTranscribed: (text: string) => void): Promise => {
+ const { asrServiceType } = store.getState().settings
+
+ // Local server: only a stop command is sent over the WebSocket
+ if (asrServiceType === 'local') {
+ if (!this.isRecording) {
+ console.log('没有正在进行的语音识别')
+ return
+ }
+
+ try {
+ // Store the callback for results delivered over the WebSocket
+ this.resultCallback = onTranscribed
+
+ // Send the stop command
+ if (this.ws && this.wsConnected) {
+ this.ws.send(JSON.stringify({ type: 'stop' }))
+ console.log('停止语音识别')
+ window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' })
+
+ // Invoke the callback right away so the button state updates immediately
+ if (onTranscribed) {
+ // An empty string is intended to leave the input box untouched while triggering the button state reset
+ setTimeout(() => onTranscribed(''), 100)
+ }
+ } else {
+ throw new Error('WebSocket连接未就绪')
+ }
+
+ // Reset the recording state
+ this.isRecording = false
+ } catch (error) {
+ console.error('停止语音识别失败:', error)
+ window.message.error({
+ content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message,
+ key: 'asr-processing'
+ })
+ this.isRecording = false
+ }
+ return
+ }
+
+ // Below: the MediaRecorder-based stop logic (OpenAI or browser API)
+ if (!this.isRecording || !this.mediaRecorder) {
+ console.log('没有正在进行的录音')
+ return
+ }
+
+ try {
+ // Promise that resolves with the recorded audio once the recorder has stopped
+ const recordingEndedPromise = new Promise((resolve) => {
+ if (this.mediaRecorder) {
+ this.mediaRecorder.onstop = () => {
+ // Merge all audio chunks into a single Blob
+ const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' })
+ resolve(audioBlob)
+ }
+
+ // Stop recording (onstop is registered first so the event is not missed)
+ this.mediaRecorder.stop()
+ }
+ })
+
+ // Stop all tracks to release the microphone
+ if (this.stream) {
+ this.stream.getTracks().forEach(track => track.stop())
+ this.stream = null
+ }
+
+ // Wait for the recording to end and obtain the audio Blob
+ const audioBlob = await recordingEndedPromise
+
+ // Reset the recording state
+ this.isRecording = false
+ this.mediaRecorder = null
+
+ console.log('录音结束,音频大小:', audioBlob.size, 'bytes')
+
+ // Show the "processing" message
+ window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' })
+
+ if (asrServiceType === 'openai') {
+ // Transcribe through the OpenAI Whisper API
+ await this.transcribeWithOpenAI(audioBlob, onTranscribed)
+ } else if (asrServiceType === 'browser') {
+ // Transcribe through the browser's Web Speech API
+ await this.transcribeWithBrowser(audioBlob, onTranscribed)
+ } else {
+ throw new Error(`不支持的ASR服务类型: ${asrServiceType}`)
+ }
+ } catch (error) {
+ console.error('停止录音或转录失败:', error)
+ window.message.error({
+ content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message,
+ key: 'asr-processing'
+ })
+
+ // Reset the recording state and release the microphone
+ this.isRecording = false
+ this.mediaRecorder = null
+ if (this.stream) {
+ this.stream.getTracks().forEach(track => track.stop())
+ this.stream = null
+ }
+ }
+ }
+
+  /**
+   * Transcribes recorded audio by POSTing it to the transcription endpoint configured in settings (asrApiUrl).
+   * @param audioBlob recorded audio, uploaded as recording.webm
+   * @param onTranscribed callback invoked with the recognized text
+   * @returns resolves after the callback ran; rejects on a missing API key, an HTTP error or an empty result
+   */
+  private transcribeWithOpenAI = async (audioBlob: Blob, onTranscribed: (text: string) => void): Promise<void> => {
+    try {
+      const { asrApiKey, asrApiUrl, asrModel } = store.getState().settings
+
+      if (!asrApiKey) {
+        throw new Error(i18n.t('settings.asr.error.no_api_key'))
+      }
+
+      // Build the multipart form body
+      const formData = new FormData()
+      formData.append('file', audioBlob, 'recording.webm')
+      formData.append('model', asrModel || 'whisper-1')
+
+      // Call the OpenAI API
+      const response = await fetch(asrApiUrl, {
+        method: 'POST',
+        headers: {
+          'Authorization': `Bearer ${asrApiKey}`
+        },
+        body: formData
+      })
+
+      if (!response.ok) {
+        // The error body may not be JSON (e.g. a proxy error page); fall back to the generic message
+        const errorData = await response.json().catch(() => null)
+        throw new Error(errorData?.error?.message || 'OpenAI语音识别失败')
+      }
+
+      const data = await response.json()
+      const transcribedText = data.text
+
+      if (transcribedText) {
+        console.log('语音识别成功:', transcribedText)
+        window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
+        onTranscribed(transcribedText)
+      } else {
+        throw new Error('未能识别出文本')
+      }
+    } catch (error) {
+      console.error('OpenAI语音识别失败:', error)
+      throw error
+    }
+  }
+
+ /**
+ * Placeholder for Web Speech API recognition: performs no transcription and passes a fixed message to the callback
+ * @param _audioBlob recorded audio (unused)
+ * @param onTranscribed callback invoked with the placeholder text
+ * @returns Promise that rejects when the browser exposes no SpeechRecognition API
+ */
+ private transcribeWithBrowser = async (_audioBlob: Blob, onTranscribed: (text: string) => void): Promise => {
+ try {
+ // Check whether the browser supports the Web Speech API
+ if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
+ throw new Error(i18n.t('settings.asr.error.browser_not_support'))
+ }
+
+ // The Web Speech API cannot process recorded audio directly, so this only simulates a successful callback.
+ // A real implementation should run live recognition through a SpeechRecognition object.
+ // This is a simplification; a real project may need a more complete implementation.
+ window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
+ onTranscribed('浏览器语音识别功能尚未完全实现')
+ } catch (error) {
+ console.error('浏览器语音识别失败:', error)
+ throw error
+ }
+ }
+
+ /**
+ * Reports whether a recording or recognition session is currently active
+ * @returns boolean value of the internal isRecording flag
+ */
+ isCurrentlyRecording = (): boolean => {
+ return this.isRecording
+ }
+
+ /**
+ * Cancels the current recording without transcribing it
+ */
+ cancelRecording = (): void => {
+ const { asrServiceType } = store.getState().settings
+
+ // Local server path
+ if (asrServiceType === 'local') {
+ if (this.isRecording) {
+ // Send the stop command
+ if (this.ws && this.wsConnected) {
+ this.ws.send(JSON.stringify({ type: 'stop' }))
+ }
+
+ // Reset state; clearing the callback means later results are not delivered
+ this.isRecording = false
+ this.resultCallback = null
+
+ console.log('语音识别已取消')
+ window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
+ }
+ return
+ }
+
+ // Below: the MediaRecorder-based cancel logic (OpenAI or browser API)
+ if (this.isRecording && this.mediaRecorder) {
+ // Stop the MediaRecorder
+ this.mediaRecorder.stop()
+
+ // Stop all tracks to release the microphone
+ if (this.stream) {
+ this.stream.getTracks().forEach(track => track.stop())
+ this.stream = null
+ }
+
+ // Reset state and discard the recorded chunks
+ this.isRecording = false
+ this.mediaRecorder = null
+ this.audioChunks = []
+
+ console.log('录音已取消')
+ window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
+ }
+ }
+
+  /**
+   * Closes the WebSocket connection and cancels any pending reconnect, so the socket stays closed.
+   */
+  closeWebSocketConnection = (): void => {
+    if (this.ws) {
+      this.ws.onclose = null // detach first: the close handler would otherwise call attemptReconnect
+      try {
+        this.ws.close()
+      } catch (e) {
+        console.error('[ASRService] 关闭WebSocket连接失败:', e)
+      }
+      this.ws = null
+    }
+
+    this.wsConnected = false
+    this.browserReady = false
+    if (this.reconnectTimeout) {
+      clearTimeout(this.reconnectTimeout)
+      this.reconnectTimeout = null
+    }
+  }
+
+ /**
+ * Opens the speech recognition browser page
+ */
+ openBrowserPage = (): void => {
+ // Open http://localhost:8080 in a new window via window.open
+ window.open('http://localhost:8080', '_blank')
+ }
+}
+
+// Create the singleton instance shared by every importer of this module
+const instance = new ASRService()
+export default instance
diff --git a/src/renderer/src/store/settings.ts b/src/renderer/src/store/settings.ts
index bc3340eea6..57c2a46b82 100644
--- a/src/renderer/src/store/settings.ts
+++ b/src/renderer/src/store/settings.ts
@@ -129,6 +129,12 @@ export interface SettingsState {
filterHtmlTags: boolean // 过滤HTML标签
maxTextLength: number // 最大文本长度
}
+ // ASR配置(语音识别)
+ asrEnabled: boolean
+ asrServiceType: string // ASR服务类型:openai或browser
+ asrApiKey: string
+ asrApiUrl: string
+ asrModel: string
// Quick Panel Triggers
enableQuickPanelTriggers: boolean
// Export Menu Options
@@ -248,6 +254,12 @@ export const initialState: SettingsState = {
filterHtmlTags: true, // 默认过滤HTML标签
maxTextLength: 4000 // 默认最大文本长度
},
+ // ASR配置(语音识别)
+ asrEnabled: false,
+ asrServiceType: 'openai', // 默认使用 OpenAI ASR
+ asrApiKey: '',
+ asrApiUrl: 'https://api.openai.com/v1/audio/transcriptions',
+ asrModel: 'whisper-1',
// Quick Panel Triggers
enableQuickPanelTriggers: false,
// Export Menu Options
@@ -628,6 +640,22 @@ const settingsSlice = createSlice({
...action.payload
}
},
+    // ASR (speech recognition) actions; payload types mirror the SettingsState fields they assign
+    setAsrEnabled: (state, action: PayloadAction<boolean>) => {
+      state.asrEnabled = action.payload
+    },
+    setAsrServiceType: (state, action: PayloadAction<string>) => {
+      state.asrServiceType = action.payload
+    },
+    setAsrApiKey: (state, action: PayloadAction<string>) => {
+      state.asrApiKey = action.payload
+    },
+    setAsrApiUrl: (state, action: PayloadAction<string>) => {
+      state.asrApiUrl = action.payload
+    },
+    setAsrModel: (state, action: PayloadAction<string>) => {
+      state.asrModel = action.payload
+    },
// Quick Panel Triggers action
setEnableQuickPanelTriggers: (state, action: PayloadAction) => {
state.enableQuickPanelTriggers = action.payload
@@ -736,7 +764,12 @@ export const {
addTtsCustomModel,
removeTtsCustomVoice,
removeTtsCustomModel,
- setTtsFilterOptions
+ setTtsFilterOptions,
+ setAsrEnabled,
+ setAsrServiceType,
+ setAsrApiKey,
+ setAsrApiUrl,
+ setAsrModel
} = settingsSlice.actions
export default settingsSlice.reducer
diff --git a/src/renderer/src/types/electron.d.ts b/src/renderer/src/types/electron.d.ts
index 7059da90aa..a916179e7a 100644
--- a/src/renderer/src/types/electron.d.ts
+++ b/src/renderer/src/types/electron.d.ts
@@ -4,6 +4,21 @@ interface ObsidianAPI {
getFolders: (vaultName: string) => Promise>
}
+interface IpcRendererAPI { // ipcRenderer methods reachable through window.electron
+  invoke: (channel: string, ...args: any[]) => Promise<any> // result shape depends on the channel
+  on: (channel: string, listener: (...args: any[]) => void) => void
+  once: (channel: string, listener: (...args: any[]) => void) => void
+  removeListener: (channel: string, listener: (...args: any[]) => void) => void
+  removeAllListeners: (channel: string) => void
+  send: (channel: string, ...args: any[]) => void
+  sendSync: (channel: string, ...args: any[]) => any
+}
+
+interface ElectronAPI { // type of window.electron
+ ipcRenderer: IpcRendererAPI // used by the renderer to invoke main-process channels
+}
+
interface Window {
obsidian: ObsidianAPI
+ electron: ElectronAPI
}