From bbe08e2a6cf09fecf587ef4468f8d17f261cc7fb Mon Sep 17 00:00:00 2001 From: 1600822305 <1600822305@qq.com> Date: Thu, 10 Apr 2025 12:30:22 +0800 Subject: [PATCH] ASR-TTS --- electron-builder.yml | 5 + electron.vite.config.ts | 11 + public/asr-server/index.html | 198 ++++++ public/asr-server/server.js | 146 ++++ src/main/ipc.ts | 104 +++ src/renderer/src/assets/asr-server/index.html | 368 ++++++++++ .../src/assets/asr-server/package.json | 27 + src/renderer/src/assets/asr-server/server.js | 172 +++++ src/renderer/src/components/ASRButton.tsx | 226 ++++++ src/renderer/src/i18n/locales/en-us.json | 46 ++ src/renderer/src/i18n/locales/ja-jp.json | 46 ++ src/renderer/src/i18n/locales/zh-cn.json | 64 +- .../src/pages/home/Inputbar/Inputbar.tsx | 14 + .../pages/home/Messages/MessageMenubar.tsx | 14 +- .../src/pages/settings/SettingsPage.tsx | 2 +- .../settings/TTSSettings/ASRSettings.tsx | 271 +++++++ .../settings/TTSSettings/TTSSettings.tsx | 668 +++++++++--------- src/renderer/src/services/ASRServerService.ts | 129 ++++ src/renderer/src/services/ASRService.ts | 560 +++++++++++++++ src/renderer/src/store/settings.ts | 35 +- src/renderer/src/types/electron.d.ts | 15 + 21 files changed, 2790 insertions(+), 331 deletions(-) create mode 100644 public/asr-server/index.html create mode 100644 public/asr-server/server.js create mode 100644 src/renderer/src/assets/asr-server/index.html create mode 100644 src/renderer/src/assets/asr-server/package.json create mode 100644 src/renderer/src/assets/asr-server/server.js create mode 100644 src/renderer/src/components/ASRButton.tsx create mode 100644 src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx create mode 100644 src/renderer/src/services/ASRServerService.ts create mode 100644 src/renderer/src/services/ASRService.ts diff --git a/electron-builder.yml b/electron-builder.yml index 7cc8ffe1f9..d45562fb86 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -27,6 +27,11 @@ files: - '!node_modules/@tavily/core/node_modules/js-tiktoken' - '!node_modules/pdf-parse/lib/pdf.js/{v1.9.426,v1.10.88,v2.0.550}' - '!node_modules/mammoth/{mammoth.browser.js,mammoth.browser.min.js}' + # 包含 ASR 服务器文件 + - src/renderer/src/assets/asr-server/**/* + # 包含打包后的ASR服务器可执行文件 + - cherry-asr-server.exe + - index.html asarUnpack: - resources/** - '**/*.{node,dll,metal,exp,lib}' diff --git a/electron.vite.config.ts b/electron.vite.config.ts index d0ceafc025..3a52c4ddcb 100644 --- a/electron.vite.config.ts +++ b/electron.vite.config.ts @@ -76,6 +76,17 @@ export default defineConfig({ }, optimizeDeps: { exclude: [] + }, + build: { + rollupOptions: { + input: { + index: resolve('src/renderer/index.html'), + }, + }, + // 复制ASR服务器文件 + assetsInlineLimit: 0, + // 确保复制assets目录下的所有文件 + copyPublicDir: true, } } }) diff --git a/public/asr-server/index.html b/public/asr-server/index.html new file mode 100644 index 0000000000..9cd22e0b5d --- /dev/null +++ b/public/asr-server/index.html @@ -0,0 +1,198 @@ + + + + + + + Edge ASR (External) + + + + +

+    Edge ASR 中继页面
+    这个页面需要在 Edge 浏览器中保持打开,以便 Electron 应用使用其语音识别功能。
+    正在连接到服务器...
+ + + + + \ No newline at end of file diff --git a/public/asr-server/server.js b/public/asr-server/server.js new file mode 100644 index 0000000000..dca76b894c --- /dev/null +++ b/public/asr-server/server.js @@ -0,0 +1,146 @@ +const http = require('http') +const WebSocket = require('ws') +const express = require('express') +const path = require('path') // Need path module + +const app = express() +const port = 8080 // Define the port + +// 提供网页给 Edge 浏览器 +app.get('/', (req, res) => { + // Use path.join for cross-platform compatibility + res.sendFile(path.join(__dirname, 'index.html')) +}) + +const server = http.createServer(app) +const wss = new WebSocket.Server({ server }) + +let browserConnection = null +let electronConnection = null + +wss.on('connection', (ws) => { + console.log('[Server] WebSocket client connected') // Add log + + ws.on('message', (message) => { + let data + try { + // Ensure message is treated as string before parsing + data = JSON.parse(message.toString()) + console.log('[Server] Received message:', data) // Log parsed data + } catch (e) { + console.error('[Server] Failed to parse message or message is not JSON:', message.toString(), e) + return // Ignore non-JSON messages + } + + // 识别客户端类型 + if (data.type === 'identify') { + if (data.role === 'browser') { + browserConnection = ws + console.log('[Server] Browser identified and connected') + // Notify Electron that the browser is ready + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' })); + console.log('[Server] Sent browser_ready status to Electron'); + } + // Notify Electron if it's already connected + if (electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connected' })) + } + ws.on('close', () => { + console.log('[Server] Browser disconnected') + browserConnection = null + // Notify Electron + if (electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser disconnected' })) + } + }) + ws.on('error', (error) => { + console.error('[Server] Browser WebSocket error:', error) + browserConnection = null // Assume disconnected on error + if (electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' })) + } + }) + } else if (data.role === 'electron') { + electronConnection = ws + console.log('[Server] Electron identified and connected') + // If browser is already connected when Electron connects, notify Electron immediately + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' })); + console.log('[Server] Sent initial browser_ready status to Electron'); + } + ws.on('close', () => { + console.log('[Server] Electron disconnected') + electronConnection = null + // Maybe send stop to browser if electron disconnects? 
+ // if (browserConnection) browserConnection.send(JSON.stringify({ type: 'stop' })); + }) + ws.on('error', (error) => { + console.error('[Server] Electron WebSocket error:', error) + electronConnection = null // Assume disconnected on error + }) + } + } + // Electron 控制开始/停止 + else if (data.type === 'start' && ws === electronConnection) { + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying START command to browser') + browserConnection.send(JSON.stringify({ type: 'start' })) + } else { + console.log('[Server] Cannot relay START: Browser not connected') + // Optionally notify Electron back + electronConnection.send(JSON.stringify({ type: 'error', message: 'Browser not connected for ASR' })) + } + } else if (data.type === 'stop' && ws === electronConnection) { + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying STOP command to browser') + browserConnection.send(JSON.stringify({ type: 'stop' })) + } else { + console.log('[Server] Cannot relay STOP: Browser not connected') + } + } + // 浏览器发送识别结果 + else if (data.type === 'result' && ws === browserConnection) { + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + // console.log('[Server] Relaying RESULT to Electron:', data.data); // Log less frequently if needed + electronConnection.send(JSON.stringify({ type: 'result', data: data.data })) + } else { + // console.log('[Server] Cannot relay RESULT: Electron not connected'); + } + } + // 浏览器发送状态更新 (例如 'stopped') + else if (data.type === 'status' && ws === browserConnection) { + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying STATUS to Electron:', data.message) // Log status being relayed + electronConnection.send(JSON.stringify({ type: 'status', message: data.message })) + } else { + console.log('[Server] Cannot relay STATUS: Electron not connected') + } + } else { + console.log('[Server] Received unknown message type or from unknown source:', data) + } + }) + + ws.on('error', (error) => { + // Generic error handling for connection before identification + console.error('[Server] Initial WebSocket connection error:', error) + // Attempt to clean up based on which connection it might be (if identified) + if (ws === browserConnection) { + browserConnection = null + if (electronConnection) + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' })) + } else if (ws === electronConnection) { + electronConnection = null + } + }) +}) + +server.listen(port, () => { + console.log(`[Server] Server running at http://localhost:${port}`) +}) + +// Handle server errors +server.on('error', (error) => { + console.error(`[Server] Failed to start server:`, error) + process.exit(1) // Exit if server fails to start +}) diff --git a/src/main/ipc.ts b/src/main/ipc.ts index c6bceac3ab..47c13b0ef7 100644 --- a/src/main/ipc.ts +++ b/src/main/ipc.ts @@ -1,4 +1,6 @@ import fs from 'node:fs' +import { spawn, ChildProcess } from 'node:child_process' +import path from 'node:path' import { isMac, isWin } from '@main/constant' import { getBinaryPath, isBinaryExists, runInstallScript } from '@main/utils/process' @@ -29,6 +31,9 @@ import { decrypt, encrypt } from './utils/aes' import { getConfigDir, getFilesDir } from './utils/file' import { compress, decompress } from './utils/zip' +// 存储ASR服务器进程 +let asrServerProcess: ChildProcess | null = null + const fileManager = new FileStorage() 
const backupManager = new BackupManager() const exportService = new ExportService(fileManager) @@ -291,4 +296,103 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { ipcMain.handle(IpcChannel.Nutstore_GetDirectoryContents, (_, token: string, path: string) => NutstoreService.getDirectoryContents(token, path) ) + + // 启动ASR服务器 + ipcMain.handle('start-asr-server', async () => { + try { + if (asrServerProcess) { + return { success: true, pid: asrServerProcess.pid } + } + + // 获取服务器文件路径 + console.log('App path:', app.getAppPath()) + // 在开发环境和生产环境中使用不同的路径 + let serverPath = '' + let isExeFile = false + + // 首先检查是否有打包后的exe文件 + const exePath = path.join(app.getAppPath(), 'resources', 'cherry-asr-server.exe') + if (fs.existsSync(exePath)) { + serverPath = exePath + isExeFile = true + console.log('检测到打包后的exe文件:', serverPath) + } else if (process.env.NODE_ENV === 'development') { + // 开发环境 + serverPath = path.join(app.getAppPath(), 'src', 'renderer', 'src', 'assets', 'asr-server', 'server.js') + } else { + // 生产环境 + serverPath = path.join(app.getAppPath(), 'public', 'asr-server', 'server.js') + } + console.log('ASR服务器路径:', serverPath) + + // 检查文件是否存在 + if (!fs.existsSync(serverPath)) { + return { success: false, error: '服务器文件不存在' } + } + + // 启动服务器进程 + if (isExeFile) { + // 如果是exe文件,直接启动 + asrServerProcess = spawn(serverPath, [], { + stdio: 'pipe', + detached: false + }) + } else { + // 如果是js文件,使用node启动 + asrServerProcess = spawn('node', [serverPath], { + stdio: 'pipe', + detached: false + }) + } + + // 处理服务器输出 + asrServerProcess.stdout?.on('data', (data) => { + console.log(`[ASR Server] ${data.toString()}`) + }) + + asrServerProcess.stderr?.on('data', (data) => { + console.error(`[ASR Server Error] ${data.toString()}`) + }) + + // 处理服务器退出 + asrServerProcess.on('close', (code) => { + console.log(`[ASR Server] 进程退出,退出码: ${code}`) + asrServerProcess = null + }) + + // 等待一段时间确保服务器启动 + await new Promise(resolve => setTimeout(resolve, 1000)) + + return { success: true, pid: asrServerProcess.pid } + } catch (error) { + console.error('启动ASR服务器失败:', error) + return { success: false, error: (error as Error).message } + } + }) + + // 停止ASR服务器 + ipcMain.handle('stop-asr-server', async (_event, pid) => { + try { + if (!asrServerProcess) { + return { success: true } + } + + // 检查PID是否匹配 + if (asrServerProcess.pid !== pid) { + console.warn(`请求停止的PID (${pid}) 与当前运行的ASR服务器PID (${asrServerProcess.pid}) 不匹配`) + } + + // 杀死进程 + asrServerProcess.kill() + + // 等待一段时间确保进程已经退出 + await new Promise(resolve => setTimeout(resolve, 500)) + + asrServerProcess = null + return { success: true } + } catch (error) { + console.error('停止ASR服务器失败:', error) + return { success: false, error: (error as Error).message } + } + }) } diff --git a/src/renderer/src/assets/asr-server/index.html b/src/renderer/src/assets/asr-server/index.html new file mode 100644 index 0000000000..1d5d20e5ca --- /dev/null +++ b/src/renderer/src/assets/asr-server/index.html @@ -0,0 +1,368 @@ + + + + + + + Edge ASR (External) + + + + +

+    Edge ASR 中继页面
+    这个页面需要在 Edge 浏览器中保持打开,以便 Electron 应用使用其语音识别功能。
+    正在连接到服务器...
+ + + + + \ No newline at end of file diff --git a/src/renderer/src/assets/asr-server/package.json b/src/renderer/src/assets/asr-server/package.json new file mode 100644 index 0000000000..75eaaadcb4 --- /dev/null +++ b/src/renderer/src/assets/asr-server/package.json @@ -0,0 +1,27 @@ +{ + "name": "cherry-asr-server", + "version": "1.0.0", + "description": "Cherry Studio ASR Server", + "main": "server.js", + "bin": "server.js", + "scripts": { + "start": "node server.js", + "build": "pkg ." + }, + "pkg": { + "targets": [ + "node16-win-x64" + ], + "outputPath": "dist", + "assets": [ + "index.html" + ] + }, + "dependencies": { + "express": "^4.18.2", + "ws": "^8.13.0" + }, + "devDependencies": { + "pkg": "^5.8.1" + } +} diff --git a/src/renderer/src/assets/asr-server/server.js b/src/renderer/src/assets/asr-server/server.js new file mode 100644 index 0000000000..ac57acb360 --- /dev/null +++ b/src/renderer/src/assets/asr-server/server.js @@ -0,0 +1,172 @@ +const http = require('http') +const WebSocket = require('ws') +const express = require('express') +const path = require('path') // Need path module + +const app = express() +const port = 8080 // Define the port + +// 获取index.html文件的路径 +function getIndexHtmlPath() { + // 在开发环境中,直接使用相对路径 + const devPath = path.join(__dirname, 'index.html'); + + // 在pkg打包后,文件会被包含在可执行文件中 + // 使用process.pkg检测是否是打包环境 + if (process.pkg) { + // 在打包环境中,使用绝对路径 + return path.join(path.dirname(process.execPath), 'index.html'); + } + + // 如果文件存在,返回开发路径 + try { + if (require('fs').existsSync(devPath)) { + return devPath; + } + } catch (e) { + console.error('Error checking file existence:', e); + } + + // 如果都不存在,尝试使用当前目录 + return path.join(process.cwd(), 'index.html'); +} + +// 提供网页给 Edge 浏览器 +app.get('/', (req, res) => { + const indexPath = getIndexHtmlPath(); + console.log(`Serving index.html from: ${indexPath}`); + res.sendFile(indexPath); +}) + +const server = http.createServer(app) +const wss = new WebSocket.Server({ server }) + +let browserConnection = null +let electronConnection = null + +wss.on('connection', (ws) => { + console.log('[Server] WebSocket client connected') // Add log + + ws.on('message', (message) => { + let data + try { + // Ensure message is treated as string before parsing + data = JSON.parse(message.toString()) + console.log('[Server] Received message:', data) // Log parsed data + } catch (e) { + console.error('[Server] Failed to parse message or message is not JSON:', message.toString(), e) + return // Ignore non-JSON messages + } + + // 识别客户端类型 + if (data.type === 'identify') { + if (data.role === 'browser') { + browserConnection = ws + console.log('[Server] Browser identified and connected') + // Notify Electron that the browser is ready + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' })); + console.log('[Server] Sent browser_ready status to Electron'); + } + // Notify Electron if it's already connected + if (electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connected' })) + } + ws.on('close', () => { + console.log('[Server] Browser disconnected') + browserConnection = null + // Notify Electron + if (electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser disconnected' })) + } + }) + ws.on('error', (error) => { + console.error('[Server] Browser WebSocket error:', error) + browserConnection = null // Assume disconnected on error + if 
(electronConnection) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' })) + } + }) + } else if (data.role === 'electron') { + electronConnection = ws + console.log('[Server] Electron identified and connected') + // If browser is already connected when Electron connects, notify Electron immediately + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' })); + console.log('[Server] Sent initial browser_ready status to Electron'); + } + ws.on('close', () => { + console.log('[Server] Electron disconnected') + electronConnection = null + // Maybe send stop to browser if electron disconnects? + // if (browserConnection) browserConnection.send(JSON.stringify({ type: 'stop' })); + }) + ws.on('error', (error) => { + console.error('[Server] Electron WebSocket error:', error) + electronConnection = null // Assume disconnected on error + }) + } + } + // Electron 控制开始/停止 + else if (data.type === 'start' && ws === electronConnection) { + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying START command to browser') + browserConnection.send(JSON.stringify({ type: 'start' })) + } else { + console.log('[Server] Cannot relay START: Browser not connected') + // Optionally notify Electron back + electronConnection.send(JSON.stringify({ type: 'error', message: 'Browser not connected for ASR' })) + } + } else if (data.type === 'stop' && ws === electronConnection) { + if (browserConnection && browserConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying STOP command to browser') + browserConnection.send(JSON.stringify({ type: 'stop' })) + } else { + console.log('[Server] Cannot relay STOP: Browser not connected') + } + } + // 浏览器发送识别结果 + else if (data.type === 'result' && ws === browserConnection) { + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + // console.log('[Server] Relaying RESULT to Electron:', data.data); // Log less frequently if needed + electronConnection.send(JSON.stringify({ type: 'result', data: data.data })) + } else { + // console.log('[Server] Cannot relay RESULT: Electron not connected'); + } + } + // 浏览器发送状态更新 (例如 'stopped') + else if (data.type === 'status' && ws === browserConnection) { + if (electronConnection && electronConnection.readyState === WebSocket.OPEN) { + console.log('[Server] Relaying STATUS to Electron:', data.message) // Log status being relayed + electronConnection.send(JSON.stringify({ type: 'status', message: data.message })) + } else { + console.log('[Server] Cannot relay STATUS: Electron not connected') + } + } else { + console.log('[Server] Received unknown message type or from unknown source:', data) + } + }) + + ws.on('error', (error) => { + // Generic error handling for connection before identification + console.error('[Server] Initial WebSocket connection error:', error) + // Attempt to clean up based on which connection it might be (if identified) + if (ws === browserConnection) { + browserConnection = null + if (electronConnection) + electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' })) + } else if (ws === electronConnection) { + electronConnection = null + } + }) +}) + +server.listen(port, () => { + console.log(`[Server] Server running at http://localhost:${port}`) +}) + +// Handle server errors +server.on('error', (error) => { + console.error(`[Server] 
Failed to start server:`, error) + process.exit(1) // Exit if server fails to start +}) diff --git a/src/renderer/src/components/ASRButton.tsx b/src/renderer/src/components/ASRButton.tsx new file mode 100644 index 0000000000..67813a9af1 --- /dev/null +++ b/src/renderer/src/components/ASRButton.tsx @@ -0,0 +1,226 @@ +import { AudioOutlined, LoadingOutlined } from '@ant-design/icons' +import { useSettings } from '@renderer/hooks/useSettings' +import ASRService from '@renderer/services/ASRService' +import { Button, Tooltip } from 'antd' +import { FC, useCallback, useEffect, useState } from 'react' +import { useTranslation } from 'react-i18next' +import styled from 'styled-components' + +interface Props { + onTranscribed: (text: string) => void + disabled?: boolean + style?: React.CSSProperties +} + +const ASRButton: FC = ({ onTranscribed, disabled = false, style }) => { + const { t } = useTranslation() + const { asrEnabled } = useSettings() + const [isRecording, setIsRecording] = useState(false) + const [isProcessing, setIsProcessing] = useState(false) + const [countdown, setCountdown] = useState(0) + const [isCountingDown, setIsCountingDown] = useState(false) + + const handleASR = useCallback(async () => { + if (!asrEnabled) { + window.message.error({ content: t('settings.asr.error.not_enabled'), key: 'asr-error' }) + return + } + + if (isRecording) { + // 停止录音并处理 + setIsRecording(false) + setIsProcessing(true) + try { + // 添加事件监听器,监听服务器发送的stopped消息 + const originalCallback = ASRService.resultCallback + const stopCallback = (text: string) => { + // 如果是空字符串,只重置状态,不调用原始回调 + if (text === '') { + setIsProcessing(false) + return + } + + // 否则调用原始回调并重置状态 + if (originalCallback) originalCallback(text) + setIsProcessing(false) + } + + await ASRService.stopRecording(stopCallback) + } catch (error) { + console.error('ASR error:', error) + setIsProcessing(false) + } + } else { + // 开始录音 + // 显示3秒倒计时,同时立即开始录音 + setIsCountingDown(true) + setCountdown(3) + setIsRecording(true) + + // 立即发送开始信号 + try { + await ASRService.startRecording(onTranscribed) + } catch (error) { + console.error('Failed to start recording:', error) + setIsRecording(false) + setIsCountingDown(false) + return + } + + // 倒计时结束后只隐藏倒计时显示 + setTimeout(() => { + setIsCountingDown(false) + }, 3000) // 3秒倒计时 + } + }, [asrEnabled, isRecording, onTranscribed, t]) + + const handleCancel = useCallback(() => { + if (isCountingDown) { + // 如果在倒计时中,取消倒计时和录音 + setIsCountingDown(false) + setCountdown(0) + // 同时取消录音,因为录音已经开始 + ASRService.cancelRecording() + setIsRecording(false) + } else if (isRecording) { + // 如果已经在录音,取消录音 + ASRService.cancelRecording() + setIsRecording(false) + } + }, [isRecording, isCountingDown]) + + // 倒计时效果 + useEffect(() => { + if (isCountingDown && countdown > 0) { + const timer = setTimeout(() => { + setCountdown(countdown - 1) + }, 1000) + return () => clearTimeout(timer) + } + return undefined // 添加返回值以解决TS7030错误 + }, [countdown, isCountingDown]) + + if (!asrEnabled) { + return null + } + + return ( + + + : isCountingDown ? null : } + onClick={handleASR} + onDoubleClick={handleCancel} + disabled={disabled || isProcessing || (isCountingDown && countdown > 0)} + style={style} + className={isCountingDown ? 
'counting-down' : ''} + > + {isCountingDown && ( + {countdown} + )} + + {isCountingDown && ( + + {t('settings.asr.preparing')} ({countdown}) + + )} + + + ) +} + +const ButtonWrapper = styled.div` + position: relative; + display: inline-block; +` + +const CountdownIndicator = styled.div` + position: absolute; + top: -25px; + left: 50%; + transform: translateX(-50%); + background-color: var(--color-primary); + color: white; + padding: 2px 8px; + border-radius: 10px; + font-size: 12px; + white-space: nowrap; + box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); + animation: pulse 1s infinite; + z-index: 10; + + @keyframes pulse { + 0% { opacity: 0.7; } + 50% { opacity: 1; } + 100% { opacity: 0.7; } + } + + &:after { + content: ''; + position: absolute; + bottom: -5px; + left: 50%; + transform: translateX(-50%); + width: 0; + height: 0; + border-left: 5px solid transparent; + border-right: 5px solid transparent; + border-top: 5px solid var(--color-primary); + } +` + +const CountdownNumber = styled.span` + font-size: 18px; + font-weight: bold; + animation: zoom 1s infinite; + + @keyframes zoom { + 0% { transform: scale(0.8); } + 50% { transform: scale(1.2); } + 100% { transform: scale(0.8); } + } +` + +const StyledButton = styled(Button)` + min-width: 30px; + height: 30px; + font-size: 16px; + border-radius: 50%; + transition: all 0.3s ease; + color: var(--color-icon); + display: flex; + flex-direction: row; + justify-content: center; + align-items: center; + padding: 0; + &.anticon, + &.iconfont { + transition: all 0.3s ease; + color: var(--color-icon); + } + &:hover { + background-color: var(--color-background-soft); + .anticon, + .iconfont { + color: var(--color-text-1); + } + } + &.active { + background-color: var(--color-primary) !important; + .anticon, + .iconfont { + color: var(--color-white-soft); + } + &:hover { + background-color: var(--color-primary); + } + } + &.counting-down { + font-weight: bold; + background-color: var(--color-primary); + color: var(--color-white-soft); + } +` + +export default ASRButton diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 2bfaadb851..b35ae6d099 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -1377,6 +1377,52 @@ "test": "Test Speech", "help": "Text-to-speech functionality supports converting text to natural-sounding speech.", "learn_more": "Learn more" + }, + "asr": { + "title": "Speech Recognition", + "tab_title": "Speech Recognition", + "enable": "Enable Speech Recognition", + "enable.help": "Enable to convert speech to text", + "service_type": "Service Type", + "service_type.browser": "Browser", + "service_type.local": "Local Server", + "api_key": "API Key", + "api_key.placeholder": "Enter OpenAI API key", + "api_url": "API URL", + "api_url.placeholder": "Example: https://api.openai.com/v1/audio/transcriptions", + "model": "Model", + "browser.info": "Use the browser's built-in speech recognition feature, no additional setup required", + "local.info": "Use local server and browser for speech recognition, need to start the server and open the browser page first", + "local.browser_tip": "Please open this page in Edge browser and keep the browser window open", + "local.test_connection": "Test Connection", + "local.connection_success": "Connection successful", + "local.connection_failed": "Connection failed, please make sure the server is running", + "server.start": "Start Server", + "server.stop": "Stop Server", + "server.starting": "Starting 
server...", + "server.started": "Server started", + "server.stopping": "Stopping server...", + "server.stopped": "Server stopped", + "server.already_running": "Server is already running", + "server.not_running": "Server is not running", + "server.start_failed": "Failed to start server", + "server.stop_failed": "Failed to stop server", + "open_browser": "Open Browser Page", + "test": "Test Speech Recognition", + "test_info": "Please use the speech recognition button in the input box to test", + "start": "Start Recording", + "stop": "Stop Recording", + "preparing": "Preparing", + "recording": "Recording...", + "processing": "Processing speech...", + "success": "Speech recognition successful", + "completed": "Speech recognition completed", + "canceled": "Recording canceled", + "error": { + "not_enabled": "Speech recognition is not enabled", + "start_failed": "Failed to start recording", + "transcribe_failed": "Failed to transcribe speech" + } } }, "translate": { diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index 4b94f9793b..f7fde71c01 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -1356,6 +1356,52 @@ }, "help": "OpenAIのTTS APIを使用するには、APIキーが必要です。Edge TTSはブラウザの機能を使用するため、APIキーは不要です。", "learn_more": "詳細はこちら" + }, + "asr": { + "title": "音声認識", + "tab_title": "音声認識", + "enable": "音声認識を有効にする", + "enable.help": "音声をテキストに変換する機能を有効にします", + "service_type": "サービスタイプ", + "service_type.browser": "ブラウザ", + "service_type.local": "ローカルサーバー", + "api_key": "APIキー", + "api_key.placeholder": "OpenAI APIキーを入力", + "api_url": "API URL", + "api_url.placeholder": "例:https://api.openai.com/v1/audio/transcriptions", + "model": "モデル", + "browser.info": "ブラウザの内蔵音声認識機能を使用します。追加設定は不要です", + "local.info": "ローカルサーバーとブラウザを使用して音声認識を行います。サーバーを起動してブラウザページを開く必要があります", + "local.browser_tip": "このページをEdgeブラウザで開き、ブラウザウィンドウを開いたままにしてください", + "local.test_connection": "接続テスト", + "local.connection_success": "接続成功", + "local.connection_failed": "接続失敗。サーバーが起動していることを確認してください", + "server.start": "サーバー起動", + "server.stop": "サーバー停止", + "server.starting": "サーバーを起動中...", + "server.started": "サーバーが起動しました", + "server.stopping": "サーバーを停止中...", + "server.stopped": "サーバーが停止しました", + "server.already_running": "サーバーは既に実行中です", + "server.not_running": "サーバーは実行されていません", + "server.start_failed": "サーバーの起動に失敗しました", + "server.stop_failed": "サーバーの停止に失敗しました", + "open_browser": "ブラウザページを開く", + "test": "音声認識テスト", + "test_info": "入力ボックスの音声認識ボタンを使用してテストしてください", + "start": "録音開始", + "stop": "録音停止", + "preparing": "準備中", + "recording": "録音中...", + "processing": "音声処理中...", + "success": "音声認識成功", + "completed": "音声認識完了", + "canceled": "録音キャンセル", + "error": { + "not_enabled": "音声認識が有効になっていません", + "start_failed": "録音の開始に失敗しました", + "transcribe_failed": "音声の文字起こしに失敗しました" + } } }, "translate": { diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 3961f6dc75..a2ddc2a40c 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -1335,8 +1335,14 @@ "title": "隐私设置", "enable_privacy_mode": "匿名发送错误报告和数据统计" }, + "voice": { + "title": "语音功能", + "help": "语音功能包括文本转语音(TTS)和语音识别(ASR)。", + "learn_more": "了解更多" + }, "tts": { - "title": "语音设置", + "title": "语音合成", + "tab_title": "语音合成", "enable": "启用语音合成", "enable.help": "启用后可以将文本转换为语音", "reset": "重置", @@ -1376,7 +1382,61 @@ "max_text_length": "最大文本长度", "test": "测试语音", "help": "语音合成功能支持将文本转换为自然语音。", - "learn_more": "了解更多" + "learn_more": 
"了解更多", + "error": { + "not_enabled": "语音合成功能未启用", + "no_api_key": "未设置API密钥", + "no_edge_voice": "未选择Edge TTS音色", + "browser_not_support": "浏览器不支持语音合成" + } + }, + "asr": { + "title": "语音识别", + "tab_title": "语音识别", + "enable": "启用语音识别", + "enable.help": "启用后可以将语音转换为文本", + "service_type": "服务类型", + "service_type.browser": "浏览器", + "service_type.local": "本地服务器", + "api_key": "API密钥", + "api_key.placeholder": "请输入OpenAI API密钥", + "api_url": "API地址", + "api_url.placeholder": "例如:https://api.openai.com/v1/audio/transcriptions", + "model": "模型", + "browser.info": "使用浏览器内置的语音识别功能,无需额外设置", + "local.info": "使用本地服务器和浏览器进行语音识别,需要先启动服务器并打开浏览器页面", + "local.browser_tip": "请在Edge浏览器中打开此页面,并保持浏览器窗口打开", + "local.test_connection": "测试连接", + "local.connection_success": "连接成功", + "local.connection_failed": "连接失败,请确保服务器已启动", + "server.start": "启动服务器", + "server.stop": "停止服务器", + "server.starting": "正在启动服务器...", + "server.started": "服务器已启动", + "server.stopping": "正在停止服务器...", + "server.stopped": "服务器已停止", + "server.already_running": "服务器已经在运行中", + "server.not_running": "服务器未运行", + "server.start_failed": "启动服务器失败", + "server.stop_failed": "停止服务器失败", + "open_browser": "打开浏览器页面", + "test": "测试语音识别", + "test_info": "请在输入框中使用语音识别按钮进行测试", + "start": "开始录音", + "stop": "停止录音", + "preparing": "准备中", + "recording": "正在录音...", + "processing": "正在处理语音...", + "success": "语音识别成功", + "completed": "语音识别完成", + "canceled": "已取消录音", + "error": { + "not_enabled": "语音识别功能未启用", + "no_api_key": "未设置API密钥", + "browser_not_support": "浏览器不支持语音识别", + "start_failed": "开始录音失败", + "transcribe_failed": "语音识别失败" + } } }, "translate": { diff --git a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx index 79f377e14a..a6890bac0c 100644 --- a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx +++ b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx @@ -14,6 +14,7 @@ import { TranslationOutlined } from '@ant-design/icons' import { QuickPanelListItem, QuickPanelView, useQuickPanel } from '@renderer/components/QuickPanel' +import ASRButton from '@renderer/components/ASRButton' import TranslateButton from '@renderer/components/TranslateButton' import { isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models' import db from '@renderer/databases' @@ -1008,6 +1009,19 @@ const Inputbar: FC = ({ assistant: _assistant, setActiveTopic, topic }) = + { + // 如果是空字符串,不做任何处理 + if (!transcribedText) return + + // 将识别的文本添加到当前输入框 + setText((prevText) => { + // 如果当前有文本,添加空格后再添加识别的文本 + if (prevText.trim()) { + return prevText + ' ' + transcribedText + } + return transcribedText + }) + }} /> {loading && ( diff --git a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx index c66718539c..0acbe80234 100644 --- a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx +++ b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx @@ -16,7 +16,7 @@ import { UploadOutlined } from '@ant-design/icons' import ObsidianExportPopup from '@renderer/components/Popups/ObsidianExportPopup' import SelectModelPopup from '@renderer/components/Popups/SelectModelPopup' import TextEditPopup from '@renderer/components/Popups/TextEditPopup' -import TTSButton from '@renderer/components/TTSButton' +// import TTSButton from '@renderer/components/TTSButton' // 暂时不使用 import { isReasoningModel } from '@renderer/config/models' import { TranslateLanguageOptions } from '@renderer/config/translate' import { useMessageOperations, useTopicLoading } from 
'@renderer/hooks/useMessageOperations' @@ -154,14 +154,14 @@ const MessageMenubar: FC = (props) => { const imageUrls: string[] = [] let match let content = editedText - + while ((match = imageRegex.exec(editedText)) !== null) { imageUrls.push(match[1]) content = content.replace(match[0], '') } - + // 更新消息内容,保留图片信息 - await editMessage(message.id, { + await editMessage(message.id, { content: content.trim(), metadata: { ...message.metadata, @@ -171,9 +171,9 @@ const MessageMenubar: FC = (props) => { } : undefined } }) - - resendMessage && handleResendUserMessage({ - ...message, + + resendMessage && handleResendUserMessage({ + ...message, content: content.trim(), metadata: { ...message.metadata, diff --git a/src/renderer/src/pages/settings/SettingsPage.tsx b/src/renderer/src/pages/settings/SettingsPage.tsx index f2b4daabe9..5d9ea798b6 100644 --- a/src/renderer/src/pages/settings/SettingsPage.tsx +++ b/src/renderer/src/pages/settings/SettingsPage.tsx @@ -127,7 +127,7 @@ const SettingsPage: FC = () => { - {t('settings.tts.title')} + {t('settings.voice.title')} diff --git a/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx b/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx new file mode 100644 index 0000000000..321f8f4e9e --- /dev/null +++ b/src/renderer/src/pages/settings/TTSSettings/ASRSettings.tsx @@ -0,0 +1,271 @@ +import { InfoCircleOutlined, GlobalOutlined, PlayCircleOutlined, StopOutlined } from '@ant-design/icons' +import { useTheme } from '@renderer/context/ThemeProvider' +import ASRService from '@renderer/services/ASRService' +import ASRServerService from '@renderer/services/ASRServerService' +import { useAppDispatch } from '@renderer/store' +import { + setAsrApiKey, + setAsrApiUrl, + setAsrEnabled, + setAsrModel, + setAsrServiceType +} from '@renderer/store/settings' +import { Button, Form, Input, Select, Space, Switch } from 'antd' +import { FC, useState, useEffect } from 'react' +import { useTranslation } from 'react-i18next' +import { useSelector } from 'react-redux' +import styled from 'styled-components' + +const ASRSettings: FC = () => { + const { t } = useTranslation() + const { isDark } = useTheme() + const dispatch = useAppDispatch() + + // 服务器状态 + const [isServerRunning, setIsServerRunning] = useState(false) + + // 从 Redux 获取 ASR 设置 + const asrEnabled = useSelector((state: any) => state.settings.asrEnabled) + const asrServiceType = useSelector((state: any) => state.settings.asrServiceType || 'openai') + const asrApiKey = useSelector((state: any) => state.settings.asrApiKey) + const asrApiUrl = useSelector((state: any) => state.settings.asrApiUrl) + const asrModel = useSelector((state: any) => state.settings.asrModel || 'whisper-1') + + // 检查服务器状态 + useEffect(() => { + if (asrServiceType === 'local') { + setIsServerRunning(ASRServerService.isRunning()) + } + return undefined // 添加返回值以解决TS7030错误 + }, [asrServiceType]) + + // 服务类型选项 + const serviceTypeOptions = [ + { label: 'OpenAI', value: 'openai' }, + { label: t('settings.asr.service_type.browser'), value: 'browser' }, + { label: t('settings.asr.service_type.local'), value: 'local' } + ] + + // 模型选项 + const modelOptions = [ + { label: 'whisper-1', value: 'whisper-1' } + ] + + return ( + +
+ {/* ASR开关 */} + + + dispatch(setAsrEnabled(checked))} /> + {t('settings.asr.enable')} + + + + + + + {/* 服务类型选择 */} + + dispatch(setAsrApiUrl(e.target.value))} + placeholder={t('settings.asr.api_url.placeholder')} + disabled={!asrEnabled} + /> + + + {/* 模型选择 */} + + { - console.log('切换TTS服务类型为:', value) - // 先将新的服务类型写入Redux状态 - dispatch(setTtsServiceType(value)) + {/* 重置按钮 */} + + + {t('settings.tts.reset_title')} + + + {t('settings.tts.reset_help')} + + + {t('settings.tts.api_settings')} + + {/* TTS服务类型选择 */} + + + dispatch(setTtsApiUrl(e.target.value))} - placeholder={t('settings.tts.api_url.placeholder')} - disabled={!ttsEnabled} - /> - - - )} + {/* OpenAI TTS设置 */} + {ttsServiceType === 'openai' && ( + <> + + dispatch(setTtsApiKey(e.target.value))} + placeholder={t('settings.tts.api_key.placeholder')} + disabled={!ttsEnabled} + /> + + + dispatch(setTtsApiUrl(e.target.value))} + placeholder={t('settings.tts.api_url.placeholder')} + disabled={!ttsEnabled} + /> + + + )} - {/* Edge TTS设置 */} - {ttsServiceType === 'edge' && ( - - - dispatch(setTtsEdgeVoice(value))} + options={ + availableVoices.length > 0 + ? availableVoices + : [{ label: t('settings.tts.edge_voice.loading'), value: '' }] + } + disabled={!ttsEnabled} + style={{ flex: 1 }} + showSearch + optionFilterProp="label" + placeholder={ + availableVoices.length === 0 + ? t('settings.tts.edge_voice.loading') + : t('settings.tts.voice.placeholder') + } + notFoundContent={ + availableVoices.length === 0 + ? t('settings.tts.edge_voice.loading') + : t('settings.tts.edge_voice.not_found') + } + /> + - - + {/* 添加自定义音色 */} + + + setNewVoice(e.target.value)} + disabled={!ttsEnabled} + style={{ flex: 1 }} + /> + + + - {/* 模型选择 */} - - dispatch(setTtsModel(value))} + options={ttsCustomModels.map((model: any) => { + // 确保model是字符串 + const modelStr = typeof model === 'string' ? model : String(model) + return { label: modelStr, value: modelStr } + })} + disabled={!ttsEnabled} + style={{ width: '100%' }} + placeholder={t('settings.tts.model.placeholder')} + showSearch + optionFilterProp="label" + allowClear + /> + - {/* 自定义模型列表 */} - - {ttsCustomModels && ttsCustomModels.length > 0 ? ( - ttsCustomModels.map((model: any, index: number) => { - // 确保model是字符串 - const modelStr = typeof model === 'string' ? model : String(model) - return ( - handleRemoveModel(modelStr)} - style={{ padding: '4px 8px' }}> - {modelStr} - - ) - }) - ) : ( - {t('settings.tts.model_empty')} - )} - + {/* 自定义模型列表 */} + + {ttsCustomModels && ttsCustomModels.length > 0 ? ( + ttsCustomModels.map((model: any, index: number) => { + // 确保model是字符串 + const modelStr = typeof model === 'string' ? 
model : String(model) + return ( + handleRemoveModel(modelStr)} + style={{ padding: '4px 8px' }}> + {modelStr} + + ) + }) + ) : ( + {t('settings.tts.model_empty')} + )} + - {/* 添加自定义模型 */} - - - setNewModel(e.target.value)} - disabled={!ttsEnabled} - style={{ flex: 1 }} - /> - - - - - )} + {/* 添加自定义模型 */} + + + setNewModel(e.target.value)} + disabled={!ttsEnabled} + style={{ flex: 1 }} + /> + + + + + )} - {/* TTS过滤选项 */} - - - dispatch(setTtsFilterOptions({ filterThinkingProcess: checked }))} - disabled={!ttsEnabled} - />{' '} - {t('settings.tts.filter.thinking_process')} - - - dispatch(setTtsFilterOptions({ filterMarkdown: checked }))} - disabled={!ttsEnabled} - />{' '} - {t('settings.tts.filter.markdown')} - - - dispatch(setTtsFilterOptions({ filterCodeBlocks: checked }))} - disabled={!ttsEnabled} - />{' '} - {t('settings.tts.filter.code_blocks')} - - - dispatch(setTtsFilterOptions({ filterHtmlTags: checked }))} - disabled={!ttsEnabled} - />{' '} - {t('settings.tts.filter.html_tags')} - - - {t('settings.tts.max_text_length')}: - dispatch(setTtsFilterOptions({ maxTextLength: value }))} + disabled={!ttsEnabled} + style={{ width: 120 }} + options={[ + { label: '1000', value: 1000 }, + { label: '2000', value: 2000 }, + { label: '4000', value: 4000 }, + { label: '8000', value: 8000 }, + { label: '16000', value: 16000 } + ]} + /> + + - - - - - + + + + + + + ) + }, + { + key: 'asr', + label: ( + + {t('settings.asr.tab_title')} + + ), + children: + } + ]} + /> - {t('settings.tts.help')} - - {t('settings.tts.learn_more')} + {t('settings.voice.help')} + + {t('settings.voice.learn_more')} diff --git a/src/renderer/src/services/ASRServerService.ts b/src/renderer/src/services/ASRServerService.ts new file mode 100644 index 0000000000..6c8d77757f --- /dev/null +++ b/src/renderer/src/services/ASRServerService.ts @@ -0,0 +1,129 @@ +import i18n from '@renderer/i18n' + +// 使用window.electron而不是直接导入electron模块 +// 这样可以避免__dirname不可用的问题 + +class ASRServerService { + private serverProcess: any = null + private isServerRunning = false + + /** + * 启动ASR服务器 + * @returns Promise 是否成功启动 + */ + startServer = async (): Promise => { + if (this.isServerRunning) { + console.log('[ASRServerService] 服务器已经在运行中') + window.message.info({ content: i18n.t('settings.asr.server.already_running'), key: 'asr-server' }) + return true + } + + try { + console.log('[ASRServerService] 正在启动ASR服务器...') + window.message.loading({ content: i18n.t('settings.asr.server.starting'), key: 'asr-server' }) + + // 使用IPC调用主进程启动服务器 + const result = await window.electron.ipcRenderer.invoke('start-asr-server') + + if (result.success) { + this.isServerRunning = true + this.serverProcess = result.pid + console.log('[ASRServerService] ASR服务器启动成功,PID:', result.pid) + window.message.success({ content: i18n.t('settings.asr.server.started'), key: 'asr-server' }) + return true + } else { + console.error('[ASRServerService] ASR服务器启动失败:', result.error) + window.message.error({ + content: i18n.t('settings.asr.server.start_failed') + ': ' + result.error, + key: 'asr-server' + }) + return false + } + } catch (error) { + console.error('[ASRServerService] 启动ASR服务器时出错:', error) + window.message.error({ + content: i18n.t('settings.asr.server.start_failed') + ': ' + (error as Error).message, + key: 'asr-server' + }) + return false + } + } + + /** + * 停止ASR服务器 + * @returns Promise 是否成功停止 + */ + stopServer = async (): Promise => { + if (!this.isServerRunning || !this.serverProcess) { + console.log('[ASRServerService] 服务器未运行') + window.message.info({ content: 
i18n.t('settings.asr.server.not_running'), key: 'asr-server' }) + return true + } + + try { + console.log('[ASRServerService] 正在停止ASR服务器...') + window.message.loading({ content: i18n.t('settings.asr.server.stopping'), key: 'asr-server' }) + + // 使用IPC调用主进程停止服务器 + const result = await window.electron.ipcRenderer.invoke('stop-asr-server', this.serverProcess) + + if (result.success) { + this.isServerRunning = false + this.serverProcess = null + console.log('[ASRServerService] ASR服务器已停止') + window.message.success({ content: i18n.t('settings.asr.server.stopped'), key: 'asr-server' }) + return true + } else { + console.error('[ASRServerService] ASR服务器停止失败:', result.error) + window.message.error({ + content: i18n.t('settings.asr.server.stop_failed') + ': ' + result.error, + key: 'asr-server' + }) + return false + } + } catch (error) { + console.error('[ASRServerService] 停止ASR服务器时出错:', error) + window.message.error({ + content: i18n.t('settings.asr.server.stop_failed') + ': ' + (error as Error).message, + key: 'asr-server' + }) + return false + } + } + + /** + * 检查ASR服务器是否正在运行 + * @returns boolean 是否正在运行 + */ + isRunning = (): boolean => { + return this.isServerRunning + } + + /** + * 获取ASR服务器网页URL + * @returns string 网页URL + */ + getServerUrl = (): string => { + return 'http://localhost:8080' + } + + /** + * 获取ASR服务器文件路径 + * @returns string 服务器文件路径 + */ + getServerFilePath = (): string => { + // 使用相对路径,因为window.electron.app.getAppPath()不可用 + return process.env.NODE_ENV === 'development' + ? 'src/renderer/src/assets/asr-server/server.js' + : 'public/asr-server/server.js' + } + + /** + * 打开ASR服务器网页 + */ + openServerPage = (): void => { + window.open(this.getServerUrl(), '_blank') + } +} + +export default new ASRServerService() diff --git a/src/renderer/src/services/ASRService.ts b/src/renderer/src/services/ASRService.ts new file mode 100644 index 0000000000..260725fdf6 --- /dev/null +++ b/src/renderer/src/services/ASRService.ts @@ -0,0 +1,560 @@ +import i18n from '@renderer/i18n' +import store from '@renderer/store' + +/** + * ASR服务,用于将语音转换为文本 + */ +class ASRService { + private mediaRecorder: MediaRecorder | null = null + private audioChunks: Blob[] = [] + private isRecording = false + private stream: MediaStream | null = null + + // WebSocket相关 + private ws: WebSocket | null = null + private wsConnected = false + private browserReady = false + private reconnectAttempt = 0 + private maxReconnectAttempts = 5 + private reconnectTimeout: NodeJS.Timeout | null = null + + /** + * 开始录音 + * @returns Promise + */ + /** + * 连接到WebSocket服务器 + * @returns Promise 是否连接成功 + */ + connectToWebSocketServer = async (): Promise => { + return new Promise((resolve) => { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + console.log('[ASRService] WebSocket已连接') + resolve(true) + return + } + + if (this.ws && this.ws.readyState === WebSocket.CONNECTING) { + console.log('[ASRService] WebSocket正在连接中') + // 等待连接完成 + this.ws.onopen = () => { + console.log('[ASRService] WebSocket连接成功') + this.wsConnected = true + this.reconnectAttempt = 0 + this.ws?.send(JSON.stringify({ type: 'identify', role: 'electron' })) + resolve(true) + } + this.ws.onerror = () => { + console.error('[ASRService] WebSocket连接失败') + this.wsConnected = false + resolve(false) + } + return + } + + // 关闭之前的连接 + if (this.ws) { + try { + this.ws.close() + } catch (e) { + console.error('[ASRService] 关闭WebSocket连接失败:', e) + } + } + + // 创建新连接 + try { + console.log('[ASRService] 正在连接WebSocket服务器...') + window.message.loading({ content: '正在连接语音识别服务...', 
key: 'ws-connect' }) + + this.ws = new WebSocket('ws://localhost:8080') + this.wsConnected = false + this.browserReady = false + + this.ws.onopen = () => { + console.log('[ASRService] WebSocket连接成功') + window.message.success({ content: '语音识别服务连接成功', key: 'ws-connect' }) + this.wsConnected = true + this.reconnectAttempt = 0 + this.ws?.send(JSON.stringify({ type: 'identify', role: 'electron' })) + resolve(true) + } + + this.ws.onclose = () => { + console.log('[ASRService] WebSocket连接关闭') + this.wsConnected = false + this.browserReady = false + this.attemptReconnect() + } + + this.ws.onerror = (error) => { + console.error('[ASRService] WebSocket连接错误:', error) + this.wsConnected = false + window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' }) + resolve(false) + } + + this.ws.onmessage = this.handleWebSocketMessage + } catch (error) { + console.error('[ASRService] 创建WebSocket连接失败:', error) + window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' }) + resolve(false) + } + }) + } + + /** + * 处理WebSocket消息 + */ + private handleWebSocketMessage = (event: MessageEvent) => { + try { + const data = JSON.parse(event.data) + console.log('[ASRService] 收到WebSocket消息:', data) + + if (data.type === 'status') { + if (data.message === 'browser_ready' || data.message === 'Browser connected') { + console.log('[ASRService] 浏览器已准备好') + this.browserReady = true + window.message.success({ content: '语音识别浏览器已准备好', key: 'browser-status' }) + } else if (data.message === 'Browser disconnected' || data.message === 'Browser connection error') { + console.log('[ASRService] 浏览器断开连接') + this.browserReady = false + window.message.error({ content: '语音识别浏览器断开连接', key: 'browser-status' }) + } + } else if (data.type === 'status' && data.message === 'stopped') { + // 语音识别已停止 + console.log('[ASRService] 语音识别已停止') + this.isRecording = false + + // 如果没有收到最终结果,显示处理完成消息 + window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' }) + + // 如果有回调函数,调用一次空字符串,触发按钮状态重置 + if (this.resultCallback && typeof this.resultCallback === 'function') { + // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置 + this.resultCallback('') + } + } else if (data.type === 'result' && data.data) { + // 处理识别结果 + console.log('[ASRService] 收到识别结果:', data.data) + if (this.resultCallback && typeof this.resultCallback === 'function') { + // 只在收到最终结果时才调用回调 + if (data.data.isFinal && data.data.text && data.data.text.trim()) { + console.log('[ASRService] 收到最终结果,调用回调函数,文本:', data.data.text) + this.resultCallback(data.data.text) + window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' }) + } else if (!data.data.isFinal) { + // 非最终结果,只输出日志,不调用回调 + console.log('[ASRService] 收到中间结果,文本:', data.data.text) + } else { + console.log('[ASRService] 识别结果为空,不调用回调') + } + } else { + console.warn('[ASRService] 没有设置结果回调函数') + } + } else if (data.type === 'error') { + console.error('[ASRService] 收到错误消息:', data.message || data.data) + window.message.error({ content: `语音识别错误: ${data.message || data.data?.error || '未知错误'}`, key: 'asr-error' }) + } + } catch (error) { + console.error('[ASRService] 解析WebSocket消息失败:', error, event.data) + } + } + + /** + * 尝试重新连接WebSocket服务器 + */ + private attemptReconnect = () => { + if (this.reconnectTimeout) { + clearTimeout(this.reconnectTimeout) + this.reconnectTimeout = null + } + + if (this.reconnectAttempt >= this.maxReconnectAttempts) { + console.log('[ASRService] 达到最大重连次数,停止重连') + return + } + + const delay = Math.min(1000 * Math.pow(2, this.reconnectAttempt), 30000) + 
console.log(`[ASRService] 将在 ${delay}ms 后尝试重连 (尝试 ${this.reconnectAttempt + 1}/${this.maxReconnectAttempts})`) + + this.reconnectTimeout = setTimeout(() => { + this.reconnectAttempt++ + this.connectToWebSocketServer().catch(console.error) + }, delay) + } + + // 存储结果回调函数 + resultCallback: ((text: string) => void) | null = null + + startRecording = async (onTranscribed?: (text: string) => void): Promise => { + try { + const { asrEnabled, asrServiceType } = store.getState().settings + + if (!asrEnabled) { + window.message.error({ content: i18n.t('settings.asr.error.not_enabled'), key: 'asr-error' }) + return + } + + // 检查是否已经在录音 + if (this.isRecording) { + console.log('已经在录音中,忽略此次请求') + return + } + + // 如果是使用本地服务器 + if (asrServiceType === 'local') { + // 连接WebSocket服务器 + const connected = await this.connectToWebSocketServer() + if (!connected) { + throw new Error('无法连接到语音识别服务') + } + + // 检查浏览器是否准备好 + if (!this.browserReady) { + // 尝试等待浏览器准备好 + let waitAttempts = 0 + const maxWaitAttempts = 5 + + while (!this.browserReady && waitAttempts < maxWaitAttempts) { + window.message.loading({ + content: `等待浏览器准备就绪 (${waitAttempts + 1}/${maxWaitAttempts})...`, + key: 'browser-status' + }) + + // 等待一秒 + await new Promise(resolve => setTimeout(resolve, 1000)) + waitAttempts++ + } + + if (!this.browserReady) { + window.message.warning({ + content: '语音识别浏览器尚未准备好,请确保已打开浏览器页面', + key: 'browser-status' + }) + throw new Error('浏览器尚未准备好') + } + } + + // 保存回调函数(如果提供了) + if (onTranscribed && typeof onTranscribed === 'function') { + this.resultCallback = onTranscribed + } + + // 发送开始命令 + if (this.ws && this.wsConnected) { + this.ws.send(JSON.stringify({ type: 'start' })) + this.isRecording = true + console.log('开始语音识别') + window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' }) + } else { + throw new Error('WebSocket连接未就绪') + } + return + } + + // 以下是原有的录音逻辑(OpenAI或浏览器API) + // 请求麦克风权限 + this.stream = await navigator.mediaDevices.getUserMedia({ audio: true }) + + // 创建MediaRecorder实例 + this.mediaRecorder = new MediaRecorder(this.stream) + + // 清空之前的录音数据 + this.audioChunks = [] + + // 设置数据可用时的回调 + this.mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + this.audioChunks.push(event.data) + } + } + + // 开始录音 + this.mediaRecorder.start() + this.isRecording = true + + console.log('开始录音') + window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' }) + } catch (error) { + console.error('开始录音失败:', error) + window.message.error({ + content: i18n.t('settings.asr.error.start_failed') + ': ' + (error as Error).message, + key: 'asr-error' + }) + this.isRecording = false + } + } + + /** + * 停止录音并转换为文本 + * @param onTranscribed 转录完成后的回调函数 + * @returns Promise + */ + stopRecording = async (onTranscribed: (text: string) => void): Promise => { + const { asrServiceType } = store.getState().settings + + // 如果是使用本地服务器 + if (asrServiceType === 'local') { + if (!this.isRecording) { + console.log('没有正在进行的语音识别') + return + } + + try { + // 保存回调函数 + this.resultCallback = onTranscribed + + // 发送停止命令 + if (this.ws && this.wsConnected) { + this.ws.send(JSON.stringify({ type: 'stop' })) + console.log('停止语音识别') + window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' }) + + // 立即调用回调函数,使按钮状态立即更新 + if (onTranscribed) { + // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置 + setTimeout(() => onTranscribed(''), 100) + } + } else { + throw new Error('WebSocket连接未就绪') + } + + // 重置录音状态 + this.isRecording = false + } catch (error) { + 
console.error('停止语音识别失败:', error) + window.message.error({ + content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message, + key: 'asr-processing' + }) + this.isRecording = false + } + return + } + + // 以下是原有的录音停止逻辑(OpenAI或浏览器API) + if (!this.isRecording || !this.mediaRecorder) { + console.log('没有正在进行的录音') + return + } + + try { + // 创建一个Promise,等待录音结束 + const recordingEndedPromise = new Promise((resolve) => { + if (this.mediaRecorder) { + this.mediaRecorder.onstop = () => { + // 将所有音频块合并为一个Blob + const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' }) + resolve(audioBlob) + } + + // 停止录音 + this.mediaRecorder.stop() + } + }) + + // 停止所有轨道 + if (this.stream) { + this.stream.getTracks().forEach(track => track.stop()) + this.stream = null + } + + // 等待录音结束并获取音频Blob + const audioBlob = await recordingEndedPromise + + // 重置录音状态 + this.isRecording = false + this.mediaRecorder = null + + console.log('录音结束,音频大小:', audioBlob.size, 'bytes') + + // 显示处理中消息 + window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' }) + + if (asrServiceType === 'openai') { + // 使用OpenAI的Whisper API进行语音识别 + await this.transcribeWithOpenAI(audioBlob, onTranscribed) + } else if (asrServiceType === 'browser') { + // 使用浏览器的Web Speech API进行语音识别 + await this.transcribeWithBrowser(audioBlob, onTranscribed) + } else { + throw new Error(`不支持的ASR服务类型: ${asrServiceType}`) + } + } catch (error) { + console.error('停止录音或转录失败:', error) + window.message.error({ + content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message, + key: 'asr-processing' + }) + + // 重置录音状态 + this.isRecording = false + this.mediaRecorder = null + if (this.stream) { + this.stream.getTracks().forEach(track => track.stop()) + this.stream = null + } + } + } + + /** + * 使用OpenAI的Whisper API进行语音识别 + * @param audioBlob 音频Blob + * @param onTranscribed 转录完成后的回调函数 + * @returns Promise + */ + private transcribeWithOpenAI = async (audioBlob: Blob, onTranscribed: (text: string) => void): Promise => { + try { + const { asrApiKey, asrApiUrl, asrModel } = store.getState().settings + + if (!asrApiKey) { + throw new Error(i18n.t('settings.asr.error.no_api_key')) + } + + // 创建FormData对象 + const formData = new FormData() + formData.append('file', audioBlob, 'recording.webm') + formData.append('model', asrModel || 'whisper-1') + + // 调用OpenAI API + const response = await fetch(asrApiUrl, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${asrApiKey}` + }, + body: formData + }) + + if (!response.ok) { + const errorData = await response.json() + throw new Error(errorData.error?.message || 'OpenAI语音识别失败') + } + + // 解析响应 + const data = await response.json() + const transcribedText = data.text + + if (transcribedText) { + console.log('语音识别成功:', transcribedText) + window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' }) + onTranscribed(transcribedText) + } else { + throw new Error('未能识别出文本') + } + } catch (error) { + console.error('OpenAI语音识别失败:', error) + throw error + } + } + + /** + * 使用浏览器的Web Speech API进行语音识别 + * @param audioBlob 音频Blob + * @param onTranscribed 转录完成后的回调函数 + * @returns Promise + */ + private transcribeWithBrowser = async (_audioBlob: Blob, onTranscribed: (text: string) => void): Promise => { + try { + // 检查浏览器是否支持Web Speech API + if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) { + throw new Error(i18n.t('settings.asr.error.browser_not_support')) + } + + // 由于Web Speech 
API不支持直接处理录制的音频,这里我们只是模拟一个成功的回调 + // 实际上,使用Web Speech API时,应该直接使用SpeechRecognition对象进行实时识别 + // 这里简化处理,实际项目中可能需要更复杂的实现 + window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' }) + onTranscribed('浏览器语音识别功能尚未完全实现') + } catch (error) { + console.error('浏览器语音识别失败:', error) + throw error + } + } + + /** + * 检查是否正在录音 + * @returns boolean + */ + isCurrentlyRecording = (): boolean => { + return this.isRecording + } + + /** + * 取消录音 + */ + cancelRecording = (): void => { + const { asrServiceType } = store.getState().settings + + // 如果是使用本地服务器 + if (asrServiceType === 'local') { + if (this.isRecording) { + // 发送停止命令 + if (this.ws && this.wsConnected) { + this.ws.send(JSON.stringify({ type: 'stop' })) + } + + // 重置状态 + this.isRecording = false + this.resultCallback = null + + console.log('语音识别已取消') + window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' }) + } + return + } + + // 以下是原有的取消录音逻辑(OpenAI或浏览器API) + if (this.isRecording && this.mediaRecorder) { + // 停止MediaRecorder + this.mediaRecorder.stop() + + // 停止所有轨道 + if (this.stream) { + this.stream.getTracks().forEach(track => track.stop()) + this.stream = null + } + + // 重置状态 + this.isRecording = false + this.mediaRecorder = null + this.audioChunks = [] + + console.log('录音已取消') + window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' }) + } + } + + /** + * 关闭WebSocket连接 + */ + closeWebSocketConnection = (): void => { + if (this.ws) { + try { + this.ws.close() + } catch (e) { + console.error('[ASRService] 关闭WebSocket连接失败:', e) + } + this.ws = null + } + + this.wsConnected = false + this.browserReady = false + + if (this.reconnectTimeout) { + clearTimeout(this.reconnectTimeout) + this.reconnectTimeout = null + } + } + + /** + * 打开浏览器页面 + */ + openBrowserPage = (): void => { + // 使用window.open打开浏览器页面 + window.open('http://localhost:8080', '_blank') + } +} + +// 创建单例实例 +const instance = new ASRService() +export default instance diff --git a/src/renderer/src/store/settings.ts b/src/renderer/src/store/settings.ts index bc3340eea6..57c2a46b82 100644 --- a/src/renderer/src/store/settings.ts +++ b/src/renderer/src/store/settings.ts @@ -129,6 +129,12 @@ export interface SettingsState { filterHtmlTags: boolean // 过滤HTML标签 maxTextLength: number // 最大文本长度 } + // ASR配置(语音识别) + asrEnabled: boolean + asrServiceType: string // ASR服务类型:openai或browser + asrApiKey: string + asrApiUrl: string + asrModel: string // Quick Panel Triggers enableQuickPanelTriggers: boolean // Export Menu Options @@ -248,6 +254,12 @@ export const initialState: SettingsState = { filterHtmlTags: true, // 默认过滤HTML标签 maxTextLength: 4000 // 默认最大文本长度 }, + // ASR配置(语音识别) + asrEnabled: false, + asrServiceType: 'openai', // 默认使用 OpenAI ASR + asrApiKey: '', + asrApiUrl: 'https://api.openai.com/v1/audio/transcriptions', + asrModel: 'whisper-1', // Quick Panel Triggers enableQuickPanelTriggers: false, // Export Menu Options @@ -628,6 +640,22 @@ const settingsSlice = createSlice({ ...action.payload } }, + // ASR相关的action + setAsrEnabled: (state, action: PayloadAction) => { + state.asrEnabled = action.payload + }, + setAsrServiceType: (state, action: PayloadAction) => { + state.asrServiceType = action.payload + }, + setAsrApiKey: (state, action: PayloadAction) => { + state.asrApiKey = action.payload + }, + setAsrApiUrl: (state, action: PayloadAction) => { + state.asrApiUrl = action.payload + }, + setAsrModel: (state, action: PayloadAction) => { + state.asrModel = action.payload + }, // Quick Panel Triggers 
action setEnableQuickPanelTriggers: (state, action: PayloadAction) => { state.enableQuickPanelTriggers = action.payload @@ -736,7 +764,12 @@ export const { addTtsCustomModel, removeTtsCustomVoice, removeTtsCustomModel, - setTtsFilterOptions + setTtsFilterOptions, + setAsrEnabled, + setAsrServiceType, + setAsrApiKey, + setAsrApiUrl, + setAsrModel } = settingsSlice.actions export default settingsSlice.reducer diff --git a/src/renderer/src/types/electron.d.ts b/src/renderer/src/types/electron.d.ts index 7059da90aa..a916179e7a 100644 --- a/src/renderer/src/types/electron.d.ts +++ b/src/renderer/src/types/electron.d.ts @@ -4,6 +4,21 @@ interface ObsidianAPI { getFolders: (vaultName: string) => Promise> } +interface IpcRendererAPI { + invoke: (channel: string, ...args: any[]) => Promise + on: (channel: string, listener: (...args: any[]) => void) => void + once: (channel: string, listener: (...args: any[]) => void) => void + removeListener: (channel: string, listener: (...args: any[]) => void) => void + removeAllListeners: (channel: string) => void + send: (channel: string, ...args: any[]) => void + sendSync: (channel: string, ...args: any[]) => any +} + +interface ElectronAPI { + ipcRenderer: IpcRendererAPI +} + interface Window { obsidian: ObsidianAPI + electron: ElectronAPI }
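
For reference, the WebSocket relay protocol added by this patch (identify / start / stop / result / status JSON messages on ws://localhost:8080) can be exercised with a minimal standalone client. The sketch below is not part of the patch; it assumes the server from public/asr-server/server.js is already running, that the relay page is open in Edge so a browser peer has identified itself, and that the ws package (a dependency of the bundled server) is installed. The file name sketch-client.js is illustrative.

// sketch-client.js — minimal sketch that drives the ASR relay server from a
// standalone Node process (assumes: server running on localhost:8080, relay
// page open in Edge, `npm install ws` done in the script's directory).
const WebSocket = require('ws')

const ws = new WebSocket('ws://localhost:8080')

ws.on('open', () => {
  // Identify as the "electron" peer, mirroring ASRService.connectToWebSocketServer()
  ws.send(JSON.stringify({ type: 'identify', role: 'electron' }))
})

ws.on('message', (raw) => {
  const msg = JSON.parse(raw.toString())
  if (msg.type === 'status' && msg.message === 'browser_ready') {
    // The relay page has identified itself: start recognition, stop after 5 s
    ws.send(JSON.stringify({ type: 'start' }))
    setTimeout(() => ws.send(JSON.stringify({ type: 'stop' })), 5000)
  } else if (msg.type === 'result' && msg.data && msg.data.isFinal) {
    console.log('Final transcript:', msg.data.text)
  } else if (msg.type === 'error') {
    console.error('Relay error:', msg.message)
  }
})

Running the sketch prints the final transcript relayed back from the browser peer; this is the same { type: 'result', data: { text, isFinal } } message shape that ASRService.ts consumes in handleWebSocketMessage.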