This commit is contained in:
1600822305 2025-04-10 12:30:22 +08:00
parent 4a2f1d5cf6
commit bbe08e2a6c
21 changed files with 2790 additions and 331 deletions

View File

@ -27,6 +27,11 @@ files:
- '!node_modules/@tavily/core/node_modules/js-tiktoken'
- '!node_modules/pdf-parse/lib/pdf.js/{v1.9.426,v1.10.88,v2.0.550}'
- '!node_modules/mammoth/{mammoth.browser.js,mammoth.browser.min.js}'
# 包含 ASR 服务器文件
- src/renderer/src/assets/asr-server/**/*
# 包含打包后的ASR服务器可执行文件
- cherry-asr-server.exe
- index.html
asarUnpack:
- resources/**
- '**/*.{node,dll,metal,exp,lib}'

View File

@ -76,6 +76,17 @@ export default defineConfig({
},
optimizeDeps: {
exclude: []
},
build: {
rollupOptions: {
input: {
index: resolve('src/renderer/index.html'),
},
},
// 复制ASR服务器文件
assetsInlineLimit: 0,
// 确保复制assets目录下的所有文件
copyPublicDir: true,
}
}
})

View File

@ -0,0 +1,198 @@
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Edge ASR (External)</title>
<style>
body {
font-family: sans-serif;
padding: 1em;
}
#status {
margin-top: 1em;
font-style: italic;
color: #555;
}
#result {
margin-top: 0.5em;
border: 1px solid #ccc;
padding: 0.5em;
min-height: 50px;
background: #f9f9f9;
}
</style>
</head>
<body>
<h1>Edge ASR 中继页面</h1>
<p>这个页面需要在 Edge 浏览器中保持打开,以便 Electron 应用使用其语音识别功能。</p>
<div id="status">正在连接到服务器...</div>
<div id="result"></div>
<script>
// --- DOM references and relay-server connection ---
const statusDiv = document.getElementById('status');
const resultDiv = document.getElementById('result');
// Socket to the local relay server started by the Electron app.
const ws = new WebSocket('ws://localhost:8080'); // Use the defined port
// Active SpeechRecognition instance, or null when idle.
let recognition = null;
// Web Speech API constructor (vendor-prefixed in Chromium/Edge).
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
// Show a status message on the page and mirror it to the console.
function updateStatus(message) {
    console.log(`[Browser Page Status] ${message}`);
    statusDiv.textContent = message;
}
// Identify this client to the relay server as the 'browser' role.
ws.onopen = () => {
    updateStatus('已连接到服务器,等待指令...');
    ws.send(JSON.stringify({ type: 'identify', role: 'browser' }));
};
// Dispatch relayed commands from Electron ('start' / 'stop').
ws.onmessage = (event) => {
    let data;
    try {
        data = JSON.parse(event.data);
        console.log('[Browser Page] Received command:', data);
    } catch (e) {
        console.error('[Browser Page] Received non-JSON message:', event.data);
        return;
    }
    if (data.type === 'start') {
        startRecognition();
    } else if (data.type === 'stop') {
        stopRecognition();
    } else {
        console.warn('[Browser Page] Received unknown command type:', data.type);
    }
};
ws.onerror = (error) => {
    console.error('[Browser Page] WebSocket Error:', error);
    updateStatus('WebSocket 连接错误!请检查服务器是否运行。');
};
// Stop any active recognition once the server link is lost.
ws.onclose = () => {
    console.log('[Browser Page] WebSocket Connection Closed');
    updateStatus('与服务器断开连接。请刷新页面或重启服务器。');
    stopRecognition();
};
// Build a fresh SpeechRecognition instance and wire its lifecycle events to
// the relay socket. Returns false when the browser lacks Web Speech API
// support, true otherwise.
function setupRecognition() {
    if (!SpeechRecognition) {
        updateStatus('错误:此浏览器不支持 Web Speech API。');
        return false;
    }
    // NOTE(review): 'recognizing' is not a standard SpeechRecognition
    // property, so this early-out probably never fires — confirm intent.
    if (recognition && recognition.recognizing) {
        console.log('[Browser Page] Recognition already active.');
        return true;
    }
    recognition = new SpeechRecognition();
    recognition.lang = 'zh-CN';
    recognition.continuous = true; // keep listening across pauses
    recognition.interimResults = true; // deliver partial transcripts too
    recognition.onstart = () => {
        updateStatus("🎤 正在识别...");
        console.log('[Browser Page] SpeechRecognition started.');
    };
    // Aggregate final and interim transcripts, show the current best text,
    // and forward it to the server flagged with whether it is final.
    recognition.onresult = (event) => {
        let interim_transcript = '';
        let final_transcript = '';
        for (let i = event.resultIndex; i < event.results.length; ++i) {
            if (event.results[i].isFinal) {
                final_transcript += event.results[i][0].transcript;
            } else {
                interim_transcript += event.results[i][0].transcript;
            }
        }
        const resultText = final_transcript || interim_transcript;
        resultDiv.textContent = resultText;
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ type: 'result', data: { text: resultText, isFinal: !!final_transcript } }));
        }
    };
    // Surface recognition errors locally and relay them to the server.
    recognition.onerror = (event) => {
        console.error(`[Browser Page] SpeechRecognition Error - Type: ${event.error}, Message: ${event.message}`);
        updateStatus(`识别错误: ${event.error}`);
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ type: 'error', data: { error: event.error, message: event.message || `Recognition error: ${event.error}` } }));
        }
    };
    // Fires whenever recognition stops (manually, on error, or by itself);
    // informs the server and drops the instance so the next start rebuilds it.
    recognition.onend = () => {
        console.log('[Browser Page] SpeechRecognition ended.');
        // Keep an error/stop message on screen instead of overwriting it.
        if (!statusDiv.textContent.includes('错误') && !statusDiv.textContent.includes('停止')) {
            updateStatus("识别已停止。等待指令...");
        }
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ type: 'status', message: 'stopped' }));
        }
        recognition = null;
    };
    return true;
}
// Handle a 'start' command: (re)build the recognition instance, confirm
// microphone permission with a throwaway getUserMedia probe, then start.
function startRecognition() {
    if (!SpeechRecognition) {
        updateStatus('错误:浏览器不支持 Web Speech API。');
        return;
    }
    if (recognition) {
        console.log('[Browser Page] Recognition already exists, stopping first.');
        stopRecognition();
    }
    if (!setupRecognition()) return;
    console.log('[Browser Page] Attempting to start recognition...');
    try {
        // Request mic access first so recognition.start() does not fail on
        // permissions; the probe stream is released immediately.
        navigator.mediaDevices.getUserMedia({ audio: true })
            .then(stream => {
                console.log('[Browser Page] Microphone access granted.');
                stream.getTracks().forEach(track => track.stop());
                // NOTE(review): the previous instance's onend fires
                // asynchronously and sets `recognition = null`, so it can race
                // with the instance just created — confirm this path.
                if (recognition) {
                    recognition.start();
                } else {
                    updateStatus('错误Recognition 实例丢失。');
                    console.error('[Browser Page] Recognition instance lost before start.');
                }
            })
            .catch(err => {
                console.error('[Browser Page] Microphone access error:', err);
                updateStatus(`错误: 无法访问麦克风 (${err.name})`);
                recognition = null;
            });
    } catch (e) {
        // Only synchronous failures (e.g. missing mediaDevices) land here;
        // promise rejections are handled in .catch above.
        console.error('[Browser Page] Error calling recognition.start():', e);
        updateStatus(`启动识别时出错: ${e.message}`);
        recognition = null;
    }
}
// Handle a 'stop' command: stop the active recognition session, if any.
// The instance itself is released by its onend handler, not here.
function stopRecognition() {
    // Nothing to do when no recognition session is active.
    if (!recognition) {
        console.log('[Browser Page] Recognition not active, nothing to stop.');
        updateStatus("识别未运行。");
        return;
    }
    console.log('[Browser Page] Stopping recognition...');
    updateStatus("正在停止识别...");
    try {
        recognition.stop();
    } catch (e) {
        // stop() failed — force-release the instance so a new one can be built.
        console.error('[Browser Page] Error calling recognition.stop():', e);
        recognition = null;
        updateStatus("停止时出错,已强制重置。");
    }
}
</script>
</body>
</html>

146
public/asr-server/server.js Normal file
View File

@ -0,0 +1,146 @@
const http = require('http')
const WebSocket = require('ws')
const express = require('express')
const path = require('path') // Need path module
// Express app serves the relay page; the WebSocket server is attached below.
const app = express()
const port = 8080 // Define the port
// Serve the relay page to the Edge browser.
app.get('/', (req, res) => {
  // Use path.join for cross-platform compatibility
  res.sendFile(path.join(__dirname, 'index.html'))
})
// One HTTP server shared by Express and the WebSocket endpoint.
const server = http.createServer(app)
const wss = new WebSocket.Server({ server })
// At most one browser page and one Electron client at a time.
let browserConnection = null
let electronConnection = null
// Relay hub: clients identify as 'browser' (the Edge relay page) or
// 'electron' (the app). Electron's start/stop commands are forwarded to the
// browser; the browser's results/status/errors are forwarded to Electron.
wss.on('connection', (ws) => {
  console.log('[Server] WebSocket client connected') // Add log
  ws.on('message', (message) => {
    let data
    try {
      // Ensure message is treated as string before parsing
      data = JSON.parse(message.toString())
      console.log('[Server] Received message:', data) // Log parsed data
    } catch (e) {
      console.error('[Server] Failed to parse message or message is not JSON:', message.toString(), e)
      return // Ignore non-JSON messages
    }
    // Identify the client type and register role-specific handlers.
    if (data.type === 'identify') {
      if (data.role === 'browser') {
        browserConnection = ws
        console.log('[Server] Browser identified and connected')
        // Notify Electron that the browser is ready
        if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' }));
          console.log('[Server] Sent browser_ready status to Electron');
        }
        // Notify Electron if it's already connected
        if (electronConnection) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connected' }))
        }
        ws.on('close', () => {
          console.log('[Server] Browser disconnected')
          // BUGFIX: only clear the slot if this socket is still the active
          // browser connection — a late close event from a socket that was
          // already replaced must not clobber the newer connection.
          if (browserConnection === ws) {
            browserConnection = null
            // Notify Electron
            if (electronConnection) {
              electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser disconnected' }))
            }
          }
        })
        ws.on('error', (error) => {
          console.error('[Server] Browser WebSocket error:', error)
          // BUGFIX: same identity guard as 'close' above.
          if (browserConnection === ws) {
            browserConnection = null // Assume disconnected on error
            if (electronConnection) {
              electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' }))
            }
          }
        })
      } else if (data.role === 'electron') {
        electronConnection = ws
        console.log('[Server] Electron identified and connected')
        // If browser is already connected when Electron connects, notify Electron immediately
        if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' }));
          console.log('[Server] Sent initial browser_ready status to Electron');
        }
        ws.on('close', () => {
          console.log('[Server] Electron disconnected')
          // BUGFIX: guard against clearing a newer Electron connection.
          if (electronConnection === ws) {
            electronConnection = null
          }
          // Maybe send stop to browser if electron disconnects?
          // if (browserConnection) browserConnection.send(JSON.stringify({ type: 'stop' }));
        })
        ws.on('error', (error) => {
          console.error('[Server] Electron WebSocket error:', error)
          if (electronConnection === ws) {
            electronConnection = null // Assume disconnected on error
          }
        })
      }
    }
    // Electron controls start/stop; relay the command to the browser page.
    else if (data.type === 'start' && ws === electronConnection) {
      if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying START command to browser')
        browserConnection.send(JSON.stringify({ type: 'start' }))
      } else {
        console.log('[Server] Cannot relay START: Browser not connected')
        // Optionally notify Electron back
        electronConnection.send(JSON.stringify({ type: 'error', message: 'Browser not connected for ASR' }))
      }
    } else if (data.type === 'stop' && ws === electronConnection) {
      if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying STOP command to browser')
        browserConnection.send(JSON.stringify({ type: 'stop' }))
      } else {
        console.log('[Server] Cannot relay STOP: Browser not connected')
      }
    }
    // Browser sends a recognition result; relay to Electron.
    else if (data.type === 'result' && ws === browserConnection) {
      if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
        // console.log('[Server] Relaying RESULT to Electron:', data.data); // Log less frequently if needed
        electronConnection.send(JSON.stringify({ type: 'result', data: data.data }))
      } else {
        // console.log('[Server] Cannot relay RESULT: Electron not connected');
      }
    }
    // Browser sends a status update (e.g. 'stopped'); relay to Electron.
    else if (data.type === 'status' && ws === browserConnection) {
      if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying STATUS to Electron:', data.message) // Log status being relayed
        electronConnection.send(JSON.stringify({ type: 'status', message: data.message }))
      } else {
        console.log('[Server] Cannot relay STATUS: Electron not connected')
      }
    } else {
      console.log('[Server] Received unknown message type or from unknown source:', data)
    }
  })
  ws.on('error', (error) => {
    // Generic error handling for connection before identification
    console.error('[Server] Initial WebSocket connection error:', error)
    // Attempt to clean up based on which connection it might be (if identified)
    if (ws === browserConnection) {
      browserConnection = null
      if (electronConnection)
        electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' }))
    } else if (ws === electronConnection) {
      electronConnection = null
    }
  })
})
// Bind the relay server on the fixed local port.
server.listen(port, () => {
  console.log(`[Server] Server running at http://localhost:${port}`)
})
// Handle server errors
// Fatal, e.g. EADDRINUSE when another instance already owns the port.
server.on('error', (error) => {
  console.error(`[Server] Failed to start server:`, error)
  process.exit(1) // Exit if server fails to start
})

View File

@ -1,4 +1,6 @@
import fs from 'node:fs'
import { spawn, ChildProcess } from 'node:child_process'
import path from 'node:path'
import { isMac, isWin } from '@main/constant'
import { getBinaryPath, isBinaryExists, runInstallScript } from '@main/utils/process'
@ -29,6 +31,9 @@ import { decrypt, encrypt } from './utils/aes'
import { getConfigDir, getFilesDir } from './utils/file'
import { compress, decompress } from './utils/zip'
// 存储ASR服务器进程
let asrServerProcess: ChildProcess | null = null
const fileManager = new FileStorage()
const backupManager = new BackupManager()
const exportService = new ExportService(fileManager)
@ -291,4 +296,103 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
ipcMain.handle(IpcChannel.Nutstore_GetDirectoryContents, (_, token: string, path: string) =>
NutstoreService.getDirectoryContents(token, path)
)
// Start the bundled ASR relay server: a packaged exe when present, otherwise
// server.js run through node. Resolves to { success, pid } or { success: false, error }.
ipcMain.handle('start-asr-server', async () => {
  try {
    // Reuse the already-running server instead of spawning a second one.
    if (asrServerProcess) {
      return { success: true, pid: asrServerProcess.pid }
    }
    console.log('App path:', app.getAppPath())
    // Dev and production builds ship the server in different locations.
    let serverPath = ''
    let isExeFile = false
    // Prefer the packaged standalone executable when it exists.
    const exePath = path.join(app.getAppPath(), 'resources', 'cherry-asr-server.exe')
    if (fs.existsSync(exePath)) {
      serverPath = exePath
      isExeFile = true
      console.log('检测到打包后的exe文件:', serverPath)
    } else if (process.env.NODE_ENV === 'development') {
      // Development: run the source copy under src/renderer.
      serverPath = path.join(app.getAppPath(), 'src', 'renderer', 'src', 'assets', 'asr-server', 'server.js')
    } else {
      // Production: run the copy shipped under public/.
      serverPath = path.join(app.getAppPath(), 'public', 'asr-server', 'server.js')
    }
    console.log('ASR服务器路径:', serverPath)
    // Bail out early with a clear error when the server file is missing.
    if (!fs.existsSync(serverPath)) {
      return { success: false, error: '服务器文件不存在' }
    }
    // Launch the server: the exe directly, or the js file through node.
    asrServerProcess = isExeFile
      ? spawn(serverPath, [], { stdio: 'pipe', detached: false })
      : spawn('node', [serverPath], { stdio: 'pipe', detached: false })
    // Mirror server output into the main-process console.
    asrServerProcess.stdout?.on('data', (data) => {
      console.log(`[ASR Server] ${data.toString()}`)
    })
    asrServerProcess.stderr?.on('data', (data) => {
      console.error(`[ASR Server Error] ${data.toString()}`)
    })
    // Clear the handle whenever the server exits for any reason.
    asrServerProcess.on('close', (code) => {
      console.log(`[ASR Server] 进程退出,退出码: ${code}`)
      asrServerProcess = null
    })
    // Give the server a moment to bind its port.
    await new Promise((resolve) => setTimeout(resolve, 1000))
    // BUGFIX: the 'close' handler above may have cleared the handle during the
    // wait (server crashed on startup); report that instead of throwing on .pid.
    if (!asrServerProcess) {
      return { success: false, error: 'ASR服务器启动后立即退出' }
    }
    return { success: true, pid: asrServerProcess.pid }
  } catch (error) {
    console.error('启动ASR服务器失败:', error)
    return { success: false, error: (error as Error).message }
  }
})
// Stop the ASR relay server started via 'start-asr-server'. Idempotent:
// succeeds immediately when nothing is running. A mismatched pid is logged
// but the live process is still terminated.
ipcMain.handle('stop-asr-server', async (_event, pid) => {
  const proc = asrServerProcess
  // Nothing running — nothing to do.
  if (!proc) {
    return { success: true }
  }
  try {
    // Warn (but proceed) when the caller's PID does not match the live process.
    if (proc.pid !== pid) {
      console.warn(`请求停止的PID (${pid}) 与当前运行的ASR服务器PID (${proc.pid}) 不匹配`)
    }
    // Terminate the child, then give it a short grace period to exit.
    proc.kill()
    await new Promise((resolve) => setTimeout(resolve, 500))
    asrServerProcess = null
    return { success: true }
  } catch (error) {
    console.error('停止ASR服务器失败:', error)
    return { success: false, error: (error as Error).message }
  }
})
}

View File

@ -0,0 +1,368 @@
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Edge ASR (External)</title>
<style>
body {
font-family: sans-serif;
padding: 1em;
}
#status {
margin-top: 1em;
font-style: italic;
color: #555;
}
#result {
margin-top: 0.5em;
border: 1px solid #ccc;
padding: 0.5em;
min-height: 50px;
background: #f9f9f9;
}
</style>
</head>
<body>
<h1>Edge ASR 中继页面</h1>
<p>这个页面需要在 Edge 浏览器中保持打开,以便 Electron 应用使用其语音识别功能。</p>
<div id="status">正在连接到服务器...</div>
<div id="result"></div>
<script>
// --- DOM references and relay-server connection ---
const statusDiv = document.getElementById('status');
const resultDiv = document.getElementById('result');
// Socket to the local relay server started by the Electron app.
const ws = new WebSocket('ws://localhost:8080'); // Use the defined port
// Active SpeechRecognition instance, or null when idle.
let recognition = null;
// Web Speech API constructor (vendor-prefixed in Chromium/Edge).
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
// Show a status message on the page and mirror it to the console.
function updateStatus(message) {
    console.log(`[Browser Page Status] ${message}`);
    statusDiv.textContent = message;
}
// Identify this client to the relay server as the 'browser' role.
ws.onopen = () => {
    updateStatus('已连接到服务器,等待指令...');
    ws.send(JSON.stringify({ type: 'identify', role: 'browser' }));
};
// Dispatch relayed commands from Electron ('start' / 'stop').
ws.onmessage = (event) => {
    let data;
    try {
        data = JSON.parse(event.data);
        console.log('[Browser Page] Received command:', data);
    } catch (e) {
        console.error('[Browser Page] Received non-JSON message:', event.data);
        return;
    }
    if (data.type === 'start') {
        startRecognition();
    } else if (data.type === 'stop') {
        stopRecognition();
    } else {
        console.warn('[Browser Page] Received unknown command type:', data.type);
    }
};
ws.onerror = (error) => {
    console.error('[Browser Page] WebSocket Error:', error);
    updateStatus('WebSocket 连接错误!请检查服务器是否运行。');
};
// Stop any active recognition once the server link is lost.
ws.onclose = () => {
    console.log('[Browser Page] WebSocket Connection Closed');
    updateStatus('与服务器断开连接。请刷新页面或重启服务器。');
    stopRecognition();
};
// Build a fresh SpeechRecognition instance and wire its lifecycle events to
// the relay socket. This version auto-restarts recognition after 'no-speech'
// errors and after spontaneous ends. Returns false when the browser lacks
// Web Speech API support, true otherwise.
function setupRecognition() {
    if (!SpeechRecognition) {
        updateStatus('错误:此浏览器不支持 Web Speech API。');
        return false;
    }
    // NOTE(review): 'recognizing' is not a standard SpeechRecognition
    // property, so this early-out probably never fires — confirm intent.
    if (recognition && recognition.recognizing) {
        console.log('[Browser Page] Recognition already active.');
        return true;
    }
    recognition = new SpeechRecognition();
    recognition.lang = 'zh-CN';
    recognition.continuous = true;
    recognition.interimResults = true;
    // Extra settings intended to improve recognition reliability.
    recognition.maxAlternatives = 3; // request multiple candidate transcripts
    // Attempt a shorter recognition latency so the user sees results sooner.
    // Note: this property is non-standard and may be ignored by browsers.
    try {
        // @ts-ignore
        recognition.audioStart = 0.1; // try a lower audio-start threshold
    } catch (e) {
        console.log('[Browser Page] audioStart property not supported');
    }
    recognition.onstart = () => {
        updateStatus("🎤 正在识别...");
        console.log('[Browser Page] SpeechRecognition started.');
    };
    // Aggregate final and interim transcripts, log each candidate with its
    // confidence for debugging, and forward the current best text.
    recognition.onresult = (event) => {
        console.log('[Browser Page] Recognition result event:', event);
        let interim_transcript = '';
        let final_transcript = '';
        // Log per-result details to aid debugging.
        for (let i = event.resultIndex; i < event.results.length; ++i) {
            const confidence = event.results[i][0].confidence;
            console.log(`[Browser Page] Result ${i}: ${event.results[i][0].transcript} (Confidence: ${confidence.toFixed(2)})`);
            if (event.results[i].isFinal) {
                final_transcript += event.results[i][0].transcript;
            } else {
                interim_transcript += event.results[i][0].transcript;
            }
        }
        const resultText = final_transcript || interim_transcript;
        resultDiv.textContent = resultText;
        // Update the on-page status once speech has been captured.
        if (resultText) {
            updateStatus(`🎤 正在识别... (已捕捉到语音)`);
        }
        if (ws.readyState === WebSocket.OPEN) {
            console.log(`[Browser Page] Sending ${final_transcript ? 'final' : 'interim'} result to server:`, resultText);
            ws.send(JSON.stringify({ type: 'result', data: { text: resultText, isFinal: !!final_transcript } }));
        }
    };
    // Map recognition errors to friendlier messages; 'no-speech' additionally
    // schedules a restart attempt after one second.
    recognition.onerror = (event) => {
        console.error(`[Browser Page] SpeechRecognition Error - Type: ${event.error}, Message: ${event.message}`);
        // Provide a friendlier message per error type.
        let errorMessage = '';
        switch (event.error) {
            case 'no-speech':
                errorMessage = '未检测到语音,请确保麦克风工作正常并尝试说话。';
                // Try restarting recognition after a short delay.
                setTimeout(() => {
                    if (recognition) {
                        try {
                            recognition.start();
                            console.log('[Browser Page] Restarting recognition after no-speech error');
                        } catch (e) {
                            console.error('[Browser Page] Failed to restart recognition:', e);
                        }
                    }
                }, 1000);
                break;
            case 'audio-capture':
                errorMessage = '无法捕获音频,请确保麦克风已连接并已授权。';
                break;
            case 'not-allowed':
                errorMessage = '浏览器不允许使用麦克风,请检查权限设置。';
                break;
            case 'network':
                errorMessage = '网络错误导致语音识别失败。';
                break;
            case 'aborted':
                errorMessage = '语音识别被用户或系统中止。';
                break;
            default:
                errorMessage = `识别错误: ${event.error}`;
        }
        updateStatus(`错误: ${errorMessage}`);
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({
                type: 'error',
                data: {
                    error: event.error,
                    message: errorMessage || event.message || `Recognition error: ${event.error}`
                }
            }));
        }
    };
    // When recognition ends spontaneously, restart it automatically; only a
    // manual stop or an error (detected via the status text) ends for good.
    recognition.onend = () => {
        console.log('[Browser Page] SpeechRecognition ended.');
        // Did we end because of an error or a user-requested stop?
        const isErrorOrStopped = statusDiv.textContent.includes('错误') || statusDiv.textContent.includes('停止');
        if (!isErrorOrStopped) {
            // Spontaneous end: restart recognition automatically.
            updateStatus("识别暂停,正在重新启动...");
            // Capture the current instance so we can detect replacement.
            const currentRecognition = recognition;
            // Attempt the restart shortly after the end event.
            setTimeout(() => {
                try {
                    if (currentRecognition && currentRecognition === recognition) {
                        currentRecognition.start();
                        console.log('[Browser Page] Automatically restarting recognition');
                    } else {
                        // Instance was replaced in the meantime — build a new one.
                        setupRecognition();
                        if (recognition) {
                            recognition.start();
                            console.log('[Browser Page] Created new recognition instance and started');
                        }
                    }
                } catch (e) {
                    console.error('[Browser Page] Failed to restart recognition:', e);
                    updateStatus("识别已停止。等待指令...");
                }
            }, 300);
        } else {
            updateStatus("识别已停止。等待指令...");
            if (ws.readyState === WebSocket.OPEN) {
                ws.send(JSON.stringify({ type: 'status', message: 'stopped' }));
            }
            // Only release the instance on manual stop or error.
            recognition = null;
        }
    };
    return true;
}
// Handle a 'start' command: (re)build the recognition instance, probe the
// microphone (permission + one-shot volume check via Web Audio), then start.
function startRecognition() {
    if (!SpeechRecognition) {
        updateStatus('错误:浏览器不支持 Web Speech API。');
        return;
    }
    // Show a "preparing" state while the microphone is being set up.
    updateStatus('正在准备麦克风...');
    if (recognition) {
        console.log('[Browser Page] Recognition already exists, stopping first.');
        stopRecognition();
    }
    if (!setupRecognition()) return;
    console.log('[Browser Page] Attempting to start recognition...');
    try {
        // Give the user ample time to answer the permission prompt.
        const micPermissionTimeout = setTimeout(() => {
            updateStatus('获取麦克风权限超时,请刷新页面重试。');
        }, 10000); // 10-second timeout
        navigator.mediaDevices.getUserMedia({
            audio: {
                echoCancellation: true,
                noiseSuppression: true,
                autoGainControl: true
            }
        })
            .then(stream => {
                clearTimeout(micPermissionTimeout);
                console.log('[Browser Page] Microphone access granted.');
                // One-shot microphone volume check via an analyser chain.
                // NOTE(review): ScriptProcessorNode is deprecated in favor of
                // AudioWorklet — consider migrating.
                const audioContext = new AudioContext();
                const analyser = audioContext.createAnalyser();
                const microphone = audioContext.createMediaStreamSource(stream);
                const javascriptNode = audioContext.createScriptProcessor(2048, 1, 1);
                analyser.smoothingTimeConstant = 0.8;
                analyser.fftSize = 1024;
                microphone.connect(analyser);
                analyser.connect(javascriptNode);
                javascriptNode.connect(audioContext.destination);
                javascriptNode.onaudioprocess = function () {
                    const array = new Uint8Array(analyser.frequencyBinCount);
                    analyser.getByteFrequencyData(array);
                    let values = 0;
                    const length = array.length;
                    for (let i = 0; i < length; i++) {
                        values += (array[i]);
                    }
                    const average = values / length;
                    console.log('[Browser Page] Microphone volume level:', average);
                    // Warn the user when the input level looks too low.
                    if (average < 5) {
                        updateStatus('麦克风音量很低,请说话或检查麦克风设置。');
                    } else {
                        updateStatus('🎤 正在识别...');
                    }
                    // Sample once, then tear down the analysis chain.
                    microphone.disconnect();
                    analyser.disconnect();
                    javascriptNode.disconnect();
                };
                // Release the probe stream and audio context shortly after.
                setTimeout(() => {
                    stream.getTracks().forEach(track => track.stop());
                    audioContext.close();
                }, 1000);
                // Start speech recognition.
                // NOTE(review): the previous instance's onend can fire
                // asynchronously and null out `recognition` — confirm this race.
                if (recognition) {
                    recognition.start();
                    updateStatus('🎤 正在识别...');
                } else {
                    updateStatus('错误Recognition 实例丢失。');
                    console.error('[Browser Page] Recognition instance lost before start.');
                }
            })
            .catch(err => {
                clearTimeout(micPermissionTimeout);
                console.error('[Browser Page] Microphone access error:', err);
                // Translate common getUserMedia failures into friendlier text.
                let errorMsg = `无法访问麦克风 (${err.name})`;
                if (err.name === 'NotAllowedError') {
                    errorMsg = '麦克风访问被拒绝。请在浏览器设置中允许麦克风访问权限。';
                } else if (err.name === 'NotFoundError') {
                    errorMsg = '未找到麦克风设备。请确保麦克风已连接。';
                }
                updateStatus(`错误: ${errorMsg}`);
                recognition = null;
            });
    } catch (e) {
        // Only synchronous failures (e.g. missing mediaDevices) land here.
        console.error('[Browser Page] Error calling recognition.start():', e);
        updateStatus(`启动识别时出错: ${e.message}`);
        recognition = null;
    }
}
// Handle a 'stop' command: stop the active recognition session, if any.
// Instance cleanup on a successful stop happens in the onend handler.
function stopRecognition() {
    // No active session — report and bail out.
    if (!recognition) {
        console.log('[Browser Page] Recognition not active, nothing to stop.');
        updateStatus("识别未运行。");
        return;
    }
    console.log('[Browser Page] Stopping recognition...');
    updateStatus("正在停止识别...");
    try {
        recognition.stop();
    } catch (e) {
        // stop() failed — force-release the instance so a new one can be built.
        console.error('[Browser Page] Error calling recognition.stop():', e);
        recognition = null;
        updateStatus("停止时出错,已强制重置。");
    }
}
</script>
</body>
</html>

View File

@ -0,0 +1,27 @@
{
"name": "cherry-asr-server",
"version": "1.0.0",
"description": "Cherry Studio ASR Server",
"main": "server.js",
"bin": "server.js",
"scripts": {
"start": "node server.js",
"build": "pkg ."
},
"pkg": {
"targets": [
"node16-win-x64"
],
"outputPath": "dist",
"assets": [
"index.html"
]
},
"dependencies": {
"express": "^4.18.2",
"ws": "^8.13.0"
},
"devDependencies": {
"pkg": "^5.8.1"
}
}

View File

@ -0,0 +1,172 @@
const http = require('http')
const WebSocket = require('ws')
const express = require('express')
const path = require('path') // Need path module
// Express app serves the relay page; the WebSocket server is attached below.
const app = express()
const port = 8080 // Define the port
// Resolve the path to index.html across the three runtime layouts:
// pkg-packaged binary (next to the exe), development (next to this script),
// or, as a last resort, the current working directory.
function getIndexHtmlPath() {
  const fs = require('fs')
  // When packaged with pkg, index.html ships next to the executable.
  if (process.pkg) {
    const pkgPath = path.join(path.dirname(process.execPath), 'index.html')
    // BUGFIX: verify the file actually exists before committing to this path;
    // previously it was returned unconditionally and a missing file made
    // res.sendFile fail with no fallback.
    if (fs.existsSync(pkgPath)) {
      return pkgPath
    }
  }
  // Development: index.html sits next to this script.
  const devPath = path.join(__dirname, 'index.html')
  try {
    if (fs.existsSync(devPath)) {
      return devPath
    }
  } catch (e) {
    console.error('Error checking file existence:', e)
  }
  // Fall back to the current working directory.
  return path.join(process.cwd(), 'index.html')
}
// Serve the relay page to the Edge browser.
app.get('/', (req, res) => {
  const indexPath = getIndexHtmlPath();
  console.log(`Serving index.html from: ${indexPath}`);
  res.sendFile(indexPath);
})
// One HTTP server shared by Express and the WebSocket endpoint.
const server = http.createServer(app)
const wss = new WebSocket.Server({ server })
// At most one browser page and one Electron client at a time.
let browserConnection = null
let electronConnection = null
// Relay hub: clients identify as 'browser' (the Edge relay page) or
// 'electron' (the app). Electron's start/stop commands are forwarded to the
// browser; the browser's results/status/errors are forwarded to Electron.
wss.on('connection', (ws) => {
  console.log('[Server] WebSocket client connected') // Add log
  ws.on('message', (message) => {
    let data
    try {
      // Ensure message is treated as string before parsing
      data = JSON.parse(message.toString())
      console.log('[Server] Received message:', data) // Log parsed data
    } catch (e) {
      console.error('[Server] Failed to parse message or message is not JSON:', message.toString(), e)
      return // Ignore non-JSON messages
    }
    // Identify the client type and register role-specific handlers.
    if (data.type === 'identify') {
      if (data.role === 'browser') {
        browserConnection = ws
        console.log('[Server] Browser identified and connected')
        // Notify Electron that the browser is ready
        if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' }));
          console.log('[Server] Sent browser_ready status to Electron');
        }
        // Notify Electron if it's already connected
        if (electronConnection) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connected' }))
        }
        ws.on('close', () => {
          console.log('[Server] Browser disconnected')
          // BUGFIX: only clear the slot if this socket is still the active
          // browser connection — a late close event from a socket that was
          // already replaced must not clobber the newer connection.
          if (browserConnection === ws) {
            browserConnection = null
            // Notify Electron
            if (electronConnection) {
              electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser disconnected' }))
            }
          }
        })
        ws.on('error', (error) => {
          console.error('[Server] Browser WebSocket error:', error)
          // BUGFIX: same identity guard as 'close' above.
          if (browserConnection === ws) {
            browserConnection = null // Assume disconnected on error
            if (electronConnection) {
              electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' }))
            }
          }
        })
      } else if (data.role === 'electron') {
        electronConnection = ws
        console.log('[Server] Electron identified and connected')
        // If browser is already connected when Electron connects, notify Electron immediately
        if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
          electronConnection.send(JSON.stringify({ type: 'status', message: 'browser_ready' }));
          console.log('[Server] Sent initial browser_ready status to Electron');
        }
        ws.on('close', () => {
          console.log('[Server] Electron disconnected')
          // BUGFIX: guard against clearing a newer Electron connection.
          if (electronConnection === ws) {
            electronConnection = null
          }
          // Maybe send stop to browser if electron disconnects?
          // if (browserConnection) browserConnection.send(JSON.stringify({ type: 'stop' }));
        })
        ws.on('error', (error) => {
          console.error('[Server] Electron WebSocket error:', error)
          if (electronConnection === ws) {
            electronConnection = null // Assume disconnected on error
          }
        })
      }
    }
    // Electron controls start/stop; relay the command to the browser page.
    else if (data.type === 'start' && ws === electronConnection) {
      if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying START command to browser')
        browserConnection.send(JSON.stringify({ type: 'start' }))
      } else {
        console.log('[Server] Cannot relay START: Browser not connected')
        // Optionally notify Electron back
        electronConnection.send(JSON.stringify({ type: 'error', message: 'Browser not connected for ASR' }))
      }
    } else if (data.type === 'stop' && ws === electronConnection) {
      if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying STOP command to browser')
        browserConnection.send(JSON.stringify({ type: 'stop' }))
      } else {
        console.log('[Server] Cannot relay STOP: Browser not connected')
      }
    }
    // Browser sends a recognition result; relay to Electron.
    else if (data.type === 'result' && ws === browserConnection) {
      if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
        // console.log('[Server] Relaying RESULT to Electron:', data.data); // Log less frequently if needed
        electronConnection.send(JSON.stringify({ type: 'result', data: data.data }))
      } else {
        // console.log('[Server] Cannot relay RESULT: Electron not connected');
      }
    }
    // Browser sends a status update (e.g. 'stopped'); relay to Electron.
    else if (data.type === 'status' && ws === browserConnection) {
      if (electronConnection && electronConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying STATUS to Electron:', data.message) // Log status being relayed
        electronConnection.send(JSON.stringify({ type: 'status', message: data.message }))
      } else {
        console.log('[Server] Cannot relay STATUS: Electron not connected')
      }
    } else {
      console.log('[Server] Received unknown message type or from unknown source:', data)
    }
  })
  ws.on('error', (error) => {
    // Generic error handling for connection before identification
    console.error('[Server] Initial WebSocket connection error:', error)
    // Attempt to clean up based on which connection it might be (if identified)
    if (ws === browserConnection) {
      browserConnection = null
      if (electronConnection)
        electronConnection.send(JSON.stringify({ type: 'status', message: 'Browser connection error' }))
    } else if (ws === electronConnection) {
      electronConnection = null
    }
  })
})
// Bind the relay server on the fixed local port.
server.listen(port, () => {
  console.log(`[Server] Server running at http://localhost:${port}`)
})
// Handle server errors
// Fatal, e.g. EADDRINUSE when another instance already owns the port.
server.on('error', (error) => {
  console.error(`[Server] Failed to start server:`, error)
  process.exit(1) // Exit if server fails to start
})

View File

@ -0,0 +1,226 @@
import { AudioOutlined, LoadingOutlined } from '@ant-design/icons'
import { useSettings } from '@renderer/hooks/useSettings'
import ASRService from '@renderer/services/ASRService'
import { Button, Tooltip } from 'antd'
import { FC, useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
// Props for the ASR (speech-to-text) toolbar button.
interface Props {
  // Called with the recognized text when transcription produces a result.
  onTranscribed: (text: string) => void
  // Disables the button (e.g. while the surrounding input is busy).
  disabled?: boolean
  // Optional inline styles forwarded to the underlying antd Button.
  style?: React.CSSProperties
}
// Microphone button that toggles ASR recording. Clicking starts recording
// immediately (with a 3-second visual countdown); clicking again stops and
// transcribes. Double-click cancels. Renders nothing when ASR is disabled.
const ASRButton: FC<Props> = ({ onTranscribed, disabled = false, style }) => {
  const { t } = useTranslation()
  const { asrEnabled } = useSettings()
  const [isRecording, setIsRecording] = useState(false)
  const [isProcessing, setIsProcessing] = useState(false)
  const [countdown, setCountdown] = useState(0)
  const [isCountingDown, setIsCountingDown] = useState(false)
  // Toggle handler: stop-and-transcribe when recording, otherwise start.
  const handleASR = useCallback(async () => {
    if (!asrEnabled) {
      window.message.error({ content: t('settings.asr.error.not_enabled'), key: 'asr-error' })
      return
    }
    if (isRecording) {
      // Stop recording and process the result.
      setIsRecording(false)
      setIsProcessing(true)
      try {
        // Wrap the service callback so the 'stopped' signal resets our state.
        // NOTE(review): reads ASRService.resultCallback directly — relies on
        // the service exposing its internal callback; confirm this contract.
        const originalCallback = ASRService.resultCallback
        const stopCallback = (text: string) => {
          // Empty string means "stopped with no text": only reset state.
          if (text === '') {
            setIsProcessing(false)
            return
          }
          // Otherwise forward to the original callback, then reset.
          if (originalCallback) originalCallback(text)
          setIsProcessing(false)
        }
        await ASRService.stopRecording(stopCallback)
      } catch (error) {
        console.error('ASR error:', error)
        setIsProcessing(false)
      }
    } else {
      // Start recording.
      // Show a 3-second countdown while recording begins immediately.
      setIsCountingDown(true)
      setCountdown(3)
      setIsRecording(true)
      // Send the start signal right away.
      try {
        await ASRService.startRecording(onTranscribed)
      } catch (error) {
        console.error('Failed to start recording:', error)
        setIsRecording(false)
        setIsCountingDown(false)
        return
      }
      // When the countdown elapses, only hide the countdown display.
      setTimeout(() => {
        setIsCountingDown(false)
      }, 3000) // 3-second countdown
    }
  }, [asrEnabled, isRecording, onTranscribed, t])
  // Double-click handler: cancel an in-progress countdown or recording.
  // NOTE(review): the button is disabled while counting down, which may
  // prevent this handler from firing during the countdown — confirm.
  const handleCancel = useCallback(() => {
    if (isCountingDown) {
      // Cancel both the countdown and the recording (already started).
      setIsCountingDown(false)
      setCountdown(0)
      ASRService.cancelRecording()
      setIsRecording(false)
    } else if (isRecording) {
      // Already recording: just cancel it.
      ASRService.cancelRecording()
      setIsRecording(false)
    }
  }, [isRecording, isCountingDown])
  // Drive the visible countdown, one tick per second.
  useEffect(() => {
    if (isCountingDown && countdown > 0) {
      const timer = setTimeout(() => {
        setCountdown(countdown - 1)
      }, 1000)
      return () => clearTimeout(timer)
    }
    return undefined // explicit return to satisfy TS7030 (noImplicitReturns)
  }, [countdown, isCountingDown])
  if (!asrEnabled) {
    return null
  }
  return (
    <Tooltip title={isRecording ? t('settings.asr.stop') : isCountingDown ? `${t('settings.asr.preparing')} (${countdown})` : t('settings.asr.start')}>
      <ButtonWrapper>
        <StyledButton
          type={isRecording || isCountingDown ? 'primary' : 'default'}
          icon={isProcessing ? <LoadingOutlined /> : isCountingDown ? null : <AudioOutlined />}
          onClick={handleASR}
          onDoubleClick={handleCancel}
          disabled={disabled || isProcessing || (isCountingDown && countdown > 0)}
          style={style}
          className={isCountingDown ? 'counting-down' : ''}
        >
          {isCountingDown && (
            <CountdownNumber>{countdown}</CountdownNumber>
          )}
        </StyledButton>
        {isCountingDown && (
          <CountdownIndicator>
            {t('settings.asr.preparing')} ({countdown})
          </CountdownIndicator>
        )}
      </ButtonWrapper>
    </Tooltip>
  )
}
// Positioning context so the floating countdown indicator can be absolutely
// positioned relative to the button.
const ButtonWrapper = styled.div`
  position: relative;
  display: inline-block;
`
// Pill-shaped "preparing (N)" bubble shown above the button during the
// countdown, with a small downward-pointing arrow and a pulsing animation.
const CountdownIndicator = styled.div`
  position: absolute;
  top: -25px;
  left: 50%;
  transform: translateX(-50%);
  background-color: var(--color-primary);
  color: white;
  padding: 2px 8px;
  border-radius: 10px;
  font-size: 12px;
  white-space: nowrap;
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
  animation: pulse 1s infinite;
  z-index: 10;
  @keyframes pulse {
    0% { opacity: 0.7; }
    50% { opacity: 1; }
    100% { opacity: 0.7; }
  }
  &:after {
    content: '';
    position: absolute;
    bottom: -5px;
    left: 50%;
    transform: translateX(-50%);
    width: 0;
    height: 0;
    border-left: 5px solid transparent;
    border-right: 5px solid transparent;
    border-top: 5px solid var(--color-primary);
  }
`
// The digit rendered inside the button while counting down; zooms in and out.
const CountdownNumber = styled.span`
  font-size: 18px;
  font-weight: bold;
  animation: zoom 1s infinite;
  @keyframes zoom {
    0% { transform: scale(0.8); }
    50% { transform: scale(1.2); }
    100% { transform: scale(0.8); }
  }
`
// Circular icon button; the 'counting-down' class (set while the countdown is
// visible) switches it to the primary color scheme.
const StyledButton = styled(Button)`
  min-width: 30px;
  height: 30px;
  font-size: 16px;
  border-radius: 50%;
  transition: all 0.3s ease;
  color: var(--color-icon);
  display: flex;
  flex-direction: row;
  justify-content: center;
  align-items: center;
  padding: 0;
  &.anticon,
  &.iconfont {
    transition: all 0.3s ease;
    color: var(--color-icon);
  }
  &:hover {
    background-color: var(--color-background-soft);
    .anticon,
    .iconfont {
      color: var(--color-text-1);
    }
  }
  &.active {
    background-color: var(--color-primary) !important;
    .anticon,
    .iconfont {
      color: var(--color-white-soft);
    }
    &:hover {
      background-color: var(--color-primary);
    }
  }
  &.counting-down {
    font-weight: bold;
    background-color: var(--color-primary);
    color: var(--color-white-soft);
  }
`
export default ASRButton

View File

@ -1377,6 +1377,52 @@
"test": "Test Speech",
"help": "Text-to-speech functionality supports converting text to natural-sounding speech.",
"learn_more": "Learn more"
},
"asr": {
"title": "Speech Recognition",
"tab_title": "Speech Recognition",
"enable": "Enable Speech Recognition",
"enable.help": "Enable to convert speech to text",
"service_type": "Service Type",
"service_type.browser": "Browser",
"service_type.local": "Local Server",
"api_key": "API Key",
"api_key.placeholder": "Enter OpenAI API key",
"api_url": "API URL",
"api_url.placeholder": "Example: https://api.openai.com/v1/audio/transcriptions",
"model": "Model",
"browser.info": "Use the browser's built-in speech recognition feature, no additional setup required",
"local.info": "Use local server and browser for speech recognition, need to start the server and open the browser page first",
"local.browser_tip": "Please open this page in Edge browser and keep the browser window open",
"local.test_connection": "Test Connection",
"local.connection_success": "Connection successful",
"local.connection_failed": "Connection failed, please make sure the server is running",
"server.start": "Start Server",
"server.stop": "Stop Server",
"server.starting": "Starting server...",
"server.started": "Server started",
"server.stopping": "Stopping server...",
"server.stopped": "Server stopped",
"server.already_running": "Server is already running",
"server.not_running": "Server is not running",
"server.start_failed": "Failed to start server",
"server.stop_failed": "Failed to stop server",
"open_browser": "Open Browser Page",
"test": "Test Speech Recognition",
"test_info": "Please use the speech recognition button in the input box to test",
"start": "Start Recording",
"stop": "Stop Recording",
"preparing": "Preparing",
"recording": "Recording...",
"processing": "Processing speech...",
"success": "Speech recognition successful",
"completed": "Speech recognition completed",
"canceled": "Recording canceled",
"error": {
"not_enabled": "Speech recognition is not enabled",
"start_failed": "Failed to start recording",
"transcribe_failed": "Failed to transcribe speech"
}
}
},
"translate": {

View File

@ -1356,6 +1356,52 @@
},
"help": "OpenAIのTTS APIを使用するには、APIキーが必要です。Edge TTSはブラウザの機能を使用するため、APIキーは不要です。",
"learn_more": "詳細はこちら"
},
"asr": {
"title": "音声認識",
"tab_title": "音声認識",
"enable": "音声認識を有効にする",
"enable.help": "音声をテキストに変換する機能を有効にします",
"service_type": "サービスタイプ",
"service_type.browser": "ブラウザ",
"service_type.local": "ローカルサーバー",
"api_key": "APIキー",
"api_key.placeholder": "OpenAI APIキーを入力",
"api_url": "API URL",
"api_url.placeholder": "例https://api.openai.com/v1/audio/transcriptions",
"model": "モデル",
"browser.info": "ブラウザの内蔵音声認識機能を使用します。追加設定は不要です",
"local.info": "ローカルサーバーとブラウザを使用して音声認識を行います。サーバーを起動してブラウザページを開く必要があります",
"local.browser_tip": "このページをEdgeブラウザで開き、ブラウザウィンドウを開いたままにしてください",
"local.test_connection": "接続テスト",
"local.connection_success": "接続成功",
"local.connection_failed": "接続失敗。サーバーが起動していることを確認してください",
"server.start": "サーバー起動",
"server.stop": "サーバー停止",
"server.starting": "サーバーを起動中...",
"server.started": "サーバーが起動しました",
"server.stopping": "サーバーを停止中...",
"server.stopped": "サーバーが停止しました",
"server.already_running": "サーバーは既に実行中です",
"server.not_running": "サーバーは実行されていません",
"server.start_failed": "サーバーの起動に失敗しました",
"server.stop_failed": "サーバーの停止に失敗しました",
"open_browser": "ブラウザページを開く",
"test": "音声認識テスト",
"test_info": "入力ボックスの音声認識ボタンを使用してテストしてください",
"start": "録音開始",
"stop": "録音停止",
"preparing": "準備中",
"recording": "録音中...",
"processing": "音声処理中...",
"success": "音声認識成功",
"completed": "音声認識完了",
"canceled": "録音キャンセル",
"error": {
"not_enabled": "音声認識が有効になっていません",
"start_failed": "録音の開始に失敗しました",
"transcribe_failed": "音声の文字起こしに失敗しました"
}
}
},
"translate": {

View File

@ -1335,8 +1335,14 @@
"title": "隐私设置",
"enable_privacy_mode": "匿名发送错误报告和数据统计"
},
"voice": {
"title": "语音功能",
"help": "语音功能包括文本转语音(TTS)和语音识别(ASR)。",
"learn_more": "了解更多"
},
"tts": {
"title": "语音设置",
"title": "语音合成",
"tab_title": "语音合成",
"enable": "启用语音合成",
"enable.help": "启用后可以将文本转换为语音",
"reset": "重置",
@ -1376,7 +1382,61 @@
"max_text_length": "最大文本长度",
"test": "测试语音",
"help": "语音合成功能支持将文本转换为自然语音。",
"learn_more": "了解更多"
"learn_more": "了解更多",
"error": {
"not_enabled": "语音合成功能未启用",
"no_api_key": "未设置API密钥",
"no_edge_voice": "未选择Edge TTS音色",
"browser_not_support": "浏览器不支持语音合成"
}
},
"asr": {
"title": "语音识别",
"tab_title": "语音识别",
"enable": "启用语音识别",
"enable.help": "启用后可以将语音转换为文本",
"service_type": "服务类型",
"service_type.browser": "浏览器",
"service_type.local": "本地服务器",
"api_key": "API密钥",
"api_key.placeholder": "请输入OpenAI API密钥",
"api_url": "API地址",
"api_url.placeholder": "例如https://api.openai.com/v1/audio/transcriptions",
"model": "模型",
"browser.info": "使用浏览器内置的语音识别功能,无需额外设置",
"local.info": "使用本地服务器和浏览器进行语音识别,需要先启动服务器并打开浏览器页面",
"local.browser_tip": "请在Edge浏览器中打开此页面并保持浏览器窗口打开",
"local.test_connection": "测试连接",
"local.connection_success": "连接成功",
"local.connection_failed": "连接失败,请确保服务器已启动",
"server.start": "启动服务器",
"server.stop": "停止服务器",
"server.starting": "正在启动服务器...",
"server.started": "服务器已启动",
"server.stopping": "正在停止服务器...",
"server.stopped": "服务器已停止",
"server.already_running": "服务器已经在运行中",
"server.not_running": "服务器未运行",
"server.start_failed": "启动服务器失败",
"server.stop_failed": "停止服务器失败",
"open_browser": "打开浏览器页面",
"test": "测试语音识别",
"test_info": "请在输入框中使用语音识别按钮进行测试",
"start": "开始录音",
"stop": "停止录音",
"preparing": "准备中",
"recording": "正在录音...",
"processing": "正在处理语音...",
"success": "语音识别成功",
"completed": "语音识别完成",
"canceled": "已取消录音",
"error": {
"not_enabled": "语音识别功能未启用",
"no_api_key": "未设置API密钥",
"browser_not_support": "浏览器不支持语音识别",
"start_failed": "开始录音失败",
"transcribe_failed": "语音识别失败"
}
}
},
"translate": {

View File

@ -14,6 +14,7 @@ import {
TranslationOutlined
} from '@ant-design/icons'
import { QuickPanelListItem, QuickPanelView, useQuickPanel } from '@renderer/components/QuickPanel'
import ASRButton from '@renderer/components/ASRButton'
import TranslateButton from '@renderer/components/TranslateButton'
import { isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models'
import db from '@renderer/databases'
@ -1008,6 +1009,19 @@ const Inputbar: FC<Props> = ({ assistant: _assistant, setActiveTopic, topic }) =
</ToolbarMenu>
<ToolbarMenu>
<TranslateButton text={text} onTranslated={onTranslated} isLoading={isTranslating} />
<ASRButton onTranscribed={(transcribedText) => {
// 如果是空字符串,不做任何处理
if (!transcribedText) return
// 将识别的文本添加到当前输入框
setText((prevText) => {
// 如果当前有文本,添加空格后再添加识别的文本
if (prevText.trim()) {
return prevText + ' ' + transcribedText
}
return transcribedText
})
}} />
{loading && (
<Tooltip placement="top" title={t('chat.input.pause')} arrow>
<ToolbarButton type="text" onClick={onPause} style={{ marginRight: -2, marginTop: 1 }}>

View File

@ -16,7 +16,7 @@ import { UploadOutlined } from '@ant-design/icons'
import ObsidianExportPopup from '@renderer/components/Popups/ObsidianExportPopup'
import SelectModelPopup from '@renderer/components/Popups/SelectModelPopup'
import TextEditPopup from '@renderer/components/Popups/TextEditPopup'
import TTSButton from '@renderer/components/TTSButton'
// import TTSButton from '@renderer/components/TTSButton' // 暂时不使用
import { isReasoningModel } from '@renderer/config/models'
import { TranslateLanguageOptions } from '@renderer/config/translate'
import { useMessageOperations, useTopicLoading } from '@renderer/hooks/useMessageOperations'
@ -154,14 +154,14 @@ const MessageMenubar: FC<Props> = (props) => {
const imageUrls: string[] = []
let match
let content = editedText
while ((match = imageRegex.exec(editedText)) !== null) {
imageUrls.push(match[1])
content = content.replace(match[0], '')
}
// 更新消息内容,保留图片信息
await editMessage(message.id, {
await editMessage(message.id, {
content: content.trim(),
metadata: {
...message.metadata,
@ -171,9 +171,9 @@ const MessageMenubar: FC<Props> = (props) => {
} : undefined
}
})
resendMessage && handleResendUserMessage({
...message,
resendMessage && handleResendUserMessage({
...message,
content: content.trim(),
metadata: {
...message.metadata,

View File

@ -127,7 +127,7 @@ const SettingsPage: FC = () => {
<MenuItemLink to="/settings/tts">
<MenuItem className={isRoute('/settings/tts')}>
<SoundOutlined />
{t('settings.tts.title')}
{t('settings.voice.title')}
</MenuItem>
</MenuItemLink>
<MenuItemLink to="/settings/about">

View File

@ -0,0 +1,271 @@
import { InfoCircleOutlined, GlobalOutlined, PlayCircleOutlined, StopOutlined } from '@ant-design/icons'
import { useTheme } from '@renderer/context/ThemeProvider'
import ASRService from '@renderer/services/ASRService'
import ASRServerService from '@renderer/services/ASRServerService'
import { useAppDispatch } from '@renderer/store'
import {
setAsrApiKey,
setAsrApiUrl,
setAsrEnabled,
setAsrModel,
setAsrServiceType
} from '@renderer/store/settings'
import { Button, Form, Input, Select, Space, Switch } from 'antd'
import { FC, useState, useEffect } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
import styled from 'styled-components'
/**
 * Settings panel for automatic speech recognition (ASR).
 *
 * Lets the user enable ASR and choose one of three service types:
 * - 'openai': remote transcription API (API key / URL / model fields)
 * - 'browser': the browser's built-in speech recognition
 * - 'local': a locally started ASR server plus a companion browser page
 * All values are read from and written to the Redux settings slice.
 */
const ASRSettings: FC = () => {
  const { t } = useTranslation()
  const { isDark } = useTheme() // NOTE(review): isDark appears unused in this component — confirm before removing
  const dispatch = useAppDispatch()
  // Whether the local ASR server process is running (only meaningful for 'local').
  const [isServerRunning, setIsServerRunning] = useState(false)
  // ASR settings from Redux (with fallbacks for service type and model).
  const asrEnabled = useSelector((state: any) => state.settings.asrEnabled)
  const asrServiceType = useSelector((state: any) => state.settings.asrServiceType || 'openai')
  const asrApiKey = useSelector((state: any) => state.settings.asrApiKey)
  const asrApiUrl = useSelector((state: any) => state.settings.asrApiUrl)
  const asrModel = useSelector((state: any) => state.settings.asrModel || 'whisper-1')
  // Re-check the server status whenever the user switches to the 'local' service type.
  useEffect(() => {
    if (asrServiceType === 'local') {
      setIsServerRunning(ASRServerService.isRunning())
    }
    return undefined // explicit return keeps noImplicitReturns (TS7030) happy
  }, [asrServiceType])
  // Options for the service-type selector.
  const serviceTypeOptions = [
    { label: 'OpenAI', value: 'openai' },
    { label: t('settings.asr.service_type.browser'), value: 'browser' },
    { label: t('settings.asr.service_type.local'), value: 'local' }
  ]
  // Options for the transcription-model selector (OpenAI service only).
  const modelOptions = [
    { label: 'whisper-1', value: 'whisper-1' }
  ]
  return (
    <Container>
      <Form layout="vertical">
        {/* ASR master switch */}
        <Form.Item>
          <Space>
            <Switch checked={asrEnabled} onChange={(checked) => dispatch(setAsrEnabled(checked))} />
            <span>{t('settings.asr.enable')}</span>
            <Tooltip title={t('settings.asr.enable.help')}>
              <InfoCircleOutlined style={{ color: 'var(--color-text-3)' }} />
            </Tooltip>
          </Space>
        </Form.Item>
        {/* Service-type selector */}
        <Form.Item label={t('settings.asr.service_type')} style={{ marginBottom: 16 }}>
          <Select
            value={asrServiceType}
            onChange={(value) => dispatch(setAsrServiceType(value))}
            options={serviceTypeOptions}
            disabled={!asrEnabled}
            style={{ width: '100%' }}
          />
        </Form.Item>
        {/* OpenAI ASR settings */}
        {asrServiceType === 'openai' && (
          <>
            {/* API key */}
            <Form.Item label={t('settings.asr.api_key')} style={{ marginBottom: 16 }}>
              <Input.Password
                value={asrApiKey}
                onChange={(e) => dispatch(setAsrApiKey(e.target.value))}
                placeholder={t('settings.asr.api_key.placeholder')}
                disabled={!asrEnabled}
              />
            </Form.Item>
            {/* API URL */}
            <Form.Item label={t('settings.asr.api_url')} style={{ marginBottom: 16 }}>
              <Input
                value={asrApiUrl}
                onChange={(e) => dispatch(setAsrApiUrl(e.target.value))}
                placeholder={t('settings.asr.api_url.placeholder')}
                disabled={!asrEnabled}
              />
            </Form.Item>
            {/* Model selector */}
            <Form.Item label={t('settings.asr.model')} style={{ marginBottom: 16 }}>
              <Select
                value={asrModel}
                onChange={(value) => dispatch(setAsrModel(value))}
                options={modelOptions}
                disabled={!asrEnabled}
                style={{ width: '100%' }}
              />
            </Form.Item>
          </>
        )}
        {/* Browser ASR settings */}
        {asrServiceType === 'browser' && (
          <Form.Item>
            <Alert type="info" message={t('settings.asr.browser.info')} />
          </Form.Item>
        )}
        {/* Local-server ASR settings */}
        {asrServiceType === 'local' && (
          <>
            <Form.Item>
              <Alert type="info" message={t('settings.asr.local.info')} />
            </Form.Item>
            <Form.Item>
              <Space direction="vertical" style={{ width: '100%' }}>
                <Space>
                  <Button
                    type="primary"
                    icon={<PlayCircleOutlined />}
                    onClick={async () => {
                      const success = await ASRServerService.startServer()
                      if (success) {
                        setIsServerRunning(true)
                      }
                    }}
                    disabled={!asrEnabled || isServerRunning}
                  >
                    {t('settings.asr.server.start')}
                  </Button>
                  <Button
                    danger
                    icon={<StopOutlined />}
                    onClick={async () => {
                      const success = await ASRServerService.stopServer()
                      if (success) {
                        setIsServerRunning(false)
                      }
                    }}
                    disabled={!asrEnabled || !isServerRunning}
                  >
                    {t('settings.asr.server.stop')}
                  </Button>
                </Space>
                <Button
                  type="primary"
                  icon={<GlobalOutlined />}
                  onClick={() => ASRServerService.openServerPage()}
                  disabled={!asrEnabled || !isServerRunning}
                >
                  {t('settings.asr.open_browser')}
                </Button>
                <Button
                  onClick={() => {
                    // Try connecting to the local WebSocket server and report the result.
                    ASRService.connectToWebSocketServer?.().then(connected => {
                      if (connected) {
                        window.message.success({ content: t('settings.asr.local.connection_success'), key: 'ws-connect' })
                      } else {
                        window.message.error({ content: t('settings.asr.local.connection_failed'), key: 'ws-connect' })
                      }
                    }).catch(error => {
                      console.error('Failed to connect to WebSocket server:', error)
                      window.message.error({ content: t('settings.asr.local.connection_failed'), key: 'ws-connect' })
                    })
                  }}
                  disabled={!asrEnabled || !isServerRunning}
                >
                  {t('settings.asr.local.test_connection')}
                </Button>
                <BrowserTip>{t('settings.asr.local.browser_tip')}</BrowserTip>
              </Space>
            </Form.Item>
          </>
        )}
        {/* Test button (points the user at the input-box ASR button) */}
        <Form.Item>
          <Space>
            <Button
              type="primary"
              disabled={!asrEnabled}
              onClick={() => window.message.info({ content: t('settings.asr.test_info'), key: 'asr-test' })}>
              {t('settings.asr.test')}
            </Button>
          </Space>
        </Form.Item>
      </Form>
    </Container>
  )
}
// Outer padding wrapper for the whole settings panel.
const Container = styled.div`
  padding: 0 0 20px 0;
`
// Minimal CSS-only tooltip (shows its 'title' attribute on hover).
// NOTE(review): this shadows antd's Tooltip within this file — confirm intentional.
const Tooltip = styled.div`
  position: relative;
  display: inline-block;
  cursor: help;
  &:hover::after {
    content: attr(title);
    position: absolute;
    bottom: 100%;
    left: 50%;
    transform: translateX(-50%);
    padding: 5px 10px;
    background-color: var(--color-background-soft);
    border: 1px solid var(--color-border);
    border-radius: 4px;
    white-space: nowrap;
    z-index: 1;
    font-size: 12px;
  }
`
// Styled-div stand-in for antd's Alert; colors keyed off the 'type' prop
// via CSS variables (info / warning / error / success).
const Alert = styled.div<{ type: 'info' | 'warning' | 'error' | 'success' }>`
  padding: 10px 15px;
  border-radius: 4px;
  margin-bottom: 16px;
  background-color: ${(props) =>
    props.type === 'info'
      ? 'var(--color-info-bg)'
      : props.type === 'warning'
        ? 'var(--color-warning-bg)'
        : props.type === 'error'
          ? 'var(--color-error-bg)'
          : 'var(--color-success-bg)'};
  border: 1px solid
    ${(props) =>
    props.type === 'info'
      ? 'var(--color-info-border)'
      : props.type === 'warning'
        ? 'var(--color-warning-border)'
        : props.type === 'error'
          ? 'var(--color-error-border)'
          : 'var(--color-success-border)'};
  color: ${(props) =>
    props.type === 'info'
      ? 'var(--color-info-text)'
      : props.type === 'warning'
        ? 'var(--color-warning-text)'
        : props.type === 'error'
          ? 'var(--color-error-text)'
          : 'var(--color-success-text)'};
`
// Small muted hint under the local-server buttons (Edge-browser tip).
const BrowserTip = styled.div`
  font-size: 12px;
  color: var(--color-text-3);
  margin-top: 8px;
`
export default ASRSettings

View File

@ -1,4 +1,4 @@
import { PlusOutlined, ReloadOutlined, SoundOutlined } from '@ant-design/icons'
import { AudioOutlined, PlusOutlined, ReloadOutlined, SoundOutlined } from '@ant-design/icons'
import { useTheme } from '@renderer/context/ThemeProvider'
import TTSService from '@renderer/services/TTSService'
import store, { useAppDispatch } from '@renderer/store'
@ -17,7 +17,7 @@ import {
setTtsServiceType,
setTtsVoice
} from '@renderer/store/settings'
import { Button, Form, Input, message, Select, Space, Switch, Tag } from 'antd'
import { Button, Form, Input, message, Select, Space, Switch, Tag, Tabs } from 'antd'
import { FC, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
@ -33,6 +33,8 @@ import {
SettingTitle
} from '..'
import ASRSettings from './ASRSettings'
const CustomVoiceInput = styled.div`
display: flex;
flex-direction: column;
@ -378,341 +380,367 @@ const TTSSettings: FC = () => {
<SettingTitle>
<Space>
<SoundOutlined />
{t('settings.tts.title')}
{t('settings.voice.title')}
</Space>
</SettingTitle>
<SettingDivider />
<SettingGroup>
<SettingRow>
<SettingRowTitle>{t('settings.tts.enable')}</SettingRowTitle>
<Switch checked={ttsEnabled} onChange={(checked) => dispatch(setTtsEnabled(checked))} />
</SettingRow>
<SettingHelpText>{t('settings.tts.enable.help')}</SettingHelpText>
</SettingGroup>
<Tabs
defaultActiveKey="tts"
items={[
{
key: 'tts',
label: (
<span>
<SoundOutlined /> {t('settings.tts.tab_title')}
</span>
),
children: (
<div>
<SettingGroup>
<SettingRow>
<SettingRowTitle>{t('settings.tts.enable')}</SettingRowTitle>
<Switch checked={ttsEnabled} onChange={(checked) => dispatch(setTtsEnabled(checked))} />
</SettingRow>
<SettingHelpText>{t('settings.tts.enable.help')}</SettingHelpText>
</SettingGroup>
{/* 重置按钮 */}
<SettingGroup>
<SettingRow>
<SettingRowTitle>{t('settings.tts.reset_title')}</SettingRowTitle>
<Button
danger
onClick={() => {
if (window.confirm(t('settings.tts.reset_confirm'))) {
dispatch(resetTtsCustomValues())
window.message.success({ content: t('settings.tts.reset_success'), key: 'reset-tts' })
}
}}>
{t('settings.tts.reset')}
</Button>
</SettingRow>
<SettingHelpText>{t('settings.tts.reset_help')}</SettingHelpText>
</SettingGroup>
<SettingGroup>
<SettingRowTitle>{t('settings.tts.api_settings')}</SettingRowTitle>
<Form layout="vertical" style={{ width: '100%', marginTop: 16 }}>
{/* TTS服务类型选择 */}
<Form.Item label={t('settings.tts.service_type')} style={{ marginBottom: 16 }}>
<FlexContainer>
<Select
value={ttsServiceType}
onChange={(value: string) => {
console.log('切换TTS服务类型为:', value)
// 先将新的服务类型写入Redux状态
dispatch(setTtsServiceType(value))
{/* 重置按钮 */}
<SettingGroup>
<SettingRow>
<SettingRowTitle>{t('settings.tts.reset_title')}</SettingRowTitle>
<Button
danger
onClick={() => {
if (window.confirm(t('settings.tts.reset_confirm'))) {
dispatch(resetTtsCustomValues())
window.message.success({ content: t('settings.tts.reset_success'), key: 'reset-tts' })
}
}}>
{t('settings.tts.reset')}
</Button>
</SettingRow>
<SettingHelpText>{t('settings.tts.reset_help')}</SettingHelpText>
</SettingGroup>
<SettingGroup>
<SettingRowTitle>{t('settings.tts.api_settings')}</SettingRowTitle>
<Form layout="vertical" style={{ width: '100%', marginTop: 16 }}>
{/* TTS服务类型选择 */}
<Form.Item label={t('settings.tts.service_type')} style={{ marginBottom: 16 }}>
<FlexContainer>
<Select
value={ttsServiceType}
onChange={(value: string) => {
console.log('切换TTS服务类型为:', value)
// 先将新的服务类型写入Redux状态
dispatch(setTtsServiceType(value))
// 等待一下,确保状态已更新
setTimeout(() => {
// 验证状态是否正确更新
const currentType = store.getState().settings.ttsServiceType
console.log('更新后的TTS服务类型:', currentType)
// 等待一下,确保状态已更新
setTimeout(() => {
// 验证状态是否正确更新
const currentType = store.getState().settings.ttsServiceType
console.log('更新后的TTS服务类型:', currentType)
// 如果状态没有正确更新,再次尝试
if (currentType !== value) {
console.log('状态未正确更新,再次尝试')
dispatch(setTtsServiceType(value))
}
}, 100)
}}
options={[
{ label: t('settings.tts.service_type.openai'), value: 'openai' },
{ label: t('settings.tts.service_type.edge'), value: 'edge' }
]}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
icon={<ReloadOutlined />}
onClick={() => {
// 强制刷新当前服务类型设置
const currentType = store.getState().settings.ttsServiceType
console.log('强制刷新TTS服务类型:', currentType)
dispatch(setTtsServiceType(currentType))
window.message.success({
content: t('settings.tts.service_type.refreshed', { defaultValue: '已刷新TTS服务类型设置' }),
key: 'tts-refresh'
})
}}
disabled={!ttsEnabled}
title={t('settings.tts.service_type.refresh', { defaultValue: '刷新TTS服务类型设置' })}
/>
</FlexContainer>
</Form.Item>
// 如果状态没有正确更新,再次尝试
if (currentType !== value) {
console.log('状态未正确更新,再次尝试')
dispatch(setTtsServiceType(value))
}
}, 100)
}}
options={[
{ label: t('settings.tts.service_type.openai'), value: 'openai' },
{ label: t('settings.tts.service_type.edge'), value: 'edge' }
]}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
icon={<ReloadOutlined />}
onClick={() => {
// 强制刷新当前服务类型设置
const currentType = store.getState().settings.ttsServiceType
console.log('强制刷新TTS服务类型:', currentType)
dispatch(setTtsServiceType(currentType))
window.message.success({
content: t('settings.tts.service_type.refreshed', { defaultValue: '已刷新TTS服务类型设置' }),
key: 'tts-refresh'
})
}}
disabled={!ttsEnabled}
title={t('settings.tts.service_type.refresh', { defaultValue: '刷新TTS服务类型设置' })}
/>
</FlexContainer>
</Form.Item>
{/* OpenAI TTS设置 */}
{ttsServiceType === 'openai' && (
<>
<Form.Item label={t('settings.tts.api_key')} style={{ marginBottom: 16 }}>
<Input.Password
value={ttsApiKey}
onChange={(e) => dispatch(setTtsApiKey(e.target.value))}
placeholder={t('settings.tts.api_key.placeholder')}
disabled={!ttsEnabled}
/>
</Form.Item>
<Form.Item label={t('settings.tts.api_url')} style={{ marginBottom: 16 }}>
<Input
value={ttsApiUrl}
onChange={(e) => dispatch(setTtsApiUrl(e.target.value))}
placeholder={t('settings.tts.api_url.placeholder')}
disabled={!ttsEnabled}
/>
</Form.Item>
</>
)}
{/* OpenAI TTS设置 */}
{ttsServiceType === 'openai' && (
<>
<Form.Item label={t('settings.tts.api_key')} style={{ marginBottom: 16 }}>
<Input.Password
value={ttsApiKey}
onChange={(e) => dispatch(setTtsApiKey(e.target.value))}
placeholder={t('settings.tts.api_key.placeholder')}
disabled={!ttsEnabled}
/>
</Form.Item>
<Form.Item label={t('settings.tts.api_url')} style={{ marginBottom: 16 }}>
<Input
value={ttsApiUrl}
onChange={(e) => dispatch(setTtsApiUrl(e.target.value))}
placeholder={t('settings.tts.api_url.placeholder')}
disabled={!ttsEnabled}
/>
</Form.Item>
</>
)}
{/* Edge TTS设置 */}
{ttsServiceType === 'edge' && (
<Form.Item label={t('settings.tts.edge_voice')} style={{ marginBottom: 16 }}>
<VoiceSelectContainer>
<Select
value={ttsEdgeVoice}
onChange={(value) => dispatch(setTtsEdgeVoice(value))}
options={
availableVoices.length > 0
? availableVoices
: [{ label: t('settings.tts.edge_voice.loading'), value: '' }]
}
disabled={!ttsEnabled}
style={{ flex: 1 }}
showSearch
optionFilterProp="label"
placeholder={
availableVoices.length === 0
? t('settings.tts.edge_voice.loading')
: t('settings.tts.voice.placeholder')
}
notFoundContent={
availableVoices.length === 0
? t('settings.tts.edge_voice.loading')
: t('settings.tts.edge_voice.not_found')
}
/>
<Button
icon={<ReloadOutlined />}
onClick={refreshVoices}
disabled={!ttsEnabled}
title={t('settings.tts.edge_voice.refresh')}
/>
</VoiceSelectContainer>
{availableVoices.length === 0 && <LoadingText>{t('settings.tts.edge_voice.loading')}</LoadingText>}
</Form.Item>
)}
{/* Edge TTS设置 */}
{ttsServiceType === 'edge' && (
<Form.Item label={t('settings.tts.edge_voice')} style={{ marginBottom: 16 }}>
<VoiceSelectContainer>
<Select
value={ttsEdgeVoice}
onChange={(value) => dispatch(setTtsEdgeVoice(value))}
options={
availableVoices.length > 0
? availableVoices
: [{ label: t('settings.tts.edge_voice.loading'), value: '' }]
}
disabled={!ttsEnabled}
style={{ flex: 1 }}
showSearch
optionFilterProp="label"
placeholder={
availableVoices.length === 0
? t('settings.tts.edge_voice.loading')
: t('settings.tts.voice.placeholder')
}
notFoundContent={
availableVoices.length === 0
? t('settings.tts.edge_voice.loading')
: t('settings.tts.edge_voice.not_found')
}
/>
<Button
icon={<ReloadOutlined />}
onClick={refreshVoices}
disabled={!ttsEnabled}
title={t('settings.tts.edge_voice.refresh')}
/>
</VoiceSelectContainer>
{availableVoices.length === 0 && <LoadingText>{t('settings.tts.edge_voice.loading')}</LoadingText>}
</Form.Item>
)}
{/* OpenAI TTS的音色和模型设置 */}
{ttsServiceType === 'openai' && (
<>
{/* 音色选择 */}
<Form.Item label={t('settings.tts.voice')} style={{ marginBottom: 8 }}>
<Select
value={ttsVoice}
onChange={(value) => dispatch(setTtsVoice(value))}
options={ttsCustomVoices.map((voice: any) => {
// 确保voice是字符串
const voiceStr = typeof voice === 'string' ? voice : String(voice)
return { label: voiceStr, value: voiceStr }
})}
disabled={!ttsEnabled}
style={{ width: '100%' }}
placeholder={t('settings.tts.voice.placeholder')}
showSearch
optionFilterProp="label"
allowClear
/>
</Form.Item>
{/* OpenAI TTS的音色和模型设置 */}
{ttsServiceType === 'openai' && (
<>
{/* 音色选择 */}
<Form.Item label={t('settings.tts.voice')} style={{ marginBottom: 8 }}>
<Select
value={ttsVoice}
onChange={(value) => dispatch(setTtsVoice(value))}
options={ttsCustomVoices.map((voice: any) => {
// 确保voice是字符串
const voiceStr = typeof voice === 'string' ? voice : String(voice)
return { label: voiceStr, value: voiceStr }
})}
disabled={!ttsEnabled}
style={{ width: '100%' }}
placeholder={t('settings.tts.voice.placeholder')}
showSearch
optionFilterProp="label"
allowClear
/>
</Form.Item>
{/* 自定义音色列表 */}
<TagsContainer>
{ttsCustomVoices && ttsCustomVoices.length > 0 ? (
ttsCustomVoices.map((voice: any, index: number) => {
// 确保voice是字符串
const voiceStr = typeof voice === 'string' ? voice : String(voice)
return (
<Tag
key={`${voiceStr}-${index}`}
closable
onClose={() => handleRemoveVoice(voiceStr)}
style={{ padding: '4px 8px' }}>
{voiceStr}
</Tag>
)
})
) : (
<EmptyText>{t('settings.tts.voice_empty')}</EmptyText>
)}
</TagsContainer>
{/* 自定义音色列表 */}
<TagsContainer>
{ttsCustomVoices && ttsCustomVoices.length > 0 ? (
ttsCustomVoices.map((voice: any, index: number) => {
// 确保voice是字符串
const voiceStr = typeof voice === 'string' ? voice : String(voice)
return (
<Tag
key={`${voiceStr}-${index}`}
closable
onClose={() => handleRemoveVoice(voiceStr)}
style={{ padding: '4px 8px' }}>
{voiceStr}
</Tag>
)
})
) : (
<EmptyText>{t('settings.tts.voice_empty')}</EmptyText>
)}
</TagsContainer>
{/* 添加自定义音色 */}
<CustomVoiceInput>
<InputGroup>
<Input
placeholder={t('settings.tts.voice_input_placeholder')}
value={newVoice}
onChange={(e) => setNewVoice(e.target.value)}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
type="primary"
icon={<PlusOutlined />}
onClick={handleAddVoice}
disabled={!ttsEnabled || !newVoice}>
{t('settings.tts.voice_add')}
</Button>
</InputGroup>
</CustomVoiceInput>
{/* 添加自定义音色 */}
<CustomVoiceInput>
<InputGroup>
<Input
placeholder={t('settings.tts.voice_input_placeholder')}
value={newVoice}
onChange={(e) => setNewVoice(e.target.value)}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
type="primary"
icon={<PlusOutlined />}
onClick={handleAddVoice}
disabled={!ttsEnabled || !newVoice}>
{t('settings.tts.voice_add')}
</Button>
</InputGroup>
</CustomVoiceInput>
{/* 模型选择 */}
<Form.Item label={t('settings.tts.model')} style={{ marginBottom: 8, marginTop: 16 }}>
<Select
value={ttsModel}
onChange={(value) => dispatch(setTtsModel(value))}
options={ttsCustomModels.map((model: any) => {
// 确保model是字符串
const modelStr = typeof model === 'string' ? model : String(model)
return { label: modelStr, value: modelStr }
})}
disabled={!ttsEnabled}
style={{ width: '100%' }}
placeholder={t('settings.tts.model.placeholder')}
showSearch
optionFilterProp="label"
allowClear
/>
</Form.Item>
{/* 模型选择 */}
<Form.Item label={t('settings.tts.model')} style={{ marginBottom: 8, marginTop: 16 }}>
<Select
value={ttsModel}
onChange={(value) => dispatch(setTtsModel(value))}
options={ttsCustomModels.map((model: any) => {
// 确保model是字符串
const modelStr = typeof model === 'string' ? model : String(model)
return { label: modelStr, value: modelStr }
})}
disabled={!ttsEnabled}
style={{ width: '100%' }}
placeholder={t('settings.tts.model.placeholder')}
showSearch
optionFilterProp="label"
allowClear
/>
</Form.Item>
{/* 自定义模型列表 */}
<TagsContainer>
{ttsCustomModels && ttsCustomModels.length > 0 ? (
ttsCustomModels.map((model: any, index: number) => {
// 确保model是字符串
const modelStr = typeof model === 'string' ? model : String(model)
return (
<Tag
key={`${modelStr}-${index}`}
closable
onClose={() => handleRemoveModel(modelStr)}
style={{ padding: '4px 8px' }}>
{modelStr}
</Tag>
)
})
) : (
<EmptyText>{t('settings.tts.model_empty')}</EmptyText>
)}
</TagsContainer>
{/* 自定义模型列表 */}
<TagsContainer>
{ttsCustomModels && ttsCustomModels.length > 0 ? (
ttsCustomModels.map((model: any, index: number) => {
// 确保model是字符串
const modelStr = typeof model === 'string' ? model : String(model)
return (
<Tag
key={`${modelStr}-${index}`}
closable
onClose={() => handleRemoveModel(modelStr)}
style={{ padding: '4px 8px' }}>
{modelStr}
</Tag>
)
})
) : (
<EmptyText>{t('settings.tts.model_empty')}</EmptyText>
)}
</TagsContainer>
{/* 添加自定义模型 */}
<CustomVoiceInput>
<InputGroup>
<Input
placeholder={t('settings.tts.model_input_placeholder')}
value={newModel}
onChange={(e) => setNewModel(e.target.value)}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
type="primary"
icon={<PlusOutlined />}
onClick={handleAddModel}
disabled={!ttsEnabled || !newModel}>
{t('settings.tts.model_add')}
</Button>
</InputGroup>
</CustomVoiceInput>
</>
)}
{/* 添加自定义模型 */}
<CustomVoiceInput>
<InputGroup>
<Input
placeholder={t('settings.tts.model_input_placeholder')}
value={newModel}
onChange={(e) => setNewModel(e.target.value)}
disabled={!ttsEnabled}
style={{ flex: 1 }}
/>
<Button
type="primary"
icon={<PlusOutlined />}
onClick={handleAddModel}
disabled={!ttsEnabled || !newModel}>
{t('settings.tts.model_add')}
</Button>
</InputGroup>
</CustomVoiceInput>
</>
)}
{/* TTS过滤选项 */}
<Form.Item label={t('settings.tts.filter_options')} style={{ marginTop: 24, marginBottom: 8 }}>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterThinkingProcess}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterThinkingProcess: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.thinking_process')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterMarkdown}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterMarkdown: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.markdown')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterCodeBlocks}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterCodeBlocks: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.code_blocks')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterHtmlTags}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterHtmlTags: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.html_tags')}
</FilterOptionItem>
<FilterOptionItem>
<LengthLabel>{t('settings.tts.max_text_length')}:</LengthLabel>
<Select
value={ttsFilterOptions.maxTextLength}
onChange={(value) => dispatch(setTtsFilterOptions({ maxTextLength: value }))}
disabled={!ttsEnabled}
style={{ width: 120 }}
options={[
{ label: '1000', value: 1000 },
{ label: '2000', value: 2000 },
{ label: '4000', value: 4000 },
{ label: '8000', value: 8000 },
{ label: '16000', value: 16000 }
]}
/>
</FilterOptionItem>
</Form.Item>
{/* TTS过滤选项 */}
<Form.Item label={t('settings.tts.filter_options')} style={{ marginTop: 24, marginBottom: 8 }}>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterThinkingProcess}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterThinkingProcess: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.thinking_process')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterMarkdown}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterMarkdown: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.markdown')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterCodeBlocks}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterCodeBlocks: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.code_blocks')}
</FilterOptionItem>
<FilterOptionItem>
<Switch
checked={ttsFilterOptions.filterHtmlTags}
onChange={(checked) => dispatch(setTtsFilterOptions({ filterHtmlTags: checked }))}
disabled={!ttsEnabled}
/>{' '}
{t('settings.tts.filter.html_tags')}
</FilterOptionItem>
<FilterOptionItem>
<LengthLabel>{t('settings.tts.max_text_length')}:</LengthLabel>
<Select
value={ttsFilterOptions.maxTextLength}
onChange={(value) => dispatch(setTtsFilterOptions({ maxTextLength: value }))}
disabled={!ttsEnabled}
style={{ width: 120 }}
options={[
{ label: '1000', value: 1000 },
{ label: '2000', value: 2000 },
{ label: '4000', value: 4000 },
{ label: '8000', value: 8000 },
{ label: '16000', value: 16000 }
]}
/>
</FilterOptionItem>
</Form.Item>
<Form.Item style={{ marginTop: 16 }}>
<Button
type="primary"
onClick={testTTS}
disabled={
!ttsEnabled ||
(ttsServiceType === 'openai' && (!ttsApiKey || !ttsVoice || !ttsModel)) ||
(ttsServiceType === 'edge' && !ttsEdgeVoice)
}>
{t('settings.tts.test')}
</Button>
</Form.Item>
</Form>
</SettingGroup>
<Form.Item style={{ marginTop: 16 }}>
<Button
type="primary"
onClick={testTTS}
disabled={
!ttsEnabled ||
(ttsServiceType === 'openai' && (!ttsApiKey || !ttsVoice || !ttsModel)) ||
(ttsServiceType === 'edge' && !ttsEdgeVoice)
}>
{t('settings.tts.test')}
</Button>
</Form.Item>
</Form>
</SettingGroup>
</div>
)
},
{
key: 'asr',
label: (
<span>
<AudioOutlined /> {t('settings.asr.tab_title')}
</span>
),
children: <ASRSettings />
}
]}
/>
<SettingHelpText style={{ display: 'flex', flexDirection: 'column', justifyContent: 'center', gap: 5 }}>
<span>{t('settings.tts.help')}</span>
<a href="https://platform.openai.com/docs/guides/text-to-speech" target="_blank" rel="noopener noreferrer">
{t('settings.tts.learn_more')}
<span>{t('settings.voice.help')}</span>
<a href="https://platform.openai.com/docs/guides/speech-to-text" target="_blank" rel="noopener noreferrer">
{t('settings.voice.learn_more')}
</a>
</SettingHelpText>
</SettingContainer>

View File

@ -0,0 +1,129 @@
import i18n from '@renderer/i18n'
// 使用window.electron而不是直接导入electron模块
// 这样可以避免__dirname不可用的问题
/**
 * Manages the lifecycle of the local ASR (speech recognition) server.
 *
 * All process control is delegated to the Electron main process through
 * `window.electron.ipcRenderer` rather than importing `electron` directly,
 * because `__dirname` (and Node process APIs) are unavailable in the renderer.
 */
class ASRServerService {
  // PID of the spawned server process as reported by the main process,
  // or null when no server is running. (Was `any`; the IPC handler returns a number.)
  private serverProcess: number | null = null
  // Mirrors whether we believe the server is up; toggled by start/stop results.
  private isServerRunning = false

  /**
   * Start the ASR server via IPC.
   * Shows progress/success/error toasts as side effects.
   * @returns true when the server is running afterwards (already-running counts as success)
   */
  startServer = async (): Promise<boolean> => {
    if (this.isServerRunning) {
      console.log('[ASRServerService] 服务器已经在运行中')
      window.message.info({ content: i18n.t('settings.asr.server.already_running'), key: 'asr-server' })
      return true
    }

    try {
      console.log('[ASRServerService] 正在启动ASR服务器...')
      window.message.loading({ content: i18n.t('settings.asr.server.starting'), key: 'asr-server' })

      // Ask the main process to spawn the server; it replies { success, pid?, error? }.
      const result = await window.electron.ipcRenderer.invoke('start-asr-server')

      if (result.success) {
        this.isServerRunning = true
        this.serverProcess = result.pid
        console.log('[ASRServerService] ASR服务器启动成功PID:', result.pid)
        window.message.success({ content: i18n.t('settings.asr.server.started'), key: 'asr-server' })
        return true
      } else {
        console.error('[ASRServerService] ASR服务器启动失败:', result.error)
        window.message.error({
          content: i18n.t('settings.asr.server.start_failed') + ': ' + result.error,
          key: 'asr-server'
        })
        return false
      }
    } catch (error) {
      console.error('[ASRServerService] 启动ASR服务器时出错:', error)
      window.message.error({
        content: i18n.t('settings.asr.server.start_failed') + ': ' + (error as Error).message,
        key: 'asr-server'
      })
      return false
    }
  }

  /**
   * Stop the ASR server via IPC.
   * Shows progress/success/error toasts as side effects.
   * @returns true when the server is stopped afterwards (not-running counts as success)
   */
  stopServer = async (): Promise<boolean> => {
    if (!this.isServerRunning || !this.serverProcess) {
      console.log('[ASRServerService] 服务器未运行')
      window.message.info({ content: i18n.t('settings.asr.server.not_running'), key: 'asr-server' })
      return true
    }

    try {
      console.log('[ASRServerService] 正在停止ASR服务器...')
      window.message.loading({ content: i18n.t('settings.asr.server.stopping'), key: 'asr-server' })

      // The main process kills the process identified by the stored PID.
      const result = await window.electron.ipcRenderer.invoke('stop-asr-server', this.serverProcess)

      if (result.success) {
        this.isServerRunning = false
        this.serverProcess = null
        console.log('[ASRServerService] ASR服务器已停止')
        window.message.success({ content: i18n.t('settings.asr.server.stopped'), key: 'asr-server' })
        return true
      } else {
        console.error('[ASRServerService] ASR服务器停止失败:', result.error)
        window.message.error({
          content: i18n.t('settings.asr.server.stop_failed') + ': ' + result.error,
          key: 'asr-server'
        })
        return false
      }
    } catch (error) {
      console.error('[ASRServerService] 停止ASR服务器时出错:', error)
      window.message.error({
        content: i18n.t('settings.asr.server.stop_failed') + ': ' + (error as Error).message,
        key: 'asr-server'
      })
      return false
    }
  }

  /**
   * Whether we currently believe the server is running.
   * Note: reflects local bookkeeping only; the process may have died externally.
   */
  isRunning = (): boolean => {
    return this.isServerRunning
  }

  /**
   * URL of the ASR server's web page.
   * NOTE(review): port 8080 is hard-coded here and in ASRService — keep in sync.
   */
  getServerUrl = (): string => {
    return 'http://localhost:8080'
  }

  /**
   * Path to the ASR server entry script, relative to the app root.
   * Uses a relative path because `window.electron.app.getAppPath()` is unavailable.
   * NOTE(review): resolution of these relative paths depends on the caller's cwd — verify in packaged builds.
   */
  getServerFilePath = (): string => {
    return process.env.NODE_ENV === 'development'
      ? 'src/renderer/src/assets/asr-server/server.js'
      : 'public/asr-server/server.js'
  }

  /**
   * Open the ASR server web page in a new window/tab.
   */
  openServerPage = (): void => {
    window.open(this.getServerUrl(), '_blank')
  }
}

export default new ASRServerService()

View File

@ -0,0 +1,560 @@
import i18n from '@renderer/i18n'
import store from '@renderer/store'
/**
* ASR服务
*/
/**
 * ASR (speech recognition) service for the renderer.
 *
 * Supports three modes selected by the `asrServiceType` setting:
 *  - 'local':   relays start/stop commands over a WebSocket to a local server,
 *               which performs recognition in an external browser page.
 *  - 'openai':  records audio with MediaRecorder and posts it to the Whisper API.
 *  - 'browser': placeholder for the Web Speech API (not fully implemented).
 */
class ASRService {
  // MediaRecorder capture state (used by 'openai' / 'browser' modes).
  private mediaRecorder: MediaRecorder | null = null
  private audioChunks: Blob[] = []
  private isRecording = false
  private stream: MediaStream | null = null

  // WebSocket state (used by 'local' mode).
  private ws: WebSocket | null = null
  private wsConnected = false
  // True once the external browser page has identified itself to the server.
  private browserReady = false
  private reconnectAttempt = 0
  private maxReconnectAttempts = 5
  private reconnectTimeout: NodeJS.Timeout | null = null

  // Callback invoked with recognized text ('' is used to signal "reset UI state").
  resultCallback: ((text: string) => void) | null = null

  /**
   * Connect to the local WebSocket relay server (ws://localhost:8080).
   * Identifies this client with role 'electron' once connected.
   * @returns true when the connection is open
   */
  connectToWebSocketServer = async (): Promise<boolean> => {
    return new Promise((resolve) => {
      if (this.ws && this.ws.readyState === WebSocket.OPEN) {
        console.log('[ASRService] WebSocket已连接')
        resolve(true)
        return
      }

      if (this.ws && this.ws.readyState === WebSocket.CONNECTING) {
        console.log('[ASRService] WebSocket正在连接中')
        // A connection attempt is already in flight; piggyback on its outcome.
        this.ws.onopen = () => {
          console.log('[ASRService] WebSocket连接成功')
          this.wsConnected = true
          this.reconnectAttempt = 0
          this.ws?.send(JSON.stringify({ type: 'identify', role: 'electron' }))
          resolve(true)
        }
        this.ws.onerror = () => {
          console.error('[ASRService] WebSocket连接失败')
          this.wsConnected = false
          resolve(false)
        }
        return
      }

      // Discard any stale (closed/closing) connection before reconnecting.
      if (this.ws) {
        try {
          this.ws.close()
        } catch (e) {
          console.error('[ASRService] 关闭WebSocket连接失败:', e)
        }
      }

      try {
        console.log('[ASRService] 正在连接WebSocket服务器...')
        window.message.loading({ content: '正在连接语音识别服务...', key: 'ws-connect' })

        this.ws = new WebSocket('ws://localhost:8080')
        this.wsConnected = false
        this.browserReady = false

        this.ws.onopen = () => {
          console.log('[ASRService] WebSocket连接成功')
          window.message.success({ content: '语音识别服务连接成功', key: 'ws-connect' })
          this.wsConnected = true
          this.reconnectAttempt = 0
          this.ws?.send(JSON.stringify({ type: 'identify', role: 'electron' }))
          resolve(true)
        }

        this.ws.onclose = () => {
          console.log('[ASRService] WebSocket连接关闭')
          this.wsConnected = false
          this.browserReady = false
          this.attemptReconnect()
        }

        this.ws.onerror = (error) => {
          console.error('[ASRService] WebSocket连接错误:', error)
          this.wsConnected = false
          window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' })
          resolve(false)
        }

        this.ws.onmessage = this.handleWebSocketMessage
      } catch (error) {
        console.error('[ASRService] 创建WebSocket连接失败:', error)
        window.message.error({ content: '语音识别服务连接失败', key: 'ws-connect' })
        resolve(false)
      }
    })
  }

  /**
   * Handle messages from the relay server: browser readiness status,
   * a 'stopped' notification, recognition results, and errors.
   */
  private handleWebSocketMessage = (event: MessageEvent) => {
    try {
      const data = JSON.parse(event.data)
      console.log('[ASRService] 收到WebSocket消息:', data)

      if (data.type === 'status') {
        if (data.message === 'browser_ready' || data.message === 'Browser connected') {
          console.log('[ASRService] 浏览器已准备好')
          this.browserReady = true
          window.message.success({ content: '语音识别浏览器已准备好', key: 'browser-status' })
        } else if (data.message === 'Browser disconnected' || data.message === 'Browser connection error') {
          console.log('[ASRService] 浏览器断开连接')
          this.browserReady = false
          window.message.error({ content: '语音识别浏览器断开连接', key: 'browser-status' })
        } else if (data.message === 'stopped') {
          // BUGFIX: this case used to live in a sibling `else if (data.type === 'status' && ...)`
          // branch that was unreachable (the `data.type === 'status'` branch above always won),
          // so the stop notification was silently dropped. Handle it here instead.
          console.log('[ASRService] 语音识别已停止')
          this.isRecording = false
          // If no final result arrived, still show the "done" toast.
          window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' })
          // Invoke the callback with '' — does not touch the input box, but
          // lets the UI reset the recording-button state.
          if (this.resultCallback && typeof this.resultCallback === 'function') {
            this.resultCallback('')
          }
        }
      } else if (data.type === 'result' && data.data) {
        // Recognition results: only final, non-empty text is forwarded to the callback.
        console.log('[ASRService] 收到识别结果:', data.data)
        if (this.resultCallback && typeof this.resultCallback === 'function') {
          if (data.data.isFinal && data.data.text && data.data.text.trim()) {
            console.log('[ASRService] 收到最终结果,调用回调函数,文本:', data.data.text)
            this.resultCallback(data.data.text)
            window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
          } else if (!data.data.isFinal) {
            // Interim result: log only, do not invoke the callback.
            console.log('[ASRService] 收到中间结果,文本:', data.data.text)
          } else {
            console.log('[ASRService] 识别结果为空,不调用回调')
          }
        } else {
          console.warn('[ASRService] 没有设置结果回调函数')
        }
      } else if (data.type === 'error') {
        console.error('[ASRService] 收到错误消息:', data.message || data.data)
        window.message.error({ content: `语音识别错误: ${data.message || data.data?.error || '未知错误'}`, key: 'asr-error' })
      }
    } catch (error) {
      console.error('[ASRService] 解析WebSocket消息失败:', error, event.data)
    }
  }

  /**
   * Schedule a reconnect with exponential backoff (1s, 2s, 4s, ... capped at 30s),
   * giving up after `maxReconnectAttempts` attempts.
   */
  private attemptReconnect = () => {
    if (this.reconnectTimeout) {
      clearTimeout(this.reconnectTimeout)
      this.reconnectTimeout = null
    }

    if (this.reconnectAttempt >= this.maxReconnectAttempts) {
      console.log('[ASRService] 达到最大重连次数,停止重连')
      return
    }

    const delay = Math.min(1000 * Math.pow(2, this.reconnectAttempt), 30000)
    console.log(`[ASRService] 将在 ${delay}ms 后尝试重连 (尝试 ${this.reconnectAttempt + 1}/${this.maxReconnectAttempts})`)

    this.reconnectTimeout = setTimeout(() => {
      this.reconnectAttempt++
      this.connectToWebSocketServer().catch(console.error)
    }, delay)
  }

  /**
   * Start recording / recognition according to the current settings.
   * In 'local' mode this sends a 'start' command over the WebSocket;
   * otherwise it starts a MediaRecorder capture.
   * @param onTranscribed optional callback to receive recognized text
   */
  startRecording = async (onTranscribed?: (text: string) => void): Promise<void> => {
    try {
      const { asrEnabled, asrServiceType } = store.getState().settings

      if (!asrEnabled) {
        window.message.error({ content: i18n.t('settings.asr.error.not_enabled'), key: 'asr-error' })
        return
      }

      // Ignore duplicate start requests while a session is active.
      if (this.isRecording) {
        console.log('已经在录音中,忽略此次请求')
        return
      }

      if (asrServiceType === 'local') {
        const connected = await this.connectToWebSocketServer()
        if (!connected) {
          throw new Error('无法连接到语音识别服务')
        }

        // Wait (up to ~5s) for the external browser page to come up.
        if (!this.browserReady) {
          let waitAttempts = 0
          const maxWaitAttempts = 5

          while (!this.browserReady && waitAttempts < maxWaitAttempts) {
            window.message.loading({
              content: `等待浏览器准备就绪 (${waitAttempts + 1}/${maxWaitAttempts})...`,
              key: 'browser-status'
            })
            await new Promise(resolve => setTimeout(resolve, 1000))
            waitAttempts++
          }

          if (!this.browserReady) {
            window.message.warning({
              content: '语音识别浏览器尚未准备好,请确保已打开浏览器页面',
              key: 'browser-status'
            })
            throw new Error('浏览器尚未准备好')
          }
        }

        if (onTranscribed && typeof onTranscribed === 'function') {
          this.resultCallback = onTranscribed
        }

        if (this.ws && this.wsConnected) {
          this.ws.send(JSON.stringify({ type: 'start' }))
          this.isRecording = true
          console.log('开始语音识别')
          window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' })
        } else {
          throw new Error('WebSocket连接未就绪')
        }
        return
      }

      // 'openai' / 'browser' modes: capture microphone audio locally.
      this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      this.mediaRecorder = new MediaRecorder(this.stream)
      this.audioChunks = []

      this.mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          this.audioChunks.push(event.data)
        }
      }

      this.mediaRecorder.start()
      this.isRecording = true
      console.log('开始录音')
      window.message.info({ content: i18n.t('settings.asr.recording'), key: 'asr-recording' })
    } catch (error) {
      console.error('开始录音失败:', error)
      window.message.error({
        content: i18n.t('settings.asr.error.start_failed') + ': ' + (error as Error).message,
        key: 'asr-error'
      })
      this.isRecording = false
    }
  }

  /**
   * Stop the current recording / recognition session and deliver the result.
   * @param onTranscribed callback to receive recognized text
   */
  stopRecording = async (onTranscribed: (text: string) => void): Promise<void> => {
    const { asrServiceType } = store.getState().settings

    if (asrServiceType === 'local') {
      if (!this.isRecording) {
        console.log('没有正在进行的语音识别')
        return
      }

      try {
        this.resultCallback = onTranscribed

        if (this.ws && this.wsConnected) {
          this.ws.send(JSON.stringify({ type: 'stop' }))
          console.log('停止语音识别')
          window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' })

          // Fire the callback with '' right away so the button state resets
          // without waiting for the final result.
          if (onTranscribed) {
            setTimeout(() => onTranscribed(''), 100)
          }
        } else {
          throw new Error('WebSocket连接未就绪')
        }

        this.isRecording = false
      } catch (error) {
        console.error('停止语音识别失败:', error)
        window.message.error({
          content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message,
          key: 'asr-processing'
        })
        this.isRecording = false
      }
      return
    }

    // 'openai' / 'browser' modes: finalize the MediaRecorder capture.
    if (!this.isRecording || !this.mediaRecorder) {
      console.log('没有正在进行的录音')
      return
    }

    try {
      // Resolves with the merged audio once MediaRecorder fires 'stop'.
      const recordingEndedPromise = new Promise<Blob>((resolve) => {
        if (this.mediaRecorder) {
          this.mediaRecorder.onstop = () => {
            const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' })
            resolve(audioBlob)
          }
          this.mediaRecorder.stop()
        }
      })

      // Release the microphone.
      if (this.stream) {
        this.stream.getTracks().forEach(track => track.stop())
        this.stream = null
      }

      const audioBlob = await recordingEndedPromise

      this.isRecording = false
      this.mediaRecorder = null

      console.log('录音结束,音频大小:', audioBlob.size, 'bytes')
      window.message.loading({ content: i18n.t('settings.asr.processing'), key: 'asr-processing' })

      if (asrServiceType === 'openai') {
        await this.transcribeWithOpenAI(audioBlob, onTranscribed)
      } else if (asrServiceType === 'browser') {
        await this.transcribeWithBrowser(audioBlob, onTranscribed)
      } else {
        throw new Error(`不支持的ASR服务类型: ${asrServiceType}`)
      }
    } catch (error) {
      console.error('停止录音或转录失败:', error)
      window.message.error({
        content: i18n.t('settings.asr.error.transcribe_failed') + ': ' + (error as Error).message,
        key: 'asr-processing'
      })

      this.isRecording = false
      this.mediaRecorder = null
      if (this.stream) {
        this.stream.getTracks().forEach(track => track.stop())
        this.stream = null
      }
    }
  }

  /**
   * Transcribe recorded audio with OpenAI's Whisper API.
   * @param audioBlob recorded audio (webm)
   * @param onTranscribed callback receiving the recognized text
   * @throws when the API key is missing, the request fails, or no text is returned
   */
  private transcribeWithOpenAI = async (audioBlob: Blob, onTranscribed: (text: string) => void): Promise<void> => {
    try {
      const { asrApiKey, asrApiUrl, asrModel } = store.getState().settings

      if (!asrApiKey) {
        throw new Error(i18n.t('settings.asr.error.no_api_key'))
      }

      const formData = new FormData()
      formData.append('file', audioBlob, 'recording.webm')
      formData.append('model', asrModel || 'whisper-1')

      const response = await fetch(asrApiUrl, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${asrApiKey}`
        },
        body: formData
      })

      if (!response.ok) {
        const errorData = await response.json()
        throw new Error(errorData.error?.message || 'OpenAI语音识别失败')
      }

      const data = await response.json()
      const transcribedText = data.text

      if (transcribedText) {
        console.log('语音识别成功:', transcribedText)
        window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
        onTranscribed(transcribedText)
      } else {
        throw new Error('未能识别出文本')
      }
    } catch (error) {
      console.error('OpenAI语音识别失败:', error)
      throw error
    }
  }

  /**
   * Transcribe with the browser's Web Speech API.
   * NOTE(review): the Web Speech API cannot transcribe pre-recorded audio, so this
   * is a stub that only reports a placeholder string; real-time SpeechRecognition
   * would be needed for a complete implementation.
   */
  private transcribeWithBrowser = async (_audioBlob: Blob, onTranscribed: (text: string) => void): Promise<void> => {
    try {
      if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
        throw new Error(i18n.t('settings.asr.error.browser_not_support'))
      }

      window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
      onTranscribed('浏览器语音识别功能尚未完全实现')
    } catch (error) {
      console.error('浏览器语音识别失败:', error)
      throw error
    }
  }

  /**
   * Whether a recording / recognition session is currently active.
   */
  isCurrentlyRecording = (): boolean => {
    return this.isRecording
  }

  /**
   * Cancel the current session without delivering a result.
   */
  cancelRecording = (): void => {
    const { asrServiceType } = store.getState().settings

    if (asrServiceType === 'local') {
      if (this.isRecording) {
        if (this.ws && this.wsConnected) {
          this.ws.send(JSON.stringify({ type: 'stop' }))
        }
        this.isRecording = false
        this.resultCallback = null
        console.log('语音识别已取消')
        window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
      }
      return
    }

    // 'openai' / 'browser' modes: stop capture and discard buffered audio.
    if (this.isRecording && this.mediaRecorder) {
      this.mediaRecorder.stop()

      if (this.stream) {
        this.stream.getTracks().forEach(track => track.stop())
        this.stream = null
      }

      this.isRecording = false
      this.mediaRecorder = null
      this.audioChunks = []
      console.log('录音已取消')
      window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
    }
  }

  /**
   * Close the WebSocket connection and cancel any pending reconnect.
   */
  closeWebSocketConnection = (): void => {
    if (this.ws) {
      try {
        this.ws.close()
      } catch (e) {
        console.error('[ASRService] 关闭WebSocket连接失败:', e)
      }
      this.ws = null
    }

    this.wsConnected = false
    this.browserReady = false

    if (this.reconnectTimeout) {
      clearTimeout(this.reconnectTimeout)
      this.reconnectTimeout = null
    }
  }

  /**
   * Open the external recognition browser page.
   * NOTE(review): URL duplicates the hard-coded port in connectToWebSocketServer.
   */
  openBrowserPage = (): void => {
    window.open('http://localhost:8080', '_blank')
  }
}

// Singleton instance shared by the app.
const instance = new ASRService()
export default instance

View File

@ -129,6 +129,12 @@ export interface SettingsState {
filterHtmlTags: boolean // 过滤HTML标签
maxTextLength: number // 最大文本长度
}
// ASR配置语音识别
asrEnabled: boolean
asrServiceType: string // ASR服务类型openai或browser
asrApiKey: string
asrApiUrl: string
asrModel: string
// Quick Panel Triggers
enableQuickPanelTriggers: boolean
// Export Menu Options
@ -248,6 +254,12 @@ export const initialState: SettingsState = {
filterHtmlTags: true, // 默认过滤HTML标签
maxTextLength: 4000 // 默认最大文本长度
},
// ASR配置语音识别
asrEnabled: false,
asrServiceType: 'openai', // 默认使用 OpenAI ASR
asrApiKey: '',
asrApiUrl: 'https://api.openai.com/v1/audio/transcriptions',
asrModel: 'whisper-1',
// Quick Panel Triggers
enableQuickPanelTriggers: false,
// Export Menu Options
@ -628,6 +640,22 @@ const settingsSlice = createSlice({
...action.payload
}
},
// ASR相关的action
setAsrEnabled: (state, action: PayloadAction<boolean>) => {
state.asrEnabled = action.payload
},
setAsrServiceType: (state, action: PayloadAction<string>) => {
state.asrServiceType = action.payload
},
setAsrApiKey: (state, action: PayloadAction<string>) => {
state.asrApiKey = action.payload
},
setAsrApiUrl: (state, action: PayloadAction<string>) => {
state.asrApiUrl = action.payload
},
setAsrModel: (state, action: PayloadAction<string>) => {
state.asrModel = action.payload
},
// Quick Panel Triggers action
setEnableQuickPanelTriggers: (state, action: PayloadAction<boolean>) => {
state.enableQuickPanelTriggers = action.payload
@ -736,7 +764,12 @@ export const {
addTtsCustomModel,
removeTtsCustomVoice,
removeTtsCustomModel,
setTtsFilterOptions
setTtsFilterOptions,
setAsrEnabled,
setAsrServiceType,
setAsrApiKey,
setAsrApiUrl,
setAsrModel
} = settingsSlice.actions
export default settingsSlice.reducer

View File

@ -4,6 +4,21 @@ interface ObsidianAPI {
getFolders: (vaultName: string) => Promise<Array<{ path: string; type: 'folder' | 'markdown'; name: string }>>
}
/**
 * Subset of Electron's `ipcRenderer` surface exposed to the renderer
 * (presumably via a preload bridge — confirm against the preload script).
 */
interface IpcRendererAPI {
  // Invoke a handler registered in the main process and await its result.
  invoke: (channel: string, ...args: any[]) => Promise<any>
  // Subscribe to messages on a channel.
  on: (channel: string, listener: (...args: any[]) => void) => void
  // Subscribe for a single message, then auto-unsubscribe.
  once: (channel: string, listener: (...args: any[]) => void) => void
  // Remove one specific listener from a channel.
  removeListener: (channel: string, listener: (...args: any[]) => void) => void
  // Remove every listener from a channel.
  removeAllListeners: (channel: string) => void
  // Fire-and-forget message to the main process.
  send: (channel: string, ...args: any[]) => void
  // Synchronous send; blocks the renderer until the main process replies.
  sendSync: (channel: string, ...args: any[]) => any
}
/** Electron bridge object exposed to the renderer as `window.electron`. */
interface ElectronAPI {
  ipcRenderer: IpcRendererAPI
}
/** Global augmentation: APIs injected onto `window` for the renderer. */
interface Window {
  // Obsidian integration API (declared earlier in this file).
  obsidian: ObsidianAPI
  // Electron IPC bridge.
  electron: ElectronAPI
}