diff --git a/src/main/mcpServers/__tests__/browser.test.ts b/src/main/mcpServers/__tests__/browser.test.ts index 712eaf94ea..800d03d7c5 100644 --- a/src/main/mcpServers/__tests__/browser.test.ts +++ b/src/main/mcpServers/__tests__/browser.test.ts @@ -1,5 +1,14 @@ import { describe, expect, it, vi } from 'vitest' +vi.mock('node:fs', () => ({ + default: { + existsSync: vi.fn(() => false), + mkdirSync: vi.fn() + }, + existsSync: vi.fn(() => false), + mkdirSync: vi.fn() +})) + vi.mock('electron', () => { const sendCommand = vi.fn(async (command: string, params?: { expression?: string }) => { if (command === 'Runtime.evaluate') { @@ -21,24 +30,31 @@ vi.mock('electron', () => { sendCommand } - const webContents = { + const createWebContents = () => ({ debugger: debuggerObj, setUserAgent: vi.fn(), getURL: vi.fn(() => 'https://example.com/'), getTitle: vi.fn(async () => 'Example Title'), + loadURL: vi.fn(async () => {}), once: vi.fn(), removeListener: vi.fn(), - on: vi.fn() - } - - const loadURL = vi.fn(async () => {}) + on: vi.fn(), + isDestroyed: vi.fn(() => false), + canGoBack: vi.fn(() => false), + canGoForward: vi.fn(() => false), + goBack: vi.fn(), + goForward: vi.fn(), + reload: vi.fn(), + executeJavaScript: vi.fn(async () => null), + setWindowOpenHandler: vi.fn() + }) const windows: any[] = [] + const views: any[] = [] class MockBrowserWindow { private destroyed = false - public webContents = webContents - public loadURL = loadURL + public webContents = createWebContents() public isDestroyed = vi.fn(() => this.destroyed) public close = vi.fn(() => { this.destroyed = true @@ -47,31 +63,58 @@ vi.mock('electron', () => { this.destroyed = true }) public on = vi.fn() + public setBrowserView = vi.fn() + public addBrowserView = vi.fn() + public removeBrowserView = vi.fn() + public getContentSize = vi.fn(() => [1200, 800]) + public show = vi.fn() constructor() { windows.push(this) } } + class MockBrowserView { + public webContents = createWebContents() + public setBounds = 
vi.fn() + public setAutoResize = vi.fn() + public destroy = vi.fn() + + constructor() { + views.push(this) + } + } + const app = { isReady: vi.fn(() => true), whenReady: vi.fn(async () => {}), - on: vi.fn() + on: vi.fn(), + getPath: vi.fn((key: string) => { + if (key === 'userData') return '/mock/userData' + if (key === 'temp') return '/tmp' + return '/mock/unknown' + }), + getAppPath: vi.fn(() => '/mock/app'), + setPath: vi.fn() + } + + const nativeTheme = { + on: vi.fn(), + shouldUseDarkColors: false } return { BrowserWindow: MockBrowserWindow as any, + BrowserView: MockBrowserView as any, app, + nativeTheme, __mockDebugger: debuggerObj, __mockSendCommand: sendCommand, - __mockLoadURL: loadURL, - __mockWindows: windows + __mockWindows: windows, + __mockViews: views } }) -import * as electron from 'electron' -const { __mockWindows } = electron as typeof electron & { __mockWindows: any[] } - import { CdpBrowserController } from '../browser' describe('CdpBrowserController', () => { @@ -81,54 +124,249 @@ describe('CdpBrowserController', () => { expect(result).toBe('ok') }) - it('opens a URL (hidden) and returns current page info', async () => { + it('opens a URL in normal mode and returns current page info', async () => { const controller = new CdpBrowserController() const result = await controller.open('https://foo.bar/', 5000, false) expect(result.currentUrl).toBe('https://example.com/') expect(result.title).toBe('Example Title') }) - it('opens a URL (visible) when show=true', async () => { + it('opens a URL in private mode', async () => { const controller = new CdpBrowserController() - const result = await controller.open('https://foo.bar/', 5000, true, 'session-a') + const result = await controller.open('https://foo.bar/', 5000, true) expect(result.currentUrl).toBe('https://example.com/') expect(result.title).toBe('Example Title') }) it('reuses session for execute and supports multiline', async () => { const controller = new CdpBrowserController() - await 
controller.open('https://foo.bar/', 5000, false, 'session-b') - const result = await controller.execute('const a=1; const b=2; a+b;', 5000, 'session-b') + await controller.open('https://foo.bar/', 5000, false) + const result = await controller.execute('const a=1; const b=2; a+b;', 5000, false) expect(result).toBe('ok') }) - it('evicts least recently used session when exceeding maxSessions', async () => { - const controller = new CdpBrowserController({ maxSessions: 2, idleTimeoutMs: 1000 * 60 }) - await controller.open('https://foo.bar/', 5000, false, 's1') - await controller.open('https://foo.bar/', 5000, false, 's2') - await controller.open('https://foo.bar/', 5000, false, 's3') - const destroyedCount = __mockWindows.filter( - (w: any) => w.destroy.mock.calls.length > 0 || w.close.mock.calls.length > 0 - ).length - expect(destroyedCount).toBeGreaterThanOrEqual(1) + it('normal and private modes are isolated', async () => { + const controller = new CdpBrowserController() + await controller.open('https://foo.bar/', 5000, false) + await controller.open('https://foo.bar/', 5000, true) + const normalResult = await controller.execute('1+1', 5000, false) + const privateResult = await controller.execute('1+1', 5000, true) + expect(normalResult).toBe('ok') + expect(privateResult).toBe('ok') }) - it('fetches URL and returns html format', async () => { + it('fetches URL and returns html format with tabId', async () => { const controller = new CdpBrowserController() const result = await controller.fetch('https://example.com/', 'html') - expect(result).toBe('

Test

Content

') + expect(result.tabId).toBeDefined() + expect(result.content).toBe('

Test

Content

') }) - it('fetches URL and returns txt format', async () => { + it('fetches URL and returns txt format with tabId', async () => { const controller = new CdpBrowserController() const result = await controller.fetch('https://example.com/', 'txt') - expect(result).toBe('Test\nContent') + expect(result.tabId).toBeDefined() + expect(result.content).toBe('Test\nContent') }) - it('fetches URL and returns markdown format (default)', async () => { + it('fetches URL and returns markdown format (default) with tabId', async () => { const controller = new CdpBrowserController() const result = await controller.fetch('https://example.com/') - expect(typeof result).toBe('string') - expect(result).toContain('Test') + expect(result.tabId).toBeDefined() + expect(typeof result.content).toBe('string') + expect(result.content).toContain('Test') + }) + + it('fetches URL in private mode with tabId', async () => { + const controller = new CdpBrowserController() + const result = await controller.fetch('https://example.com/', 'html', 10000, true) + expect(result.tabId).toBeDefined() + expect(result.content).toBe('

Test

Content

') + }) + + describe('Multi-tab support', () => { + it('creates new tab with newTab parameter', async () => { + const controller = new CdpBrowserController() + const result1 = await controller.open('https://site1.com/', 5000, false, true) + const result2 = await controller.open('https://site2.com/', 5000, false, true) + + expect(result1.tabId).toBeDefined() + expect(result2.tabId).toBeDefined() + expect(result1.tabId).not.toBe(result2.tabId) + }) + + it('reuses same tab without newTab parameter', async () => { + const controller = new CdpBrowserController() + const result1 = await controller.open('https://site1.com/', 5000, false) + const result2 = await controller.open('https://site2.com/', 5000, false) + + expect(result1.tabId).toBe(result2.tabId) + }) + + it('fetches in new tab with newTab parameter', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + const tabs = await controller.listTabs(false) + const initialTabCount = tabs.length + + await controller.fetch('https://other.com/', 'html', 10000, false, true) + const tabsAfter = await controller.listTabs(false) + + expect(tabsAfter.length).toBe(initialTabCount + 1) + }) + }) + + describe('Tab management', () => { + it('lists tabs in a window', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + + const tabs = await controller.listTabs(false) + expect(tabs.length).toBeGreaterThan(0) + expect(tabs[0].tabId).toBeDefined() + }) + + it('lists tabs separately for normal and private modes', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + await controller.open('https://example.com/', 5000, true) + + const normalTabs = await controller.listTabs(false) + const privateTabs = await controller.listTabs(true) + + expect(normalTabs.length).toBe(1) + expect(privateTabs.length).toBe(1) + 
expect(normalTabs[0].tabId).not.toBe(privateTabs[0].tabId) + }) + + it('closes specific tab', async () => { + const controller = new CdpBrowserController() + const result1 = await controller.open('https://site1.com/', 5000, false, true) + await controller.open('https://site2.com/', 5000, false, true) + + const tabsBefore = await controller.listTabs(false) + expect(tabsBefore.length).toBe(2) + + await controller.closeTab(false, result1.tabId) + + const tabsAfter = await controller.listTabs(false) + expect(tabsAfter.length).toBe(1) + expect(tabsAfter.find((t) => t.tabId === result1.tabId)).toBeUndefined() + }) + + it('switches active tab', async () => { + const controller = new CdpBrowserController() + const result1 = await controller.open('https://site1.com/', 5000, false, true) + const result2 = await controller.open('https://site2.com/', 5000, false, true) + + await controller.switchTab(false, result1.tabId) + await controller.switchTab(false, result2.tabId) + }) + + it('throws error when switching to non-existent tab', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + + await expect(controller.switchTab(false, 'non-existent-tab')).rejects.toThrow('Tab non-existent-tab not found') + }) + }) + + describe('Reset behavior', () => { + it('resets specific tab only', async () => { + const controller = new CdpBrowserController() + const result1 = await controller.open('https://site1.com/', 5000, false, true) + await controller.open('https://site2.com/', 5000, false, true) + + await controller.reset(false, result1.tabId) + + const tabs = await controller.listTabs(false) + expect(tabs.length).toBe(1) + }) + + it('resets specific window only', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + await controller.open('https://example.com/', 5000, true) + + await controller.reset(false) + + const normalTabs = await 
controller.listTabs(false) + const privateTabs = await controller.listTabs(true) + + expect(normalTabs.length).toBe(0) + expect(privateTabs.length).toBe(1) + }) + + it('resets all windows', async () => { + const controller = new CdpBrowserController() + await controller.open('https://example.com/', 5000, false) + await controller.open('https://example.com/', 5000, true) + + await controller.reset() + + const normalTabs = await controller.listTabs(false) + const privateTabs = await controller.listTabs(true) + + expect(normalTabs.length).toBe(0) + expect(privateTabs.length).toBe(0) + }) + }) + + describe('showWindow parameter', () => { + it('passes showWindow parameter through open', async () => { + const controller = new CdpBrowserController() + const result = await controller.open('https://example.com/', 5000, false, false, true) + expect(result.currentUrl).toBe('https://example.com/') + expect(result.tabId).toBeDefined() + }) + + it('passes showWindow parameter through fetch', async () => { + const controller = new CdpBrowserController() + const result = await controller.fetch('https://example.com/', 'html', 10000, false, false, true) + expect(result.tabId).toBeDefined() + expect(result.content).toBe('

Test

Content

') + }) + + it('passes showWindow parameter through createTab', async () => { + const controller = new CdpBrowserController() + const { tabId, view } = await controller.createTab(false, true) + expect(tabId).toBeDefined() + expect(view).toBeDefined() + }) + + it('shows existing window when showWindow=true on subsequent calls', async () => { + const controller = new CdpBrowserController() + // First call creates window + await controller.open('https://example.com/', 5000, false, false, false) + // Second call with showWindow=true should show existing window + const result = await controller.open('https://example.com/', 5000, false, false, true) + expect(result.currentUrl).toBe('https://example.com/') + }) + }) + + describe('Window limits and eviction', () => { + it('respects maxWindows limit', async () => { + const controller = new CdpBrowserController({ maxWindows: 1 }) + await controller.open('https://example.com/', 5000, false) + await controller.open('https://example.com/', 5000, true) + + const normalTabs = await controller.listTabs(false) + const privateTabs = await controller.listTabs(true) + + expect(privateTabs.length).toBe(1) + expect(normalTabs.length).toBe(0) + }) + + it('cleans up idle windows on next access', async () => { + const controller = new CdpBrowserController({ idleTimeoutMs: 1 }) + await controller.open('https://example.com/', 5000, false) + + await new Promise((r) => setTimeout(r, 10)) + + await controller.open('https://example.com/', 5000, true) + + const normalTabs = await controller.listTabs(false) + expect(normalTabs.length).toBe(0) + }) }) }) diff --git a/src/main/mcpServers/browser/README.md b/src/main/mcpServers/browser/README.md new file mode 100644 index 0000000000..27d1307782 --- /dev/null +++ b/src/main/mcpServers/browser/README.md @@ -0,0 +1,177 @@ +# Browser MCP Server + +A Model Context Protocol (MCP) server for controlling browser windows via Chrome DevTools Protocol (CDP). 
+ +## Features + +### ✨ User Data Persistence +- **Normal mode (default)**: Cookies and localStorage persist across browser restarts (sessionStorage is per-tab and is cleared when its tab or window closes) +- **Private mode**: Ephemeral browsing - no data persists (like incognito mode) + +### 🔄 Window Management +- Two browsing modes: normal (persistent) and private (ephemeral) +- Lazy idle timeout cleanup (cleaned on next window access) +- Maximum window limits to prevent resource exhaustion + +> **Note**: Normal mode uses a global `persist:default` partition shared by all clients. This means login sessions and stored data are accessible to any code using the MCP server. + +## Architecture + +### How It Works +``` +Normal Mode (BrowserWindow) +├─ Persistent Storage (partition: persist:default) ← Global, shared across all clients +└─ Tabs (BrowserView) ← created via newTab or automatically + +Private Mode (BrowserWindow) +├─ Ephemeral Storage (partition: private) ← No disk persistence +└─ Tabs (BrowserView) ← created via newTab or automatically +``` + +- **One Window Per Mode**: Normal and private modes each have their own window +- **Multi-Tab Support**: Use `newTab: true` for parallel URL requests +- **Storage Isolation**: Normal and private modes have completely separate storage + +## Available Tools + +### `open` +Open a URL in a browser window. Optionally return page content. +```json +{ + "url": "https://example.com", + "format": "markdown", + "timeout": 10000, + "privateMode": false, + "newTab": false, + "showWindow": false +} +``` +- `format`: If set (`html`, `txt`, `markdown`, `json`), returns page content in that format along with tabId. If not set, just opens the page and returns navigation info. 
+- `newTab`: Set to `true` to open in a new tab (required for parallel requests) +- `showWindow`: Set to `true` to display the browser window (useful for debugging) +- Returns (without format): `{ currentUrl, title, tabId }` +- Returns (with format): `{ tabId, content }` where content is in the specified format + +### `execute` +Execute JavaScript code in the page context. +```json +{ + "code": "document.title", + "timeout": 5000, + "privateMode": false, + "tabId": "optional-tab-id" +} +``` +- `tabId`: Target a specific tab (from `open` response) + +### `reset` +Reset browser windows and tabs. +```json +{ + "privateMode": false, + "tabId": "optional-tab-id" +} +``` +- Omit all parameters to close all windows +- Set `privateMode` to close a specific window +- Set both `privateMode` and `tabId` to close a specific tab only + +## Usage Examples + +### Basic Navigation +```typescript +// Open a URL in normal mode (data persists) +await controller.open('https://example.com') +``` + +### Fetch Page Content +```typescript +// Open URL and get content as markdown +await open({ url: 'https://example.com', format: 'markdown' }) + +// Open URL and get raw HTML +await open({ url: 'https://example.com', format: 'html' }) +``` + +### Multi-Tab / Parallel Requests +```typescript +// Open multiple URLs in parallel using newTab +const [page1, page2] = await Promise.all([ + controller.open('https://site1.com', 10000, false, true), // newTab: true + controller.open('https://site2.com', 10000, false, true) // newTab: true +]) + +// Execute on specific tab +await controller.execute('document.title', 5000, false, page1.tabId) + +// Close specific tab when done +await controller.reset(false, page1.tabId) +``` + +### Private Browsing +```typescript +// Open a URL in private mode (no data persistence) +await controller.open('https://example.com', 10000, true) + +// Cookies and localStorage won't persist after reset +``` + +### Data Persistence (Normal Mode) +```typescript +// Set data 
+await controller.open('https://example.com', 10000, false) +await controller.execute('localStorage.setItem("key", "value")', 5000, false) + +// Close window +await controller.reset(false) + +// Reopen - data persists! +await controller.open('https://example.com', 10000, false) +const value = await controller.execute('localStorage.getItem("key")', 5000, false) +// Returns: "value" +``` + +### No Persistence (Private Mode) +```typescript +// Set data in private mode +await controller.open('https://example.com', 10000, true) +await controller.execute('localStorage.setItem("key", "value")', 5000, true) + +// Close private window +await controller.reset(true) + +// Reopen - data is gone! +await controller.open('https://example.com', 10000, true) +const value = await controller.execute('localStorage.getItem("key")', 5000, true) +// Returns: null +``` + +## Configuration + +```typescript +const controller = new CdpBrowserController({ + maxWindows: 5, // Maximum concurrent windows + idleTimeoutMs: 5 * 60 * 1000 // 5 minutes idle timeout (lazy cleanup) +}) +``` + +> **Note on Idle Timeout**: Idle windows are cleaned up lazily when the next window is created or accessed, not on a background timer. + +## Best Practices + +1. **Use Normal Mode for Authentication**: When you need to stay logged in across sessions +2. **Use Private Mode for Sensitive Operations**: When you don't want data to persist +3. **Use `newTab: true` for Parallel Requests**: Avoid race conditions when fetching multiple URLs +4. **Resource Cleanup**: Call `reset()` when done, or `reset(privateMode, tabId)` to close specific tabs +5. **Error Handling**: All tool handlers return error responses on failure +6. 
**Timeout Configuration**: Adjust timeouts based on page complexity + +## Technical Details + +- **CDP Version**: 1.3 +- **User Agent**: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0 +- **Storage**: + - Normal mode: `persist:default` (disk-persisted, global) + - Private mode: `private` (memory only) +- **Window Size**: 1200x800 (default) +- **Visibility**: Windows hidden by default (use `showWindow: true` to display) diff --git a/src/main/mcpServers/browser/constants.ts b/src/main/mcpServers/browser/constants.ts new file mode 100644 index 0000000000..2b10943f8e --- /dev/null +++ b/src/main/mcpServers/browser/constants.ts @@ -0,0 +1,3 @@ +export const TAB_BAR_HEIGHT = 92 // Height for Chrome-style tab bar (42px) + address bar (50px) +export const SESSION_KEY_DEFAULT = 'default' +export const SESSION_KEY_PRIVATE = 'private' diff --git a/src/main/mcpServers/browser/controller.ts b/src/main/mcpServers/browser/controller.ts index 6246da45d2..9e0f5220ca 100644 --- a/src/main/mcpServers/browser/controller.ts +++ b/src/main/mcpServers/browser/controller.ts @@ -1,20 +1,49 @@ -import { app, BrowserWindow } from 'electron' +import { titleBarOverlayDark, titleBarOverlayLight } from '@main/config' +import { isMac } from '@main/constant' +import { randomUUID } from 'crypto' +import { app, BrowserView, BrowserWindow, nativeTheme } from 'electron' import TurndownService from 'turndown' -import { logger, userAgent } from './types' +import { SESSION_KEY_DEFAULT, SESSION_KEY_PRIVATE, TAB_BAR_HEIGHT } from './constants' +import { TAB_BAR_HTML } from './tabbar-html' +import { logger, type TabInfo, userAgent, type WindowInfo } from './types' /** * Controller for managing browser windows via Chrome DevTools Protocol (CDP). - * Supports multiple sessions with LRU eviction and idle timeout cleanup. + * Supports two modes: normal (persistent) and private (ephemeral). + * Normal mode persists user data (cookies, localStorage, etc.) 
globally across all clients. + * Private mode is ephemeral - data is cleared when the window closes. */ export class CdpBrowserController { - private windows: Map = new Map() - private readonly maxSessions: number + private windows: Map = new Map() + private readonly maxWindows: number private readonly idleTimeoutMs: number + private readonly turndownService: TurndownService - constructor(options?: { maxSessions?: number; idleTimeoutMs?: number }) { - this.maxSessions = options?.maxSessions ?? 5 + constructor(options?: { maxWindows?: number; idleTimeoutMs?: number }) { + this.maxWindows = options?.maxWindows ?? 5 this.idleTimeoutMs = options?.idleTimeoutMs ?? 5 * 60 * 1000 + this.turndownService = new TurndownService() + + // Listen for theme changes and update all tab bars + nativeTheme.on('updated', () => { + const isDark = nativeTheme.shouldUseDarkColors + for (const windowInfo of this.windows.values()) { + if (windowInfo.tabBarView && !windowInfo.tabBarView.webContents.isDestroyed()) { + windowInfo.tabBarView.webContents.executeJavaScript(`window.setTheme(${isDark})`).catch(() => { + // Ignore errors if tab bar is not ready + }) + } + } + }) + } + + private getWindowKey(privateMode: boolean): string { + return privateMode ? SESSION_KEY_PRIVATE : SESSION_KEY_DEFAULT + } + + private getPartition(privateMode: boolean): string { + return privateMode ? 
SESSION_KEY_PRIVATE : `persist:${SESSION_KEY_DEFAULT}` } private async ensureAppReady() { @@ -23,28 +52,50 @@ export class CdpBrowserController { } } - private touch(sessionId: string) { - const entry = this.windows.get(sessionId) - if (entry) entry.lastActive = Date.now() + private touchWindow(windowKey: string) { + const windowInfo = this.windows.get(windowKey) + if (windowInfo) windowInfo.lastActive = Date.now() } - private closeWindow(win: BrowserWindow, sessionId: string) { - try { - if (!win.isDestroyed()) { - if (win.webContents.debugger.isAttached()) { - win.webContents.debugger.detach() - } - win.close() - } - } catch (error) { - logger.warn('Error closing window', { error, sessionId }) + private touchTab(windowKey: string, tabId: string) { + const windowInfo = this.windows.get(windowKey) + if (windowInfo) { + const tab = windowInfo.tabs.get(tabId) + if (tab) tab.lastActive = Date.now() + windowInfo.lastActive = Date.now() } } - private async ensureDebuggerAttached(dbg: Electron.Debugger, sessionId: string) { + private closeTabInternal(windowInfo: WindowInfo, tabId: string) { + try { + const tab = windowInfo.tabs.get(tabId) + if (!tab) return + + if (!tab.view.webContents.isDestroyed()) { + if (tab.view.webContents.debugger.isAttached()) { + tab.view.webContents.debugger.detach() + } + } + + // Remove view from window + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.removeBrowserView(tab.view) + } + + // Destroy the view using safe cast + const viewWithDestroy = tab.view as BrowserView & { destroy?: () => void } + if (viewWithDestroy.destroy) { + viewWithDestroy.destroy() + } + } catch (error) { + logger.warn('Error closing tab', { error, windowKey: windowInfo.windowKey, tabId }) + } + } + + private async ensureDebuggerAttached(dbg: Electron.Debugger, sessionKey: string) { if (!dbg.isAttached()) { try { - logger.info('Attaching debugger', { sessionId }) + logger.info('Attaching debugger', { sessionKey }) dbg.attach('1.3') await 
dbg.sendCommand('Page.enable') await dbg.sendCommand('Runtime.enable') @@ -58,110 +109,514 @@ export class CdpBrowserController { private sweepIdle() { const now = Date.now() - for (const [id, entry] of this.windows.entries()) { - if (now - entry.lastActive > this.idleTimeoutMs) { - this.closeWindow(entry.win, id) - this.windows.delete(id) + const windowKeys = Array.from(this.windows.keys()) + for (const windowKey of windowKeys) { + const windowInfo = this.windows.get(windowKey) + if (!windowInfo) continue + if (now - windowInfo.lastActive > this.idleTimeoutMs) { + const tabIds = Array.from(windowInfo.tabs.keys()) + for (const tabId of tabIds) { + this.closeTabInternal(windowInfo, tabId) + } + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } + this.windows.delete(windowKey) } } } - private evictIfNeeded(newSessionId: string) { - if (this.windows.size < this.maxSessions) return - let lruId: string | null = null + private evictIfNeeded(newWindowKey: string) { + if (this.windows.size < this.maxWindows) return + let lruKey: string | null = null let lruTime = Number.POSITIVE_INFINITY - for (const [id, entry] of this.windows.entries()) { - if (id === newSessionId) continue - if (entry.lastActive < lruTime) { - lruTime = entry.lastActive - lruId = id + for (const [key, windowInfo] of this.windows.entries()) { + if (key === newWindowKey) continue + if (windowInfo.lastActive < lruTime) { + lruTime = windowInfo.lastActive + lruKey = key } } - if (lruId) { - const entry = this.windows.get(lruId) - if (entry) { - this.closeWindow(entry.win, lruId) + if (lruKey) { + const windowInfo = this.windows.get(lruKey) + if (windowInfo) { + for (const [tabId] of windowInfo.tabs.entries()) { + this.closeTabInternal(windowInfo, tabId) + } + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } } - this.windows.delete(lruId) - logger.info('Evicted session to respect maxSessions', { evicted: lruId }) + this.windows.delete(lruKey) + 
logger.info('Evicted window to respect maxWindows', { evicted: lruKey }) } } - private async getWindow(sessionId = 'default', forceNew = false, show = false): Promise { + private sendTabBarUpdate(windowInfo: WindowInfo) { + if (!windowInfo.tabBarView || !windowInfo.tabBarView.webContents || windowInfo.tabBarView.webContents.isDestroyed()) + return + + const tabs = Array.from(windowInfo.tabs.values()).map((tab) => ({ + id: tab.id, + title: tab.title || 'New Tab', + url: tab.url, + isActive: tab.id === windowInfo.activeTabId + })) + + let activeUrl = '' + let canGoBack = false + let canGoForward = false + + if (windowInfo.activeTabId) { + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (activeTab && !activeTab.view.webContents.isDestroyed()) { + activeUrl = activeTab.view.webContents.getURL() + canGoBack = activeTab.view.webContents.canGoBack() + canGoForward = activeTab.view.webContents.canGoForward() + } + } + + const script = `window.updateTabs(${JSON.stringify(tabs)}, ${JSON.stringify(activeUrl)}, ${canGoBack}, ${canGoForward})` + windowInfo.tabBarView.webContents.executeJavaScript(script).catch((error) => { + logger.debug('Tab bar update failed', { error, windowKey: windowInfo.windowKey }) + }) + } + + private handleNavigateAction(windowInfo: WindowInfo, url: string) { + if (!windowInfo.activeTabId) return + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (!activeTab || activeTab.view.webContents.isDestroyed()) return + + let finalUrl = url.trim() + if (!/^https?:\/\//i.test(finalUrl)) { + if (/^[a-zA-Z0-9][a-zA-Z0-9-]*\.[a-zA-Z]{2,}/.test(finalUrl) || finalUrl.includes('.')) { + finalUrl = 'https://' + finalUrl + } else { + finalUrl = 'https://www.google.com/search?q=' + encodeURIComponent(finalUrl) + } + } + + activeTab.view.webContents.loadURL(finalUrl).catch((error) => { + logger.warn('Navigation failed in tab bar', { error, url: finalUrl, tabId: windowInfo.activeTabId }) + }) + } + + private handleBackAction(windowInfo: 
WindowInfo) { + if (!windowInfo.activeTabId) return + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (!activeTab || activeTab.view.webContents.isDestroyed()) return + + if (activeTab.view.webContents.canGoBack()) { + activeTab.view.webContents.goBack() + } + } + + private handleForwardAction(windowInfo: WindowInfo) { + if (!windowInfo.activeTabId) return + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (!activeTab || activeTab.view.webContents.isDestroyed()) return + + if (activeTab.view.webContents.canGoForward()) { + activeTab.view.webContents.goForward() + } + } + + private handleRefreshAction(windowInfo: WindowInfo) { + if (!windowInfo.activeTabId) return + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (!activeTab || activeTab.view.webContents.isDestroyed()) return + + activeTab.view.webContents.reload() + } + + private setupTabBarMessageHandler(windowInfo: WindowInfo) { + if (!windowInfo.tabBarView) return + + windowInfo.tabBarView.webContents.on('console-message', (_event, _level, message) => { + try { + const parsed = JSON.parse(message) + if (parsed?.channel === 'tabbar-action' && parsed?.payload) { + this.handleTabBarAction(windowInfo, parsed.payload) + } + } catch { + // Not a JSON message, ignore + } + }) + + windowInfo.tabBarView.webContents + .executeJavaScript(` + (function() { + window.addEventListener('message', function(e) { + if (e.data && e.data.channel === 'tabbar-action') { + console.log(JSON.stringify(e.data)); + } + }); + })(); + `) + .catch((error) => { + logger.debug('Tab bar message handler setup failed', { error, windowKey: windowInfo.windowKey }) + }) + } + + private handleTabBarAction(windowInfo: WindowInfo, action: { type: string; tabId?: string; url?: string }) { + if (action.type === 'switch' && action.tabId) { + this.switchTab(windowInfo.privateMode, action.tabId).catch((error) => { + logger.warn('Tab switch failed', { error, tabId: action.tabId, windowKey: 
windowInfo.windowKey }) + }) + } else if (action.type === 'close' && action.tabId) { + this.closeTab(windowInfo.privateMode, action.tabId).catch((error) => { + logger.warn('Tab close failed', { error, tabId: action.tabId, windowKey: windowInfo.windowKey }) + }) + } else if (action.type === 'new') { + this.createTab(windowInfo.privateMode, true) + .then(({ tabId }) => this.switchTab(windowInfo.privateMode, tabId)) + .catch((error) => { + logger.warn('New tab creation failed', { error, windowKey: windowInfo.windowKey }) + }) + } else if (action.type === 'navigate' && action.url) { + this.handleNavigateAction(windowInfo, action.url) + } else if (action.type === 'back') { + this.handleBackAction(windowInfo) + } else if (action.type === 'forward') { + this.handleForwardAction(windowInfo) + } else if (action.type === 'refresh') { + this.handleRefreshAction(windowInfo) + } else if (action.type === 'window-minimize') { + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.minimize() + } + } else if (action.type === 'window-maximize') { + if (!windowInfo.window.isDestroyed()) { + if (windowInfo.window.isMaximized()) { + windowInfo.window.unmaximize() + } else { + windowInfo.window.maximize() + } + } + } else if (action.type === 'window-close') { + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } + } + } + + private createTabBarView(windowInfo: WindowInfo): BrowserView { + const tabBarView = new BrowserView({ + webPreferences: { + contextIsolation: false, + sandbox: false, + nodeIntegration: false + } + }) + + windowInfo.window.addBrowserView(tabBarView) + const [width] = windowInfo.window.getContentSize() + tabBarView.setBounds({ x: 0, y: 0, width, height: TAB_BAR_HEIGHT }) + tabBarView.setAutoResize({ width: true, height: false }) + tabBarView.webContents.loadURL(`data:text/html;charset=utf-8,${encodeURIComponent(TAB_BAR_HTML)}`) + + tabBarView.webContents.on('did-finish-load', () => { + // Initialize platform for proper styling + const 
platform = isMac ? 'mac' : process.platform === 'win32' ? 'win' : 'linux' + tabBarView.webContents.executeJavaScript(`window.initPlatform('${platform}')`).catch((error) => { + logger.debug('Platform init failed', { error, windowKey: windowInfo.windowKey }) + }) + // Initialize theme + const isDark = nativeTheme.shouldUseDarkColors + tabBarView.webContents.executeJavaScript(`window.setTheme(${isDark})`).catch((error) => { + logger.debug('Theme init failed', { error, windowKey: windowInfo.windowKey }) + }) + this.setupTabBarMessageHandler(windowInfo) + this.sendTabBarUpdate(windowInfo) + }) + + return tabBarView + } + + private async createBrowserWindow( + windowKey: string, + privateMode: boolean, + showWindow = false + ): Promise { await this.ensureAppReady() - this.sweepIdle() - - const existing = this.windows.get(sessionId) - if (existing && !existing.win.isDestroyed() && !forceNew) { - this.touch(sessionId) - return existing.win - } - - if (existing && !existing.win.isDestroyed() && forceNew) { - try { - if (existing.win.webContents.debugger.isAttached()) { - existing.win.webContents.debugger.detach() - } - } catch (error) { - logger.warn('Error detaching debugger before recreate', { error, sessionId }) - } - existing.win.destroy() - this.windows.delete(sessionId) - } - - this.evictIfNeeded(sessionId) + const partition = this.getPartition(privateMode) const win = new BrowserWindow({ - show, + show: showWindow, + width: 1200, + height: 800, + ...(isMac + ? { + titleBarStyle: 'hidden', + titleBarOverlay: nativeTheme.shouldUseDarkColors ? 
titleBarOverlayDark : titleBarOverlayLight, + trafficLightPosition: { x: 8, y: 13 } + } + : { + frame: false // Frameless window for Windows and Linux + }), webPreferences: { contextIsolation: true, sandbox: true, nodeIntegration: false, - devTools: true + devTools: true, + partition } }) - // Use a standard Chrome UA to avoid some anti-bot blocks - win.webContents.setUserAgent(userAgent) - - // Log navigation lifecycle to help diagnose slow loads - win.webContents.on('did-start-loading', () => logger.info(`did-start-loading`, { sessionId })) - win.webContents.on('dom-ready', () => logger.info(`dom-ready`, { sessionId })) - win.webContents.on('did-finish-load', () => logger.info(`did-finish-load`, { sessionId })) - win.webContents.on('did-fail-load', (_e, code, desc) => logger.warn('Navigation failed', { code, desc })) - win.on('closed', () => { - this.windows.delete(sessionId) + const windowInfo = this.windows.get(windowKey) + if (windowInfo) { + const tabIds = Array.from(windowInfo.tabs.keys()) + for (const tabId of tabIds) { + this.closeTabInternal(windowInfo, tabId) + } + this.windows.delete(windowKey) + } }) - this.windows.set(sessionId, { win, lastActive: Date.now() }) return win } + private async getOrCreateWindow(privateMode: boolean, showWindow = false): Promise { + await this.ensureAppReady() + this.sweepIdle() + + const windowKey = this.getWindowKey(privateMode) + + let windowInfo = this.windows.get(windowKey) + if (!windowInfo) { + this.evictIfNeeded(windowKey) + const window = await this.createBrowserWindow(windowKey, privateMode, showWindow) + windowInfo = { + windowKey, + privateMode, + window, + tabs: new Map(), + activeTabId: null, + lastActive: Date.now(), + tabBarView: undefined + } + this.windows.set(windowKey, windowInfo) + const tabBarView = this.createTabBarView(windowInfo) + windowInfo.tabBarView = tabBarView + + // Register resize listener once per window (not per tab) + // Capture windowKey to look up fresh windowInfo on each resize + 
windowInfo.window.on('resize', () => { + const info = this.windows.get(windowKey) + if (info) this.updateViewBounds(info) + }) + + logger.info('Created new window', { windowKey, privateMode }) + } else if (showWindow && !windowInfo.window.isDestroyed()) { + windowInfo.window.show() + } + + this.touchWindow(windowKey) + return windowInfo + } + + private updateViewBounds(windowInfo: WindowInfo) { + if (windowInfo.window.isDestroyed()) return + + const [width, height] = windowInfo.window.getContentSize() + + // Update tab bar bounds + if (windowInfo.tabBarView && !windowInfo.tabBarView.webContents.isDestroyed()) { + windowInfo.tabBarView.setBounds({ x: 0, y: 0, width, height: TAB_BAR_HEIGHT }) + } + + // Update active tab view bounds + if (windowInfo.activeTabId) { + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (activeTab && !activeTab.view.webContents.isDestroyed()) { + activeTab.view.setBounds({ + x: 0, + y: TAB_BAR_HEIGHT, + width, + height: Math.max(0, height - TAB_BAR_HEIGHT) + }) + } + } + } + + /** + * Creates a new tab in the window + * @param privateMode - If true, uses private browsing mode (default: false) + * @param showWindow - If true, shows the browser window (default: false) + * @returns Tab ID and view + */ + public async createTab(privateMode = false, showWindow = false): Promise<{ tabId: string; view: BrowserView }> { + const windowInfo = await this.getOrCreateWindow(privateMode, showWindow) + const tabId = randomUUID() + const partition = this.getPartition(privateMode) + + const view = new BrowserView({ + webPreferences: { + contextIsolation: true, + sandbox: true, + nodeIntegration: false, + devTools: true, + partition + } + }) + + view.webContents.setUserAgent(userAgent) + + const windowKey = windowInfo.windowKey + view.webContents.on('did-start-loading', () => logger.info(`did-start-loading`, { windowKey, tabId })) + view.webContents.on('dom-ready', () => logger.info(`dom-ready`, { windowKey, tabId })) + 
view.webContents.on('did-finish-load', () => logger.info(`did-finish-load`, { windowKey, tabId })) + view.webContents.on('did-fail-load', (_e, code, desc) => logger.warn('Navigation failed', { code, desc })) + + view.webContents.on('destroyed', () => { + windowInfo.tabs.delete(tabId) + if (windowInfo.activeTabId === tabId) { + windowInfo.activeTabId = windowInfo.tabs.keys().next().value ?? null + if (windowInfo.activeTabId) { + const newActiveTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (newActiveTab && !windowInfo.window.isDestroyed()) { + windowInfo.window.addBrowserView(newActiveTab.view) + this.updateViewBounds(windowInfo) + } + } + } + this.sendTabBarUpdate(windowInfo) + }) + + view.webContents.on('page-title-updated', (_event, title) => { + tabInfo.title = title + this.sendTabBarUpdate(windowInfo) + }) + + view.webContents.on('did-navigate', (_event, url) => { + tabInfo.url = url + this.sendTabBarUpdate(windowInfo) + }) + + view.webContents.on('did-navigate-in-page', (_event, url) => { + tabInfo.url = url + this.sendTabBarUpdate(windowInfo) + }) + + // Handle new window requests (e.g., target="_blank" links) - open in new tab instead + view.webContents.setWindowOpenHandler(({ url }) => { + // Create a new tab and navigate to the URL + this.createTab(privateMode, true) + .then(({ tabId: newTabId }) => { + return this.switchTab(privateMode, newTabId).then(() => { + const newTab = windowInfo.tabs.get(newTabId) + if (newTab && !newTab.view.webContents.isDestroyed()) { + newTab.view.webContents.loadURL(url) + } + }) + }) + .catch((error) => { + logger.warn('Failed to open link in new tab', { error, url }) + }) + return { action: 'deny' } + }) + + const tabInfo: TabInfo = { + id: tabId, + view, + url: '', + title: '', + lastActive: Date.now() + } + + windowInfo.tabs.set(tabId, tabInfo) + + // Set as active tab and add to window + if (!windowInfo.activeTabId || windowInfo.tabs.size === 1) { + windowInfo.activeTabId = tabId + 
windowInfo.window.addBrowserView(view) + this.updateViewBounds(windowInfo) + } + + this.sendTabBarUpdate(windowInfo) + logger.info('Created new tab', { windowKey, tabId, privateMode }) + return { tabId, view } + } + + /** + * Gets an existing tab or creates a new one + * @param privateMode - Whether to use private browsing mode + * @param tabId - Optional specific tab ID to use + * @param newTab - If true, always create a new tab (useful for parallel requests) + * @param showWindow - If true, shows the browser window (default: false) + */ + private async getTab( + privateMode: boolean, + tabId?: string, + newTab?: boolean, + showWindow = false + ): Promise<{ tabId: string; tab: TabInfo }> { + const windowInfo = await this.getOrCreateWindow(privateMode, showWindow) + + // If newTab is requested, create a fresh tab + if (newTab) { + const { tabId: freshTabId } = await this.createTab(privateMode, showWindow) + const tab = windowInfo.tabs.get(freshTabId) + if (!tab) { + throw new Error(`Tab ${freshTabId} was created but not found - it may have been closed`) + } + return { tabId: freshTabId, tab } + } + + if (tabId) { + const tab = windowInfo.tabs.get(tabId) + if (tab && !tab.view.webContents.isDestroyed()) { + this.touchTab(windowInfo.windowKey, tabId) + return { tabId, tab } + } + } + + // Use active tab or create new one + if (windowInfo.activeTabId) { + const activeTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (activeTab && !activeTab.view.webContents.isDestroyed()) { + this.touchTab(windowInfo.windowKey, windowInfo.activeTabId) + return { tabId: windowInfo.activeTabId, tab: activeTab } + } + } + + // Create new tab + const { tabId: newTabId } = await this.createTab(privateMode, showWindow) + const tab = windowInfo.tabs.get(newTabId) + if (!tab) { + throw new Error(`Tab ${newTabId} was created but not found - it may have been closed`) + } + return { tabId: newTabId, tab } + } + /** * Opens a URL in a browser window and waits for navigation to complete. 
* @param url - The URL to navigate to * @param timeout - Navigation timeout in milliseconds (default: 10000) - * @param show - Whether to show the browser window (default: false) - * @param sessionId - Session identifier for window reuse (default: 'default') - * @returns Object containing the current URL and page title after navigation + * @param privateMode - If true, uses private browsing mode (default: false) + * @param newTab - If true, always creates a new tab (useful for parallel requests) + * @param showWindow - If true, shows the browser window (default: false) + * @returns Object containing the current URL, page title, and tab ID after navigation */ - public async open(url: string, timeout = 10000, show = false, sessionId = 'default') { - const win = await this.getWindow(sessionId, true, show) - logger.info('Loading URL', { url, sessionId }) - const { webContents } = win - this.touch(sessionId) + public async open(url: string, timeout = 10000, privateMode = false, newTab = false, showWindow = false) { + const { tabId: actualTabId, tab } = await this.getTab(privateMode, undefined, newTab, showWindow) + const view = tab.view + const windowKey = this.getWindowKey(privateMode) + + logger.info('Loading URL', { url, windowKey, tabId: actualTabId, privateMode }) + const { webContents } = view + this.touchTab(windowKey, actualTabId) - // Track resolution state to prevent multiple handlers from firing let resolved = false + let timeoutHandle: ReturnType | undefined let onFinish: () => void let onDomReady: () => void let onFail: (_event: Electron.Event, code: number, desc: string) => void - // Define cleanup outside Promise to ensure it's callable in finally block, - // preventing memory leaks when timeout occurs before navigation completes const cleanup = () => { + if (timeoutHandle) clearTimeout(timeoutHandle) webContents.removeListener('did-finish-load', onFinish) webContents.removeListener('did-fail-load', onFail) webContents.removeListener('dom-ready', 
onDomReady) @@ -192,67 +647,134 @@ export class CdpBrowserController { }) const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => reject(new Error('Navigation timed out')), timeout) + timeoutHandle = setTimeout(() => reject(new Error('Navigation timed out')), timeout) }) try { - await Promise.race([win.loadURL(url), loadPromise, timeoutPromise]) + await Promise.race([view.webContents.loadURL(url), loadPromise, timeoutPromise]) } finally { - // Always cleanup listeners to prevent memory leaks on timeout cleanup() } const currentUrl = webContents.getURL() const title = await webContents.getTitle() - return { currentUrl, title } + + // Update tab info + tab.url = currentUrl + tab.title = title + + return { currentUrl, title, tabId: actualTabId } } - public async execute(code: string, timeout = 5000, sessionId = 'default') { - const win = await this.getWindow(sessionId) - this.touch(sessionId) - const dbg = win.webContents.debugger + /** + * Executes JavaScript code in the page context using Chrome DevTools Protocol. 
+ * @param code - JavaScript code to evaluate in the page + * @param timeout - Execution timeout in milliseconds (default: 5000) + * @param privateMode - If true, targets the private browsing window (default: false) + * @param tabId - Optional specific tab ID to target; if omitted, uses the active tab + * @returns The result value from the evaluated code, or null if no value returned + */ + public async execute(code: string, timeout = 5000, privateMode = false, tabId?: string) { + const { tabId: actualTabId, tab } = await this.getTab(privateMode, tabId) + const windowKey = this.getWindowKey(privateMode) + this.touchTab(windowKey, actualTabId) + const dbg = tab.view.webContents.debugger - await this.ensureDebuggerAttached(dbg, sessionId) + await this.ensureDebuggerAttached(dbg, windowKey) + let timeoutHandle: ReturnType | undefined const evalPromise = dbg.sendCommand('Runtime.evaluate', { expression: code, awaitPromise: true, returnByValue: true }) - const result = await Promise.race([ - evalPromise, - new Promise((_, reject) => setTimeout(() => reject(new Error('Execution timed out')), timeout)) - ]) + try { + const result = await Promise.race([ + evalPromise, + new Promise((_, reject) => { + timeoutHandle = setTimeout(() => reject(new Error('Execution timed out')), timeout) + }) + ]) - const evalResult = result as any + const evalResult = result as any - if (evalResult?.exceptionDetails) { - const message = evalResult.exceptionDetails.exception?.description || 'Unknown script error' - logger.warn('Runtime.evaluate raised exception', { message }) - throw new Error(message) + if (evalResult?.exceptionDetails) { + const message = evalResult.exceptionDetails.exception?.description || 'Unknown script error' + logger.warn('Runtime.evaluate raised exception', { message }) + throw new Error(message) + } + + const value = evalResult?.result?.value ?? evalResult?.result?.description ?? 
null + return value + } finally { + if (timeoutHandle) clearTimeout(timeoutHandle) } - - const value = evalResult?.result?.value ?? evalResult?.result?.description ?? null - return value } - public async reset(sessionId?: string) { - if (sessionId) { - const entry = this.windows.get(sessionId) - if (entry) { - this.closeWindow(entry.win, sessionId) + public async reset(privateMode?: boolean, tabId?: string) { + if (privateMode !== undefined && tabId) { + const windowKey = this.getWindowKey(privateMode) + const windowInfo = this.windows.get(windowKey) + if (windowInfo) { + this.closeTabInternal(windowInfo, tabId) + windowInfo.tabs.delete(tabId) + + // If no tabs left, close the window + if (windowInfo.tabs.size === 0) { + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } + this.windows.delete(windowKey) + logger.info('Browser CDP window closed (last tab closed)', { windowKey, tabId }) + return + } + + if (windowInfo.activeTabId === tabId) { + windowInfo.activeTabId = windowInfo.tabs.keys().next().value ?? 
null + if (windowInfo.activeTabId) { + const newActiveTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (newActiveTab && !windowInfo.window.isDestroyed()) { + windowInfo.window.addBrowserView(newActiveTab.view) + this.updateViewBounds(windowInfo) + } + } + } + this.sendTabBarUpdate(windowInfo) } - this.windows.delete(sessionId) - logger.info('Browser CDP context reset', { sessionId }) + logger.info('Browser CDP tab reset', { windowKey, tabId }) return } - for (const [id, entry] of this.windows.entries()) { - this.closeWindow(entry.win, id) - this.windows.delete(id) + if (privateMode !== undefined) { + const windowKey = this.getWindowKey(privateMode) + const windowInfo = this.windows.get(windowKey) + if (windowInfo) { + const tabIds = Array.from(windowInfo.tabs.keys()) + for (const tid of tabIds) { + this.closeTabInternal(windowInfo, tid) + } + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } + } + this.windows.delete(windowKey) + logger.info('Browser CDP window reset', { windowKey, privateMode }) + return } - logger.info('Browser CDP context reset (all sessions)') + + const allWindowInfos = Array.from(this.windows.values()) + for (const windowInfo of allWindowInfos) { + const tabIds = Array.from(windowInfo.tabs.keys()) + for (const tid of tabIds) { + this.closeTabInternal(windowInfo, tid) + } + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.close() + } + } + this.windows.clear() + logger.info('Browser CDP context reset (all windows)') } /** @@ -260,21 +782,26 @@ export class CdpBrowserController { * @param url - The URL to fetch * @param format - Output format: 'html', 'txt', 'markdown', or 'json' (default: 'markdown') * @param timeout - Navigation timeout in milliseconds (default: 10000) - * @param sessionId - Session identifier (default: 'default') - * @returns Content in the requested format. 
For 'json', returns parsed object or { data: rawContent } if parsing fails + * @param privateMode - If true, uses private browsing mode (default: false) + * @param newTab - If true, always creates a new tab (useful for parallel requests) + * @param showWindow - If true, shows the browser window (default: false) + * @returns Object with tabId and content in the requested format. For 'json', content is parsed object or { data: rawContent } if parsing fails */ public async fetch( url: string, format: 'html' | 'txt' | 'markdown' | 'json' = 'markdown', timeout = 10000, - sessionId = 'default' - ) { - await this.open(url, timeout, false, sessionId) + privateMode = false, + newTab = false, + showWindow = false + ): Promise<{ tabId: string; content: string | object }> { + const { tabId } = await this.open(url, timeout, privateMode, newTab, showWindow) - const win = await this.getWindow(sessionId) - const dbg = win.webContents.debugger + const { tab } = await this.getTab(privateMode, tabId, false, showWindow) + const dbg = tab.view.webContents.debugger + const windowKey = this.getWindowKey(privateMode) - await this.ensureDebuggerAttached(dbg, sessionId) + await this.ensureDebuggerAttached(dbg, windowKey) let expression: string if (format === 'json' || format === 'txt') { @@ -283,25 +810,100 @@ export class CdpBrowserController { expression = 'document.documentElement.outerHTML' } - const result = (await dbg.sendCommand('Runtime.evaluate', { - expression, - returnByValue: true - })) as { result?: { value?: string } } + let timeoutHandle: ReturnType | undefined + try { + const result = (await Promise.race([ + dbg.sendCommand('Runtime.evaluate', { + expression, + returnByValue: true + }), + new Promise((_, reject) => { + timeoutHandle = setTimeout(() => reject(new Error('Fetch content timed out')), timeout) + }) + ])) as { result?: { value?: string } } - const content = result?.result?.value ?? '' + const rawContent = result?.result?.value ?? 
'' - if (format === 'markdown') { - const turndownService = new TurndownService() - return turndownService.turndown(content) + let content: string | object + if (format === 'markdown') { + content = this.turndownService.turndown(rawContent) + } else if (format === 'json') { + try { + content = JSON.parse(rawContent) + } catch (parseError) { + logger.warn('JSON parse failed, returning raw content', { + url, + contentLength: rawContent.length, + error: parseError + }) + content = { data: rawContent } + } + } else { + content = rawContent + } + + return { tabId, content } + } finally { + if (timeoutHandle) clearTimeout(timeoutHandle) } - if (format === 'json') { - // Attempt to parse as JSON; if content is not valid JSON, wrap it in a data object - try { - return JSON.parse(content) - } catch { - return { data: content } + } + + /** + * Lists all tabs in a window + * @param privateMode - If true, lists tabs from private window (default: false) + */ + public async listTabs(privateMode = false): Promise> { + const windowKey = this.getWindowKey(privateMode) + const windowInfo = this.windows.get(windowKey) + if (!windowInfo) return [] + + return Array.from(windowInfo.tabs.values()).map((tab) => ({ + tabId: tab.id, + url: tab.url, + title: tab.title + })) + } + + /** + * Closes a specific tab + * @param privateMode - If true, closes tab from private window (default: false) + * @param tabId - Tab identifier to close + */ + public async closeTab(privateMode: boolean, tabId: string) { + await this.reset(privateMode, tabId) + } + + /** + * Switches the active tab + * @param privateMode - If true, switches tab in private window (default: false) + * @param tabId - Tab identifier to switch to + */ + public async switchTab(privateMode: boolean, tabId: string) { + const windowKey = this.getWindowKey(privateMode) + const windowInfo = this.windows.get(windowKey) + if (!windowInfo) throw new Error(`Window not found for ${privateMode ? 
'private' : 'normal'} mode`) + + const tab = windowInfo.tabs.get(tabId) + if (!tab) throw new Error(`Tab ${tabId} not found`) + + // Remove previous active tab view (but NOT the tabBarView) + if (windowInfo.activeTabId && windowInfo.activeTabId !== tabId) { + const prevTab = windowInfo.tabs.get(windowInfo.activeTabId) + if (prevTab && !windowInfo.window.isDestroyed()) { + windowInfo.window.removeBrowserView(prevTab.view) } } - return content + + windowInfo.activeTabId = tabId + + // Add the new active tab view + if (!windowInfo.window.isDestroyed()) { + windowInfo.window.addBrowserView(tab.view) + this.updateViewBounds(windowInfo) + } + + this.touchTab(windowKey, tabId) + this.sendTabBarUpdate(windowInfo) + logger.info('Switched active tab', { windowKey, tabId, privateMode }) } } diff --git a/src/main/mcpServers/browser/tabbar-html.ts b/src/main/mcpServers/browser/tabbar-html.ts new file mode 100644 index 0000000000..4a1bec0e0d --- /dev/null +++ b/src/main/mcpServers/browser/tabbar-html.ts @@ -0,0 +1,567 @@ +export const TAB_BAR_HTML = ` + + + + + + +
+
+
+ +
+
+ +
+ + + +
+
+
+ + + +
+ +
+
+ + +` diff --git a/src/main/mcpServers/browser/tools/execute.ts b/src/main/mcpServers/browser/tools/execute.ts index 1585a467a8..09cd79f2d1 100644 --- a/src/main/mcpServers/browser/tools/execute.ts +++ b/src/main/mcpServers/browser/tools/execute.ts @@ -1,36 +1,39 @@ import * as z from 'zod' import type { CdpBrowserController } from '../controller' +import { logger } from '../types' import { errorResponse, successResponse } from './utils' export const ExecuteSchema = z.object({ - code: z - .string() - .describe( - 'JavaScript evaluated via Chrome DevTools Runtime.evaluate. Keep it short; prefer one-line with semicolons for multiple statements.' - ), - timeout: z.number().default(5000).describe('Timeout in milliseconds for code execution (default: 5000ms)'), - sessionId: z.string().optional().describe('Session identifier to target a specific page (default: default)') + code: z.string().describe('JavaScript code to run in page context'), + timeout: z.number().default(5000).describe('Execution timeout in ms (default: 5000)'), + privateMode: z.boolean().optional().describe('Target private session (default: false)'), + tabId: z.string().optional().describe('Target specific tab by ID') }) export const executeToolDefinition = { name: 'execute', description: - 'Run JavaScript in the current page via Runtime.evaluate. Prefer short, single-line snippets; use semicolons for multiple statements.', + 'Run JavaScript in the currently open page. Use after open to: click elements, fill forms, extract content (document.body.innerText), or interact with the page. The page must be opened first with open or fetch.', inputSchema: { type: 'object', properties: { code: { type: 'string', - description: 'One-line JS to evaluate in page context' + description: + 'JavaScript to evaluate. 
Examples: document.body.innerText (get text), document.querySelector("button").click() (click), document.title (get title)' }, timeout: { type: 'number', - description: 'Timeout in milliseconds (default 5000)' + description: 'Execution timeout in ms (default: 5000)' }, - sessionId: { + privateMode: { + type: 'boolean', + description: 'Target private session (default: false)' + }, + tabId: { type: 'string', - description: 'Session identifier; targets a specific page (default: default)' + description: 'Target specific tab by ID (from open response)' } }, required: ['code'] @@ -38,11 +41,12 @@ export const executeToolDefinition = { } export async function handleExecute(controller: CdpBrowserController, args: unknown) { - const { code, timeout, sessionId } = ExecuteSchema.parse(args) + const { code, timeout, privateMode, tabId } = ExecuteSchema.parse(args) try { - const value = await controller.execute(code, timeout, sessionId ?? 'default') + const value = await controller.execute(code, timeout, privateMode ?? false, tabId) return successResponse(typeof value === 'string' ? 
value : JSON.stringify(value)) } catch (error) { + logger.error('Execute failed', { error, code: code.slice(0, 100), privateMode, tabId }) return errorResponse(error as Error) } } diff --git a/src/main/mcpServers/browser/tools/fetch.ts b/src/main/mcpServers/browser/tools/fetch.ts deleted file mode 100644 index b749aaff93..0000000000 --- a/src/main/mcpServers/browser/tools/fetch.ts +++ /dev/null @@ -1,49 +0,0 @@ -import * as z from 'zod' - -import type { CdpBrowserController } from '../controller' -import { errorResponse, successResponse } from './utils' - -export const FetchSchema = z.object({ - url: z.url().describe('URL to fetch'), - format: z.enum(['html', 'txt', 'markdown', 'json']).default('markdown').describe('Output format (default: markdown)'), - timeout: z.number().optional().describe('Timeout in milliseconds for navigation (default: 10000)'), - sessionId: z.string().optional().describe('Session identifier (default: default)') -}) - -export const fetchToolDefinition = { - name: 'fetch', - description: 'Fetch a URL using the browser and return content in specified format (html, txt, markdown, json)', - inputSchema: { - type: 'object', - properties: { - url: { - type: 'string', - description: 'URL to fetch' - }, - format: { - type: 'string', - enum: ['html', 'txt', 'markdown', 'json'], - description: 'Output format (default: markdown)' - }, - timeout: { - type: 'number', - description: 'Navigation timeout in milliseconds (default: 10000)' - }, - sessionId: { - type: 'string', - description: 'Session identifier (default: default)' - } - }, - required: ['url'] - } -} - -export async function handleFetch(controller: CdpBrowserController, args: unknown) { - const { url, format, timeout, sessionId } = FetchSchema.parse(args) - try { - const content = await controller.fetch(url, format, timeout ?? 10000, sessionId ?? 'default') - return successResponse(typeof content === 'string' ? 
content : JSON.stringify(content)) - } catch (error) { - return errorResponse(error as Error) - } -} diff --git a/src/main/mcpServers/browser/tools/index.ts b/src/main/mcpServers/browser/tools/index.ts index 19f1ee4163..5ba6fcae6d 100644 --- a/src/main/mcpServers/browser/tools/index.ts +++ b/src/main/mcpServers/browser/tools/index.ts @@ -1,15 +1,13 @@ export { ExecuteSchema, executeToolDefinition, handleExecute } from './execute' -export { FetchSchema, fetchToolDefinition, handleFetch } from './fetch' export { handleOpen, OpenSchema, openToolDefinition } from './open' export { handleReset, resetToolDefinition } from './reset' import type { CdpBrowserController } from '../controller' import { executeToolDefinition, handleExecute } from './execute' -import { fetchToolDefinition, handleFetch } from './fetch' import { handleOpen, openToolDefinition } from './open' import { handleReset, resetToolDefinition } from './reset' -export const toolDefinitions = [openToolDefinition, executeToolDefinition, resetToolDefinition, fetchToolDefinition] +export const toolDefinitions = [openToolDefinition, executeToolDefinition, resetToolDefinition] export const toolHandlers: Record< string, @@ -20,6 +18,5 @@ export const toolHandlers: Record< > = { open: handleOpen, execute: handleExecute, - reset: handleReset, - fetch: handleFetch + reset: handleReset } diff --git a/src/main/mcpServers/browser/tools/open.ts b/src/main/mcpServers/browser/tools/open.ts index 9739b3bcae..6ea9ec9e48 100644 --- a/src/main/mcpServers/browser/tools/open.ts +++ b/src/main/mcpServers/browser/tools/open.ts @@ -1,39 +1,52 @@ import * as z from 'zod' import type { CdpBrowserController } from '../controller' -import { successResponse } from './utils' +import { logger } from '../types' +import { errorResponse, successResponse } from './utils' export const OpenSchema = z.object({ - url: z.url().describe('URL to open in the controlled Electron window'), - timeout: z.number().optional().describe('Timeout in 
milliseconds for navigation (default: 10000)'), - show: z.boolean().optional().describe('Whether to show the browser window (default: false)'), - sessionId: z - .string() + url: z.url().describe('URL to navigate to'), + format: z + .enum(['html', 'txt', 'markdown', 'json']) .optional() - .describe('Session identifier; separate sessions keep separate pages (default: default)') + .describe('If set, return page content in this format. If not set, just open the page and return tabId.'), + timeout: z.number().optional().describe('Navigation timeout in ms (default: 10000)'), + privateMode: z.boolean().optional().describe('Use incognito mode, no data persisted (default: false)'), + newTab: z.boolean().optional().describe('Open in new tab, required for parallel requests (default: false)'), + showWindow: z.boolean().optional().default(true).describe('Show browser window (default: true)') }) export const openToolDefinition = { name: 'open', - description: 'Open a URL in a hidden Electron window controlled via Chrome DevTools Protocol', + description: + 'Navigate to a URL in a browser window. If format is specified, returns { tabId, content } with page content in that format. Otherwise, returns { currentUrl, title, tabId } for subsequent operations with execute tool. Set newTab=true when opening multiple URLs in parallel.', inputSchema: { type: 'object', properties: { url: { type: 'string', - description: 'URL to load' + description: 'URL to navigate to' + }, + format: { + type: 'string', + enum: ['html', 'txt', 'markdown', 'json'], + description: 'If set, return page content in this format. If not set, just open the page and return tabId.' 
}, timeout: { type: 'number', - description: 'Navigation timeout in milliseconds (default 10000)' + description: 'Navigation timeout in ms (default: 10000)' }, - show: { + privateMode: { type: 'boolean', - description: 'Whether to show the browser window (default false)' + description: 'Use incognito mode, no data persisted (default: false)' }, - sessionId: { - type: 'string', - description: 'Session identifier; separate sessions keep separate pages (default: default)' + newTab: { + type: 'boolean', + description: 'Open in new tab, required for parallel requests (default: false)' + }, + showWindow: { + type: 'boolean', + description: 'Show browser window (default: true)' } }, required: ['url'] @@ -41,7 +54,28 @@ export const openToolDefinition = { } export async function handleOpen(controller: CdpBrowserController, args: unknown) { - const { url, timeout, show, sessionId } = OpenSchema.parse(args) - const res = await controller.open(url, timeout ?? 10000, show ?? false, sessionId ?? 'default') - return successResponse(JSON.stringify(res)) + try { + const { url, format, timeout, privateMode, newTab, showWindow } = OpenSchema.parse(args) + + if (format) { + const { tabId, content } = await controller.fetch( + url, + format, + timeout ?? 10000, + privateMode ?? false, + newTab ?? false, + showWindow + ) + return successResponse(JSON.stringify({ tabId, content })) + } else { + const res = await controller.open(url, timeout ?? 10000, privateMode ?? false, newTab ?? false, showWindow) + return successResponse(JSON.stringify(res)) + } + } catch (error) { + logger.error('Open failed', { + error, + url: args && typeof args === 'object' && 'url' in args ? args.url : undefined + }) + return errorResponse(error instanceof Error ? 
error : String(error)) + } } diff --git a/src/main/mcpServers/browser/tools/reset.ts b/src/main/mcpServers/browser/tools/reset.ts index d09d251119..fe67b74b1d 100644 --- a/src/main/mcpServers/browser/tools/reset.ts +++ b/src/main/mcpServers/browser/tools/reset.ts @@ -1,34 +1,43 @@ import * as z from 'zod' import type { CdpBrowserController } from '../controller' -import { successResponse } from './utils' +import { logger } from '../types' +import { errorResponse, successResponse } from './utils' -/** Zod schema for validating reset tool arguments */ export const ResetSchema = z.object({ - sessionId: z.string().optional().describe('Session identifier to reset; omit to reset all sessions') + privateMode: z.boolean().optional().describe('true=private window, false=normal window, omit=all windows'), + tabId: z.string().optional().describe('Close specific tab only (requires privateMode)') }) -/** MCP tool definition for the reset tool */ export const resetToolDefinition = { name: 'reset', - description: 'Reset the controlled window and detach debugger', + description: + 'Close browser windows and clear state. Call when done browsing to free resources. Omit all parameters to close everything.', inputSchema: { type: 'object', properties: { - sessionId: { + privateMode: { + type: 'boolean', + description: 'true=reset private window only, false=reset normal window only, omit=reset all' + }, + tabId: { type: 'string', - description: 'Session identifier to reset; omit to reset all sessions' + description: 'Close specific tab only (requires privateMode to be set)' } } } } -/** - * Handler for the reset MCP tool. - * Closes browser window(s) and detaches debugger for the specified session or all sessions. 
- */ export async function handleReset(controller: CdpBrowserController, args: unknown) { - const { sessionId } = ResetSchema.parse(args) - await controller.reset(sessionId) - return successResponse('reset') + try { + const { privateMode, tabId } = ResetSchema.parse(args) + await controller.reset(privateMode, tabId) + return successResponse('reset') + } catch (error) { + logger.error('Reset failed', { + error, + privateMode: args && typeof args === 'object' && 'privateMode' in args ? args.privateMode : undefined + }) + return errorResponse(error instanceof Error ? error : String(error)) + } } diff --git a/src/main/mcpServers/browser/tools/utils.ts b/src/main/mcpServers/browser/tools/utils.ts index 2c5ecc0f1d..f5272ac81c 100644 --- a/src/main/mcpServers/browser/tools/utils.ts +++ b/src/main/mcpServers/browser/tools/utils.ts @@ -5,9 +5,10 @@ export function successResponse(text: string) { } } -export function errorResponse(error: Error) { +export function errorResponse(error: Error | string) { + const message = error instanceof Error ? 
error.message : error return { - content: [{ type: 'text', text: error.message }], + content: [{ type: 'text', text: message }], isError: true } } diff --git a/src/main/mcpServers/browser/types.ts b/src/main/mcpServers/browser/types.ts index 2cc934e6ce..a59fe59665 100644 --- a/src/main/mcpServers/browser/types.ts +++ b/src/main/mcpServers/browser/types.ts @@ -1,4 +1,24 @@ import { loggerService } from '@logger' +import type { BrowserView, BrowserWindow } from 'electron' export const logger = loggerService.withContext('MCPBrowserCDP') -export const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0' +export const userAgent = + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' + +export interface TabInfo { + id: string + view: BrowserView + url: string + title: string + lastActive: number +} + +export interface WindowInfo { + windowKey: string + privateMode: boolean + window: BrowserWindow + tabs: Map<string, TabInfo> + activeTabId: string | null + lastActive: number + tabBarView?: BrowserView +}