diff --git a/package.json b/package.json index c8f2909675..4ae140e521 100644 --- a/package.json +++ b/package.json @@ -95,6 +95,7 @@ "officeparser": "^4.1.1", "os-proxy-config": "^1.1.2", "proxy-agent": "^6.5.0", + "pyodide": "^0.27.5", "tar": "^7.4.3", "turndown": "^7.2.0", "turndown-plugin-gfm": "^1.0.2", diff --git a/src/main/mcpServers/factory.ts b/src/main/mcpServers/factory.ts index 1c508f8844..b3e3e1a66f 100644 --- a/src/main/mcpServers/factory.ts +++ b/src/main/mcpServers/factory.ts @@ -5,6 +5,7 @@ import BraveSearchServer from './brave-search' import DifyKnowledgeServer from './dify-knowledge' import FetchServer from './fetch' import FileSystemServer from './filesystem' +import createRunPythonServer from './mcp-run-python/main' import MemoryServer from './memory' import ThinkingServer from './sequentialthinking' @@ -31,6 +32,9 @@ export function createInMemoryMCPServer(name: string, args: string[] = [], envs: const difyKey = envs.DIFY_KEY return new DifyKnowledgeServer(difyKey, args).server } + case '@cherry/mcp-run-python': { + return createRunPythonServer().server + } default: throw new Error(`Unknown in-memory MCP server: ${name}`) } diff --git a/src/main/mcpServers/mcp-run-python/main.ts b/src/main/mcpServers/mcp-run-python/main.ts new file mode 100644 index 0000000000..e98602c84d --- /dev/null +++ b/src/main/mcpServers/mcp-run-python/main.ts @@ -0,0 +1,84 @@ +// port from https://ai.pydantic.dev/mcp/run-python/ +// https://jsr.io/@pydantic/mcp-run-python@0.0.13 +// import './polyfill' + +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { type LoggingLevel, SetLevelRequestSchema } from '@modelcontextprotocol/sdk/types.js' +import { z } from 'zod' + +import { asXml, runCode } from './runCode' + +const VERSION = '0.0.13' + +// list of log levels to use for level comparison +const LogLevels: LoggingLevel[] = ['debug', 'info', 'notice', 'warning', 'error', 'critical', 'alert', 'emergency'] + +/* + * Create an MCP server with the `run_python_code` tool registered. + */ +function createServer(): McpServer { + const server = new McpServer( + { + name: 'MCP Run Python', + version: VERSION + }, + { + instructions: 'Call the "run_python_code" tool with the Python code to run.', + capabilities: { + logging: {} + } + } + ) + + const toolDescription = `Tool to execute Python code and return stdout, stderr, and return value. + +The code may be async, and the value on the last line will be returned as the return value. + +The code will be executed with Python 3.12. + +Dependencies may be defined via PEP 723 script metadata, e.g. to install "pydantic", the script should start +with a comment of the form: + +# /// script +# dependencies = ['pydantic'] +# /// +print('python code here') +` + + let setLogLevel: LoggingLevel = 'info' + + server.server.setRequestHandler(SetLevelRequestSchema, (request) => { + setLogLevel = request.params.level + return {} + }) + + server.tool( + 'run_python_code', + toolDescription, + { python_code: z.string().describe('Python code to run') }, + async ({ python_code }: { python_code: string }) => { + const logPromises: Promise[] = [] + const result = await runCode( + [ + { + name: 'main.py', + content: python_code, + active: true + } + ], + (level, data) => { + if (LogLevels.indexOf(level) >= LogLevels.indexOf(setLogLevel)) { + logPromises.push(server.server.sendLoggingMessage({ level, data })) + } + } + ) + await Promise.all(logPromises) + return { + content: [{ type: 'text', text: asXml(result) }] + } + } + ) + return server +} + +export default createServer diff --git a/src/main/mcpServers/mcp-run-python/polyfill.ts b/src/main/mcpServers/mcp-run-python/polyfill.ts new file mode 100644 index 0000000000..7031153f40 --- /dev/null +++ b/src/main/mcpServers/mcp-run-python/polyfill.ts @@ -0,0 +1,8 @@ +// import process from 'node:process' + +// Stub `process.env` and always return an empty object +// Object.defineProperty(process, 'env', { +// get() { +// return {} +// } +// }) diff --git a/src/main/mcpServers/mcp-run-python/prepareEnvCode.ts b/src/main/mcpServers/mcp-run-python/prepareEnvCode.ts new file mode 100644 index 0000000000..71177d0498 --- /dev/null +++ b/src/main/mcpServers/mcp-run-python/prepareEnvCode.ts @@ -0,0 +1,202 @@ +// DO NOT EDIT THIS FILE DIRECTLY, INSTEAD RUN "deno run build" +export const preparePythonCode = `"""Logic for installing dependencies in Pyodide. + +Mostly taken from https://github.com/pydantic/pydantic.run/blob/main/src/frontend/src/prepare_env.py +""" + +from __future__ import annotations as _annotations + +import importlib +import logging +import re +import sys +import traceback +from collections.abc import Iterable, Iterator +from contextlib import contextmanager +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Literal, TypedDict + +import micropip +import pyodide_js +import tomllib +from pyodide.code import find_imports + +__all__ = 'prepare_env', 'dump_json' + + +class File(TypedDict): + name: str + content: str + active: bool + + +@dataclass +class Success: + dependencies: list[str] | None + kind: Literal['success'] = 'success' + + +@dataclass +class Error: + message: str + kind: Literal['error'] = 'error' + + +async def prepare_env(files: list[File]) -> Success | Error: + sys.setrecursionlimit(400) + + cwd = Path.cwd() + for file in files: + (cwd / file['name']).write_text(file['content']) + + active: File | None = next((f for f in files if f['active']), None) + + dependencies: list[str] | None = None + if active: + python_code = active['content'] + dependencies = _find_pep723_dependencies(python_code) + if dependencies is None: + dependencies = await _find_import_dependencies(python_code) + + if dependencies: + dependencies = _add_extra_dependencies(dependencies) + + with _micropip_logging() as logs_filename: + try: + await micropip.install(dependencies, keep_going=True) + importlib.invalidate_caches() + except Exception: + with open(logs_filename) as f: + logs = f.read() + return Error(message=f'{logs} {traceback.format_exc()}') + + return Success(dependencies=dependencies) + + +def dump_json(value: Any) -> str | None: + from pydantic_core import to_json + + if value is None: + return None + if isinstance(value, str): + return value + else: + return to_json(value, indent=2, fallback=_json_fallback).decode() + + +def _json_fallback(value: Any) -> Any: + tp: Any = type(value) + module = tp.__module__ + if module == 'numpy': + if tp.__name__ in {'ndarray', 'matrix'}: + return value.tolist() + else: + return value.item() + elif module == 'pyodide.ffi': + return value.to_py() + else: + return repr(value) + + +def _add_extra_dependencies(dependencies: list[str]) -> list[str]: + """Add extra dependencies we know some packages need. + + Workaround for micropip not installing some required transitive dependencies. + See https://github.com/pyodide/micropip/issues/204 + + pygments seems to be required to get rich to work properly, ssl is required for FastAPI and HTTPX, + pydantic_ai requires newest typing_extensions. + """ + extras: list[str] = [] + for d in dependencies: + if d.startswith(('logfire', 'rich')): + extras.append('pygments') + elif d.startswith(('fastapi', 'httpx', 'pydantic_ai')): + extras.append('ssl') + + if d.startswith('pydantic_ai'): + extras.append('typing_extensions>=4.12') + + if len(extras) == 3: + break + + return dependencies + extras + + +@contextmanager +def _micropip_logging() -> Iterator[str]: + from micropip import logging as micropip_logging + + micropip_logging.setup_logging() + logger = logging.getLogger('micropip') + logger.handlers.clear() + logger.setLevel(logging.INFO) + + file_name = 'micropip.log' + handler = logging.FileHandler(file_name) + handler.setLevel(logging.INFO) + handler.setFormatter(logging.Formatter('%(message)s')) + logger.addHandler(handler) + try: + yield file_name + finally: + logger.removeHandler(handler) + + +def _find_pep723_dependencies(code: str) -> list[str] | None: + """Extract dependencies from a script with PEP 723 metadata.""" + metadata = _read_pep723_metadata(code) + dependencies: list[str] | None = metadata.get('dependencies') + if dependencies is None: + return None + else: + assert isinstance(dependencies, list), 'dependencies must be a list' + assert all(isinstance(dep, str) for dep in dependencies), 'dependencies must be a list of strings' + return dependencies + + +def _read_pep723_metadata(code: str) -> dict[str, Any]: + """Read PEP 723 script metadata. + + Copied from https://packaging.python.org/en/latest/specifications/inline-script-metadata/#reference-implementation + """ + name = 'script' + magic_comment_regex = r'(?m)^# /// (?P[a-zA-Z0-9-]+)$\\s(?P(^#(| .*)$\\s)+)^# ///$' + matches = list(filter(lambda m: m.group('type') == name, re.finditer(magic_comment_regex, code))) + if len(matches) > 1: + raise ValueError(f'Multiple {name} blocks found') + elif len(matches) == 1: + content = ''.join( + line[2:] if line.startswith('# ') else line[1:] + for line in matches[0].group('content').splitlines(keepends=True) + ) + return tomllib.loads(content) + else: + return {} + + +async def _find_import_dependencies(code: str) -> list[str] | None: + """Find dependencies in imports.""" + try: + imports: list[str] = find_imports(code) + except SyntaxError: + return None + else: + return list(_find_imports_to_install(imports)) + + +TO_PACKAGE_NAME: dict[str, str] = pyodide_js._api._import_name_to_package_name.to_py() # pyright: ignore[reportPrivateUsage] + + +def _find_imports_to_install(imports: list[str]) -> Iterable[str]: + """Given a list of module names being imported, return packages that are not installed.""" + for module in imports: + try: + importlib.import_module(module) + except ModuleNotFoundError: + if package_name := TO_PACKAGE_NAME.get(module): + yield package_name + elif '.' not in module: + yield module +` diff --git a/src/main/mcpServers/mcp-run-python/runCode.ts b/src/main/mcpServers/mcp-run-python/runCode.ts new file mode 100644 index 0000000000..84c9ed1699 --- /dev/null +++ b/src/main/mcpServers/mcp-run-python/runCode.ts @@ -0,0 +1,167 @@ +/* eslint @typescript-eslint/no-explicit-any: off */ +import type { LoggingLevel } from '@modelcontextprotocol/sdk/types.js' +import { loadPyodide } from 'pyodide' + +import { preparePythonCode } from './prepareEnvCode' + +export interface CodeFile { + name: string + content: string + active: boolean +} + +export async function runCode( + files: CodeFile[], + log: (level: LoggingLevel, data: string) => void +): Promise { + // remove once https://github.com/pyodide/pyodide/pull/5514 is released + const realConsoleLog = console.log + // deno-lint-ignore no-explicit-any + console.log = (...args: any[]) => log('debug', args.join(' ')) + + const output: string[] = [] + const pyodide = await loadPyodide({ + stdout: (msg) => { + log('info', msg) + output.push(msg) + }, + stderr: (msg) => { + log('warning', msg) + output.push(msg) + } + }) + + // see https://github.com/pyodide/pyodide/discussions/5512 + const origLoadPackage = pyodide.loadPackage + pyodide.loadPackage = (pkgs, options) => + origLoadPackage(pkgs, { + // stop pyodide printing to stdout/stderr + messageCallback: (msg: string) => log('debug', `loadPackage: ${msg}`), + errorCallback: (msg: string) => { + log('error', `loadPackage: ${msg}`) + output.push(`install error: ${msg}`) + }, + ...options + }) + + await pyodide.loadPackage(['micropip', 'pydantic']) + const sys = pyodide.pyimport('sys') + + const dirPath = '/tmp/mcp_run_python' + sys.path.append(dirPath) + const pathlib = pyodide.pyimport('pathlib') + pathlib.Path(dirPath).mkdir() + const moduleName = '_prepare_env' + + pathlib.Path(`${dirPath}/${moduleName}.py`).write_text(preparePythonCode) + + const preparePyEnv: PreparePyEnv = pyodide.pyimport(moduleName) + + const prepareStatus = await preparePyEnv.prepare_env(pyodide.toPy(files)) + + let runResult: RunSuccess | RunError + if (prepareStatus.kind == 'error') { + runResult = { + status: 'install-error', + output, + error: prepareStatus.message + } + } else { + const { dependencies } = prepareStatus + const activeFile = files.find((f) => f.active)! || files[0] + try { + const rawValue = await pyodide.runPythonAsync(activeFile.content, { + globals: pyodide.toPy({ __name__: '__main__' }), + filename: activeFile.name + }) + runResult = { + status: 'success', + dependencies, + output, + returnValueJson: preparePyEnv.dump_json(rawValue) + } + } catch (err) { + runResult = { + status: 'run-error', + dependencies, + output, + error: formatError(err) + } + } + } + sys.stdout.flush() + sys.stderr.flush() + console.log = realConsoleLog + return runResult +} + +interface RunSuccess { + status: 'success' + // we could record stdout and stderr separately, but I suspect simplicity is more important + output: string[] + dependencies: string[] + returnValueJson: string | null +} + +interface RunError { + status: 'install-error' | 'run-error' + output: string[] + dependencies?: string[] + error: string +} + +export function asXml(runResult: RunSuccess | RunError): string { + const xml = [`${runResult.status}`] + if (runResult.dependencies?.length) { + xml.push(`${JSON.stringify(runResult.dependencies)}`) + } + if (runResult.output.length) { + xml.push('') + const escapeXml = escapeClosing('output') + xml.push(...runResult.output.map(escapeXml)) + xml.push('') + } + if (runResult.status == 'success') { + if (runResult.returnValueJson) { + xml.push('') + xml.push(escapeClosing('return_value')(runResult.returnValueJson)) + xml.push('') + } + } else { + xml.push('') + xml.push(escapeClosing('error')(runResult.error)) + xml.push('') + } + return xml.join('\n') +} + +function escapeClosing(closingTag: string): (str: string) => string { + const regex = new RegExp(`)?`, 'gi') + const onMatch = (match: string) => { + return match.replace(//g, '>') + } + return (str) => str.replace(regex, onMatch) +} + +// deno-lint-ignore no-explicit-any +function formatError(err: any): string { + let errStr = err.toString() + errStr = errStr.replace(/^PythonError: +/, '') + // remove frames from inside pyodide + errStr = errStr.replace(/ {2}File "\/lib\/python\d+\.zip\/_pyodide\/.*\n {4}.*\n(?: {4,}\^+\n)?/g, '') + return errStr +} + +interface PrepareSuccess { + kind: 'success' + dependencies: string[] +} +interface PrepareError { + kind: 'error' + message: string +} +interface PreparePyEnv { + prepare_env: (files: CodeFile[]) => Promise + // deno-lint-ignore no-explicit-any + dump_json: (value: any) => string | null +} diff --git a/src/renderer/src/store/mcp.ts b/src/renderer/src/store/mcp.ts index 88e49226a4..bd8b5be977 100644 --- a/src/renderer/src/store/mcp.ts +++ b/src/renderer/src/store/mcp.ts @@ -121,6 +121,14 @@ export const builtinMCPServers: MCPServer[] = [ DIFY_KEY: 'YOUR_DIFY_KEY' }, provider: 'CherryAI' + }, + { + id: nanoid(), + name: '@cherry/mcp-run-python', + type: 'inMemory', + description: 'Model Context Protocol server to run Python code in a sandbox.', + isActive: false, + provider: 'CherryAI' } ] diff --git a/yarn.lock b/yarn.lock index 4644306209..e918f39a6f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5842,6 +5842,7 @@ __metadata: p-queue: "npm:^8.1.0" prettier: "npm:^3.5.3" proxy-agent: "npm:^6.5.0" + pyodide: "npm:^0.27.5" rc-virtual-list: "npm:^3.18.6" react: "npm:^19.0.0" react-dom: "npm:^19.0.0" @@ -15841,6 +15842,15 @@ __metadata: languageName: node linkType: hard +"pyodide@npm:^0.27.5": + version: 0.27.5 + resolution: "pyodide@npm:0.27.5" + dependencies: + ws: "npm:^8.5.0" + checksum: 10c0/86e242031a315bb8f55c778b8555c55397662f3f927e8afc4f1b9bdcb0c93cef7de708c01f63ed19d7151de6088aa555952cd1513756b9ac26bc599286452bd8 + languageName: node + linkType: hard + "qs@npm:^6.14.0": version: 6.14.0 resolution: "qs@npm:6.14.0" @@ -19863,6 +19873,21 @@ __metadata: languageName: node linkType: hard +"ws@npm:^8.5.0": + version: 8.18.2 + resolution: "ws@npm:8.18.2" + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: ">=5.0.2" + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + checksum: 10c0/4b50f67931b8c6943c893f59c524f0e4905bbd183016cfb0f2b8653aa7f28dad4e456b9d99d285bbb67cca4fedd9ce90dfdfaa82b898a11414ebd66ee99141e4 + languageName: node + linkType: hard + "xhr@npm:^2.0.1": version: 2.6.0 resolution: "xhr@npm:2.6.0"