add code context (#10)

samuelcolvin · web-flow · commit 27df0e73c064 · 2025-09-03T23:08:15.000Z
diff --git a/examples/sandbox.py b/examples/sandbox.py
@@ -10,7 +10,7 @@ def log_handler(level: str, message: str):
 
 code = """
 import numpy, asyncio
-a = numpy.array([1, 2, 3])
+a = numpy.array(thing)
 print(a)
 await asyncio.sleep(1)
 a
@@ -20,7 +20,7 @@ def log_handler(level: str, message: str):
 async def main():
     async with code_sandbox(dependencies=['numpy'], log_handler=log_handler) as sandbox:
         print('running code')
-        result = await sandbox.eval(code)
+        result = await sandbox.eval(code, {'thing': [1, 2, 3]})
         print(f'{result["status"].title()}:')
         if result['status'] == 'success':
             print(result['return_value'])
diff --git a/mcp_run_python/code_sandbox.py b/mcp_run_python/code_sandbox.py
@@ -2,7 +2,7 @@
 from collections.abc import AsyncIterator, Awaitable, Callable
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
-from typing import Literal, TypeAlias, TypedDict
+from typing import Any, Literal, TypeAlias, TypedDict
 
 from mcp import ClientSession, StdioServerParameters, types as mcp_types
 from mcp.client.stdio import stdio_client
@@ -28,8 +28,21 @@ class RunError(TypedDict):
 class CodeSandbox:
     _session: ClientSession
 
-    async def eval(self, code: str) -> RunSuccess | RunError:
-        result = await self._session.call_tool('run_python_code', {'python_code': code})
+    async def eval(
+        self,
+        code: str,
+        globals: dict[str, Any] | None = None,
+    ) -> RunSuccess | RunError:
+        """Run code in the sandbox.
+
+        Args:
+            code: Python code to run.
+            globals: Dictionary of global variables in context when the code is executed.
+        """
+        args: dict[str, Any] = {'python_code': code}
+        if globals is not None:
+            args['global_variables'] = globals
+        result = await self._session.call_tool('run_python_code', args)
         content_block = result.content[0]
         if content_block.type == 'text':
             return json.loads(content_block.text)
@@ -44,7 +57,7 @@ async def code_sandbox(
     log_handler: LogHandler | None = None,
     allow_networking: bool = True,
 ) -> AsyncIterator['CodeSandbox']:
-    """Run code in a secure sandbox.
+    """Create a secure sandbox.
 
     Args:
         dependencies: A list of dependencies to be installed.
diff --git a/mcp_run_python/deno/src/main.ts b/mcp_run_python/deno/src/main.ts
@@ -1,3 +1,4 @@
+// deno-lint-ignore-file no-explicit-any
 /// <reference types="npm:@types/node@22.12.0" />
 
 import './polyfill.ts'
@@ -88,17 +89,23 @@ The code will be executed with Python 3.12.
   server.tool(
     'run_python_code',
     toolDescription,
-    { python_code: z.string().describe('Python code to run') },
-    async ({ python_code }: { python_code: string }) => {
+    {
+      python_code: z.string().describe('Python code to run'),
+      global_variables: z.record(z.string(), z.any()).default({}).describe(
+        'Map of global variables in context when the code is executed',
+      ),
+    },
+    async ({ python_code, global_variables }: { python_code: string; global_variables: Record<string, any> }) => {
       const logPromises: Promise<void>[] = []
       const result = await runCode.run(
         deps,
-        { name: 'main.py', content: python_code },
         (level, data) => {
           if (LogLevels.indexOf(level) >= LogLevels.indexOf(setLogLevel)) {
             logPromises.push(server.server.sendLoggingMessage({ level, data }))
           }
         },
+        { name: 'main.py', content: python_code },
+        global_variables,
       )
       await Promise.all(logPromises)
       return {
@@ -122,7 +129,6 @@ function httpGetUrl(req: http.IncomingMessage): URL {
 function httpGetBody(req: http.IncomingMessage): Promise<JSON> {
   // https://nodejs.org/en/learn/modules/anatomy-of-an-http-transaction#request-body
   return new Promise((resolve) => {
-    // deno-lint-ignore no-explicit-any
     const bodyParts: any[] = []
     let body
     req.on('data', (chunk) => {
@@ -255,7 +261,6 @@ async function installDeps(deps: string[]) {
   const runCode = new RunCode()
   const result = await runCode.run(
     deps,
-    undefined,
     (level, data) => console.error(`${level}|${data}`),
   )
   if (result.status !== 'success') {
@@ -280,9 +285,9 @@ a
   const runCode = new RunCode()
   const result = await runCode.run(
     deps,
-    { name: 'example.py', content: code },
     // use warn to avoid recursion since console.log is patched in runCode
     (level, data) => console.warn(`${level}: ${data}`),
+    { name: 'example.py', content: code },
   )
   console.log('Tool return value:')
   console.log(asXml(result))
diff --git a/mcp_run_python/deno/src/runCode.ts b/mcp_run_python/deno/src/runCode.ts
@@ -23,8 +23,9 @@ export class RunCode {
 
   async run(
     dependencies: string[],
-    file: CodeFile | undefined,
     log: (level: LoggingLevel, data: string) => void,
+    file?: CodeFile,
+    globals?: Record<string, any>,
   ): Promise<RunSuccess | RunError> {
     // remove once we can upgrade to pyodide 0.27.7 and console.log is no longer used.
     const realConsoleLog = console.log
@@ -60,7 +61,7 @@ export class RunCode {
     } else if (file) {
       try {
         const rawValue = await pyodide.runPythonAsync(file.content, {
-          globals: pyodide.toPy({ __name__: '__main__' }),
+          globals: pyodide.toPy({ ...(globals || {}), __name__: '__main__' }),
           filename: file.name,
         })
         runResult = {
diff --git a/tests/test_mcp_servers.py b/tests/test_mcp_servers.py
@@ -82,7 +82,15 @@ async def test_list_tools(run_mcp_session: Callable[[list[str]], AbstractAsyncCo
         assert tool.description
         assert tool.description.startswith('Tool to execute Python code and return stdout, stderr, and return value.')
         assert tool.inputSchema['properties'] == snapshot(
-            {'python_code': {'type': 'string', 'description': 'Python code to run'}}
+            {
+                'python_code': {'type': 'string', 'description': 'Python code to run'},
+                'global_variables': {
+                    'type': 'object',
+                    'additionalProperties': {},
+                    'default': {},
+                    'description': 'Map of global variables in context when the code is executed',
+                },
+            }
         )
 
 
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from typing import Any
 
 import pytest
@@ -8,24 +9,62 @@
 pytestmark = pytest.mark.anyio
 
 
+@dataclass
+class Foobar:
+    a: int
+    b: str
+    c: bytes
+
+
 @pytest.mark.parametrize(
-    'deps,code,expected',
+    'deps,code,locals,expected',
     [
         pytest.param(
             [],
             'a = 1\na + 1',
+            {},
             snapshot({'status': 'success', 'output': [], 'return_value': 2}),
             id='return-value-success',
         ),
         pytest.param(
             [],
             'print(123)',
+            {},
             snapshot({'status': 'success', 'output': ['123'], 'return_value': None}),
             id='print-success',
         ),
+        pytest.param(
+            [],
+            'a',
+            {'a': [1, 2, 3]},
+            snapshot({'status': 'success', 'output': [], 'return_value': [1, 2, 3]}),
+            id='access-local-variables',
+        ),
+        pytest.param(
+            [],
+            'a + b',
+            {'a': 4, 'b': 5},
+            snapshot({'status': 'success', 'output': [], 'return_value': 9}),
+            id='multiple-locals',
+        ),
+        pytest.param(
+            [],
+            'print(f)',
+            {'f': Foobar(1, '2', b'3')},
+            snapshot({'status': 'success', 'output': ["{'a': 1, 'b': '2', 'c': '3'}"], 'return_value': None}),
+            id='print-complex-local',
+        ),
+        pytest.param(
+            [],
+            'f',
+            {'f': Foobar(1, '2', b'3')},
+            snapshot({'status': 'success', 'output': [], 'return_value': {'a': 1, 'b': '2', 'c': '3'}}),
+            id='return-complex-local',
+        ),
         pytest.param(
             [],
             'print(unknown)',
+            {},
             snapshot(
                 {
                     'status': 'run-error',
@@ -44,14 +83,15 @@
         pytest.param(
             ['numpy'],
             'import numpy\nnumpy.array([1, 2, 3])',
+            {},
             snapshot({'status': 'success', 'output': [], 'return_value': [1, 2, 3]}),
             id='return-numpy-success',
         ),
     ],
 )
-async def test_sandbox(deps: list[str], code: str, expected: Any):
+async def test_sandbox(deps: list[str], code: str, locals: dict[str, Any], expected: Any):
     async with code_sandbox(dependencies=deps) as sandbox:
-        result = await sandbox.eval(code)
+        result = await sandbox.eval(code, locals)
         assert result == expected