-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(runtime): Execute codes in a sandbox environment (#2119)
- Loading branch information
Showing
22 changed files
with
868 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,322 @@ | ||
"""Code operators for DB-GPT. | ||
The code will be executed in a sandbox environment, which is isolated from the host | ||
system. You can limit the memory and file system access of the code execution. | ||
""" | ||
|
||
import json | ||
import logging | ||
import os | ||
|
||
from dbgpt.core import ModelRequest | ||
from dbgpt.core.awel import MapOperator | ||
from dbgpt.core.awel.flow import ( | ||
TAGS_ORDER_HIGH, | ||
IOField, | ||
OperatorCategory, | ||
OptionValue, | ||
Parameter, | ||
ViewMetadata, | ||
ui, | ||
) | ||
from dbgpt.util.code.server import get_code_server | ||
from dbgpt.util.i18n_utils import _ | ||
|
||
logger = logging.getLogger(__name__)

# Default Python "map" template executed inside the sandbox: echoes the
# input "text" field and adds a few demo keys so users see the output shape.
# NOTE(review): `lyric_task` and `msgpack` appear unused by fn_map itself —
# presumably they are required by the sandbox runtime; confirm before removing.
_FN_PYTHON_MAP = """
import os
import json
import lyric_task
from lyric_py_task.imports import msgpack
def fn_map(args: dict[str, any]) -> dict[str, any]:
    text = args.get("text")
    return {
        "text": text,
        "key0": "customized key",
        "key1": "hello, world",
        "key2": [1, 2, 3],
        "key3": {"a": 1, "b": 2},
    }
"""

# JavaScript equivalent of the default map template above.
_FN_JAVASCRIPT_MAP = """
function fn_map(args) {
    var text = args.text;
    return {
        text: text,
        key0: "customized key",
        key1: "hello, world",
        key2: [1, 2, 3],
        key3: {a: 1, b: 2},
    };
}
"""
|
||
|
||
class CodeMapOperator(MapOperator[dict, dict]):
    """Execute user-provided code in a sandbox to map a dict to a dict.

    The code is executed in an isolated code server, so it cannot touch the
    host system directly. The function named ``call_name`` inside ``code`` is
    invoked with the JSON-serialized input dictionary and must return a
    dictionary.
    """

    metadata = ViewMetadata(
        label=_("Code Map Operator"),
        name="default_code_map_operator",
        description=_(
            "Handle input dictionary with code and return output dictionary after execution."
        ),
        category=OperatorCategory.CODE,
        parameters=[
            Parameter.build_from(
                _("Code Editor"),
                "code",
                type=str,
                optional=True,
                default=_FN_PYTHON_MAP,
                placeholder=_("Please input your code"),
                description=_("The code to be executed."),
                ui=ui.UICodeEditor(
                    language="python",
                ),
            ),
            Parameter.build_from(
                _("Language"),
                "lang",
                type=str,
                optional=True,
                default="python",
                placeholder=_("Please select the language"),
                description=_("The language of the code."),
                options=[
                    OptionValue(label="Python", name="python", value="python"),
                    OptionValue(
                        label="JavaScript", name="javascript", value="javascript"
                    ),
                ],
                ui=ui.UISelect(),
            ),
            Parameter.build_from(
                _("Call Name"),
                "call_name",
                type=str,
                optional=True,
                default="fn_map",
                placeholder=_("Please input the call name"),
                description=_("The call name of the function."),
            ),
        ],
        inputs=[
            IOField.build_from(
                _("Input Data"),
                "input",
                type=dict,
                description=_("The input dictionary."),
            )
        ],
        outputs=[
            IOField.build_from(
                _("Output Data"),
                "output",
                type=dict,
                description=_("The output dictionary."),
            )
        ],
        tags={"order": TAGS_ORDER_HIGH},
    )

    def __init__(
        self,
        code: str = _FN_PYTHON_MAP,
        lang: str = "python",
        call_name: str = "fn_map",
        **kwargs,
    ):
        """Create the operator.

        Args:
            code: Source code to execute in the sandbox.
            lang: Language of the code ("python" or "javascript").
            call_name: Name of the function inside ``code`` to invoke.
        """
        super().__init__(**kwargs)
        self.code = code
        self.lang = lang
        self.call_name = call_name

    async def map(self, input_value: dict) -> dict:
        """Run the configured code against ``input_value`` in the sandbox.

        Args:
            input_value: JSON-serializable input dictionary.

        Returns:
            The dictionary returned by the sandboxed function.

        Raises:
            RuntimeError: If the sandboxed execution fails, produces no
                output, or produces a non-dict output.
        """
        # The sandbox protocol exchanges JSON-encoded bytes.
        exec_input_data_bytes = json.dumps(input_value).encode("utf-8")
        code_server = await get_code_server()
        result = await code_server.exec1(
            self.code, exec_input_data_bytes, call_name=self.call_name, lang=self.lang
        )
        logger.info(f"Code execution result: {result}")
        # Fail loudly instead of silently forwarding a bad result downstream,
        # mirroring the checks in CodeDictToModelRequestOperator.map.
        if result.exit_code != 0:
            raise RuntimeError(f"Code execution failed: {result.logs}")
        if not result.output:
            raise RuntimeError(f"Code execution failed: {result.logs}")
        if not isinstance(result.output, dict):
            raise RuntimeError(
                f"Code execution failed, invalid output: {result.output}"
            )
        return result.output
|
||
|
||
# Default Python template that turns flow inputs into an LLM request dict.
# The model name is injected into the sandbox via the DBGPT_RUNTIME_LLM_MODEL
# environment variable (set up in CodeDictToModelRequestOperator.map).
_REQ_BUILD_PY_FUNC = """
import os
def fn_map(args: dict[str, any]) -> dict[str, any]:
    llm_model = args.get("model", os.getenv("DBGPT_RUNTIME_LLM_MODEL"))
    messages: str | list[str] = args.get("messages", [])
    if isinstance(messages, str):
        human_message = messages
    else:
        human_message = messages[0]
    temperature = float(args.get("temperature") or 0.5)
    max_new_tokens = int(args.get("max_new_tokens") or 2048)
    conv_uid = args.get("conv_uid", "")
    print("Conv uid is: ", conv_uid)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "human", "content": human_message}
    ]
    return {
        "model": llm_model,
        "messages": messages,
        "temperature": temperature,
        "max_new_tokens": max_new_tokens
    }
"""

# JavaScript equivalent of the request-builder template above.
# NOTE(review): unlike the Python template, the model fallback here is a
# hard-coded "chatgpt_proxyllm" rather than the env var — confirm intended.
_REQ_BUILD_JS_FUNC = """
function fn_map(args) {
    var llm_model = args.model || "chatgpt_proxyllm";
    var messages = args.messages || [];
    var human_message = messages[0];
    var temperature = parseFloat(args.temperature) || 0.5;
    var max_new_tokens = parseInt(args.max_new_tokens) || 2048;
    var conv_uid = args.conv_uid || "";
    console.log("Conv uid is: ", conv_uid);
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "human", "content": human_message}
    ];
    return {
        model: llm_model,
        messages: messages,
        temperature: temperature,
        max_new_tokens: max_new_tokens
    };
}
"""
|
||
|
||
class CodeDictToModelRequestOperator(MapOperator[dict, ModelRequest]):
    """Execute sandboxed code that builds a ``ModelRequest`` from a dict.

    The code runs in an isolated code server with an explicitly restricted
    file system, a memory limit, and a controlled environment; the dictionary
    it returns is used as keyword arguments for ``ModelRequest``.
    """

    metadata = ViewMetadata(
        label=_("Code Dict to Model Request Operator"),
        name="default_code_dict_to_model_request_operator",
        description=_(
            "Handle input dictionary with code and return output ModelRequest after execution."
        ),
        category=OperatorCategory.CODE,
        parameters=[
            Parameter.build_from(
                _("Code Editor"),
                "code",
                type=str,
                optional=True,
                default=_REQ_BUILD_PY_FUNC,
                placeholder=_("Please input your code"),
                description=_("The code to be executed."),
                ui=ui.UICodeEditor(
                    language="python",
                ),
            ),
            Parameter.build_from(
                _("Language"),
                "lang",
                type=str,
                optional=True,
                default="python",
                placeholder=_("Please select the language"),
                description=_("The language of the code."),
                options=[
                    OptionValue(label="Python", name="python", value="python"),
                    OptionValue(
                        label="JavaScript", name="javascript", value="javascript"
                    ),
                ],
                ui=ui.UISelect(),
            ),
            Parameter.build_from(
                _("Call Name"),
                "call_name",
                type=str,
                optional=True,
                default="fn_map",
                placeholder=_("Please input the call name"),
                description=_("The call name of the function."),
            ),
        ],
        inputs=[
            IOField.build_from(
                _("Input Data"),
                "input",
                type=dict,
                description=_("The input dictionary."),
            )
        ],
        outputs=[
            IOField.build_from(
                _("Output Data"),
                "output",
                type=ModelRequest,
                description=_("The output ModelRequest."),
            )
        ],
        tags={"order": TAGS_ORDER_HIGH},
    )

    def __init__(
        self,
        code: str = _REQ_BUILD_PY_FUNC,
        lang: str = "python",
        call_name: str = "fn_map",
        **kwargs,
    ):
        """Create the operator.

        Args:
            code: Source code to execute in the sandbox.
            lang: Language of the code ("python" or "javascript").
            call_name: Name of the function inside ``code`` to invoke.
        """
        super().__init__(**kwargs)
        self.code = code
        self.lang = lang
        self.call_name = call_name

    async def map(self, input_value: dict) -> ModelRequest:
        """Run the configured code and build a ``ModelRequest`` from its output.

        Args:
            input_value: JSON-serializable input dictionary.

        Returns:
            A ``ModelRequest`` constructed from the dict the code returned.

        Raises:
            RuntimeError: If the sandboxed execution fails, produces no
                output, or produces a non-dict output.
        """
        # Imported lazily: lyric is only needed when this operator actually
        # runs, so the module can be imported without it installed.
        from lyric import PyTaskFsConfig, PyTaskMemoryConfig, PyTaskResourceConfig

        exec_input_data_bytes = json.dumps(input_value).encode("utf-8")
        code_server = await get_code_server()
        # NOTE(review): may be None when LLM_MODEL is unset — confirm the
        # sandbox accepts a None env-var value.
        model_name = os.getenv("LLM_MODEL")

        fs = PyTaskFsConfig(
            preopens=[
                # Mount the /tmp directory to the /tmp directory in the sandbox
                # Directory permissions are set to 3 (read and write)
                # File permissions are set to 3 (read and write)
                ("/tmp", "/tmp", 3, 3),
                # Mount the current directory to the /home directory in the sandbox
                # Directory and file permissions are set to 1 (read)
                (".", "/home", 1, 1),
            ]
        )
        memory = PyTaskMemoryConfig(memory_limit=50 * 1024 * 1024)  # 50MB in bytes
        resources = PyTaskResourceConfig(
            fs=fs,
            memory=memory,
            env_vars=[
                ("DBGPT_RUNTIME_LLM_MODEL", model_name),
            ],
        )
        result = await code_server.exec1(
            self.code,
            exec_input_data_bytes,
            call_name=self.call_name,
            lang=self.lang,
            resources=resources,
        )
        # Log once; the original logged the same result twice.
        logger.info(f"Code execution result: {result}")
        if result.exit_code != 0:
            raise RuntimeError(f"Code execution failed: {result.logs}")
        if not result.output:
            raise RuntimeError(f"Code execution failed: {result.logs}")
        if not isinstance(result.output, dict):
            raise RuntimeError(
                f"Code execution failed, invalid output: {result.output}"
            )
        return ModelRequest(**result.output)
Oops, something went wrong.