From cd03fc5a5b4fdc1e50e330ce408bf70390d0c5b5 Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Tue, 18 Jun 2024 11:49:56 -0400 Subject: [PATCH] Parse array slices without `eval()` (#752) * slice_ without eval * rebased on main and updated CHANGELOG * fixed typo in test_slicer data * back compat fix for py<3.10 * improved slice regex pattern * add slice fastapi integration test * add cr suggestions * Improve clairty of CHANGELOG --------- Co-authored-by: Dan Allan --- CHANGELOG.md | 7 ++ tiled/_tests/test_slicer.py | 136 +++++++++++++++++++++++++++++++++++ tiled/client/array.py | 2 +- tiled/server/dependencies.py | 31 ++++---- 4 files changed, 161 insertions(+), 15 deletions(-) create mode 100644 tiled/_tests/test_slicer.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 224939cf3..31765d583 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ Write the date in place of the "Unreleased" in the case a new version is release - Minor implementation changes were necessary to make Tiled compatible with Numpy 2.0. +- For improved security, the server-side array slicing function has been + refactored to avoid using `eval()`. To be clear: there were no known + exploitable vulnerabilities in the `eval()` approach. The input was validated + against a regular expression before being passed to `eval()`. However, + avoiding `eval()` altogether is better practice for defense-in-depth against + potential code injection attacks due to current or future bugs in Tiled or + its upstream dependencies. ## v0.1.0b3 (2024-06-04) diff --git a/tiled/_tests/test_slicer.py b/tiled/_tests/test_slicer.py new file mode 100644 index 000000000..5ad419d3c --- /dev/null +++ b/tiled/_tests/test_slicer.py @@ -0,0 +1,136 @@ +import pathlib + +import pytest +from fastapi import Query +from starlette.status import HTTP_422_UNPROCESSABLE_ENTITY + +from tiled.catalog import in_memory +from tiled.client import Context, from_context +from tiled.server.app import build_app +from tiled.server.dependencies import slice_ + + +@pytest.fixture(scope="module") +def module_tmp_path(tmp_path_factory: pytest.TempdirFactory) -> pathlib.Path: + return tmp_path_factory.mktemp("temp") + + +@pytest.fixture(scope="module") +def client(module_tmp_path): + catalog = in_memory(writable_storage=module_tmp_path) + app = build_app(catalog) + with Context.from_app(app) as context: + client = from_context(context) + client.write_array([1], key="x") + yield client + + +slice_test_data = [ + "", + ":", + "::", + "0", + "0:", + "0::", + ":0", + "::0", + "5:", + ":10", + "::12", + "-1", + "-5:", + ":-5", + "::-45", + "3:5", + "5:3", + "123::4", + "5::678", + ":123:4", + ":5:678", + ",", + ",,", + ",:", + ":,::", + ",,:,::,,::,:,,::,", + "0,1,2", + "5:,:10,::-5", + "1:2:3,4:5:6,7:8:9", + "10::20,30::40,50::60", + "1 : 2", + "1:2, 3", + "1 ,2:3", + "1 , 2 , 3", +] + +slice_typo_data = [ + ":::", + "1:2:3:4", + "1:2,3:4:5:6", +] + +slice_malicious_data = [ + "1:(2+3)", + "1**2", + "print('oh so innocent')", + "; print('oh so innocent')", + ")\"; print('oh so innocent')", + "1:2)\"; print('oh so innocent')", + "1:2)\";print('oh_so_innocent')", + "import sys; sys.exit()", + "; import sys; sys.exit()", + "touch /tmp/x", + "rm -rf /tmp/*", +] + + +# this is the outgoing slice_ function from tiled.server.dependencies as is +def reference_slice_( + slice: str = Query(None, pattern="^[-0-9,:]*$"), +): + "Specify and parse a block index parameter." + import numpy + + # IMPORTANT We are eval-ing a user-provider string here so we need to be + # very careful about locking down what can be in it. The regex above + # excludes any letters or operators, so it is not possible to execute + # functions or expensive arithmetic. + return tuple( + [ + eval(f"numpy.s_[{dim!s}]", {"numpy": numpy}) + for dim in (slice or "").split(",") + if dim + ] + ) + + +@pytest.mark.parametrize("slice", slice_test_data) +def test_slicer(slice: str): + """ + Test the slicer function + """ + assert slice_(slice) == reference_slice_(slice) + + +@pytest.mark.parametrize("slice", slice_typo_data) +def test_slicer_typo_data(slice: str): + """ + Test the slicer function with invalid input + """ + with pytest.raises(TypeError): + _ = slice_(slice) + + +@pytest.mark.parametrize("slice", slice_malicious_data) +def test_slicer_malicious_exec(slice: str): + """ + Test the slicer function with 'malicious' input + """ + with pytest.raises(ValueError): + _ = slice_(slice) + + +@pytest.mark.parametrize("slice_", slice_typo_data + slice_malicious_data) +def test_slicer_fastapi_query_rejection(slice_, client): + http_client = client.context.http_client + response = http_client.get(f"/api/v1/array/block/x?block=0&slice={slice_}") + assert response.status_code == HTTP_422_UNPROCESSABLE_ENTITY diff --git a/tiled/client/array.py b/tiled/client/array.py index c0a89bec6..edfdffa5d 100644 --- a/tiled/client/array.py +++ b/tiled/client/array.py @@ -125,7 +125,7 @@ def read_block(self, block, slice=None): def read(self, slice=None): """ - Acess the entire array or a slice. + Access the entire array or a slice. The array will be internally chunked with dask. """ diff --git a/tiled/server/dependencies.py b/tiled/server/dependencies.py index ccca20785..f13c676fc 100644 --- a/tiled/server/dependencies.py +++ b/tiled/server/dependencies.py @@ -17,6 +17,12 @@ from .core import NoEntry from .utils import filter_for_access, record_timing +# saving slice() to rescue after using "slice" for FastAPI dependency injection of slice_(slice: str) +slice_func = slice + +DIM_REGEX = r"(?:(?:-?\d+)?:){0,2}(?:-?\d+)?" +SLICE_REGEX = rf"^{DIM_REGEX}(?:,{DIM_REGEX})*$" + @lru_cache(1) def get_query_registry(): @@ -159,20 +165,17 @@ def expected_shape( return tuple(map(int, expected_shape.split(","))) +def np_style_slicer(indices: tuple): + return indices[0] if len(indices) == 1 else slice_func(*indices) + + +def parse_slice_str(dim: str): + return np_style_slicer(tuple(int(idx) if idx else None for idx in dim.split(":"))) + + def slice_( - slice: str = Query(None, pattern="^[-0-9,:]*$"), + slice: Optional[str] = Query(None, pattern=SLICE_REGEX), ): "Specify and parse a block index parameter." - import numpy - - # IMPORTANT We are eval-ing a user-provider string here so we need to be - # very careful about locking down what can be in it. The regex above - # excludes any letters or operators, so it is not possible to execute - # functions or expensive arithmetic. - return tuple( - [ - eval(f"numpy.s_[{dim!s}]", {"numpy": numpy}) - for dim in (slice or "").split(",") - if dim - ] - ) + + return tuple(parse_slice_str(dim) for dim in (slice or "").split(",") if dim)