Skip to content

Commit

Permalink
Parse array slices without eval(<user_input>) (bluesky#752)
Browse files Browse the repository at this point in the history
* slice_ without eval

* rebased on main and updated CHANGELOG

* fixed typo in test_slicer data

* back compat fix for py<3.10

* improved slice regex pattern

* add slice fastapi integration test

* add cr suggestions

* Improve clairty of CHANGELOG

---------

Co-authored-by: Dan Allan <dallan@bnl.gov>
  • Loading branch information
hyperrealist and danielballan authored Jun 18, 2024
1 parent bc1926f commit cd03fc5
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 15 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ Write the date in place of the "Unreleased" in the case a new version is release

- Minor implementation changes were necessary to make Tiled compatible with
Numpy 2.0.
- For improved security, the server-side array slicing function has been
refactored to avoid using `eval()`. To be clear: there were no known
exploitable vulnerabilities in the `eval()` approach. The input was validated
against a regular expression before being passed to `eval()`. However,
avoiding `eval()` altogether is better practice for defense-in-depth against
potential code injection attacks due to current or future bugs in Tiled or
its upstream dependencies.

## v0.1.0b3 (2024-06-04)

Expand Down
136 changes: 136 additions & 0 deletions tiled/_tests/test_slicer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import pathlib

import pytest
from fastapi import Query
from starlette.status import HTTP_422_UNPROCESSABLE_ENTITY

from tiled.catalog import in_memory
from tiled.client import Context, from_context
from tiled.server.app import build_app
from tiled.server.dependencies import slice_


@pytest.fixture(scope="module")
def module_tmp_path(tmp_path_factory: pytest.TempdirFactory) -> pathlib.Path:
return tmp_path_factory.mktemp("temp")


@pytest.fixture(scope="module")
def client(module_tmp_path):
catalog = in_memory(writable_storage=module_tmp_path)
app = build_app(catalog)
with Context.from_app(app) as context:
client = from_context(context)
client.write_array([1], key="x")
yield client


slice_test_data = [
"",
":",
"::",
"0",
"0:",
"0::",
":0",
"::0",
"5:",
":10",
"::12",
"-1",
"-5:",
":-5",
"::-45",
"3:5",
"5:3",
"123::4",
"5::678",
":123:4",
":5:678",
",",
",,",
",:",
":,::",
",,:,::,,::,:,,::,",
"0,1,2",
"5:,:10,::-5",
"1:2:3,4:5:6,7:8:9",
"10::20,30::40,50::60",
"1 : 2",
"1:2, 3",
"1 ,2:3",
"1 , 2 , 3",
]

slice_typo_data = [
":::",
"1:2:3:4",
"1:2,3:4:5:6",
]

slice_malicious_data = [
"1:(2+3)",
"1**2",
"print('oh so innocent')",
"; print('oh so innocent')",
")\"; print('oh so innocent')",
"1:2)\"; print('oh so innocent')",
"1:2)\";print('oh_so_innocent')",
"import sys; sys.exit()",
"; import sys; sys.exit()",
"touch /tmp/x",
"rm -rf /tmp/*",
]


# this is the outgoing slice_ function from tiled.server.dependencies as is
def reference_slice_(
slice: str = Query(None, pattern="^[-0-9,:]*$"),
):
"Specify and parse a block index parameter."
import numpy

# IMPORTANT We are eval-ing a user-provider string here so we need to be
# very careful about locking down what can be in it. The regex above
# excludes any letters or operators, so it is not possible to execute
# functions or expensive arithmetic.
return tuple(
[
eval(f"numpy.s_[{dim!s}]", {"numpy": numpy})
for dim in (slice or "").split(",")
if dim
]
)


@pytest.mark.parametrize("slice", slice_test_data)
def test_slicer(slice: str):
"""
Test the slicer function
"""
assert slice_(slice) == reference_slice_(slice)


@pytest.mark.parametrize("slice", slice_typo_data)
def test_slicer_typo_data(slice: str):
"""
Test the slicer function with invalid input
"""
with pytest.raises(TypeError):
_ = slice_(slice)


@pytest.mark.parametrize("slice", slice_malicious_data)
def test_slicer_malicious_exec(slice: str):
"""
Test the slicer function with 'malicious' input
"""
with pytest.raises(ValueError):
_ = slice_(slice)


@pytest.mark.parametrize("slice_", slice_typo_data + slice_malicious_data)
def test_slicer_fastapi_query_rejection(slice_, client):
http_client = client.context.http_client
response = http_client.get(f"/api/v1/array/block/x?block=0&slice={slice_}")
assert response.status_code == HTTP_422_UNPROCESSABLE_ENTITY
2 changes: 1 addition & 1 deletion tiled/client/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def read_block(self, block, slice=None):

def read(self, slice=None):
"""
Acess the entire array or a slice.
Access the entire array or a slice.
The array will be internally chunked with dask.
"""
Expand Down
31 changes: 17 additions & 14 deletions tiled/server/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
from .core import NoEntry
from .utils import filter_for_access, record_timing

# saving slice() to rescue after using "slice" for FastAPI dependency injection of slice_(slice: str)
slice_func = slice

DIM_REGEX = r"(?:(?:-?\d+)?:){0,2}(?:-?\d+)?"
SLICE_REGEX = rf"^{DIM_REGEX}(?:,{DIM_REGEX})*$"


@lru_cache(1)
def get_query_registry():
Expand Down Expand Up @@ -159,20 +165,17 @@ def expected_shape(
return tuple(map(int, expected_shape.split(",")))


def np_style_slicer(indices: tuple):
return indices[0] if len(indices) == 1 else slice_func(*indices)


def parse_slice_str(dim: str):
return np_style_slicer(tuple(int(idx) if idx else None for idx in dim.split(":")))


def slice_(
slice: str = Query(None, pattern="^[-0-9,:]*$"),
slice: Optional[str] = Query(None, pattern=SLICE_REGEX),
):
"Specify and parse a block index parameter."
import numpy

# IMPORTANT We are eval-ing a user-provider string here so we need to be
# very careful about locking down what can be in it. The regex above
# excludes any letters or operators, so it is not possible to execute
# functions or expensive arithmetic.
return tuple(
[
eval(f"numpy.s_[{dim!s}]", {"numpy": numpy})
for dim in (slice or "").split(",")
if dim
]
)

return tuple(parse_slice_str(dim) for dim in (slice or "").split(",") if dim)

0 comments on commit cd03fc5

Please sign in to comment.