Skip to content

Commit

Permalink
Fix specs configuration and add 'reject_undeclared_specs'.
Browse files Browse the repository at this point in the history
  • Loading branch information
danielballan committed Jan 31, 2023
1 parent bf5368c commit f92b158
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 50 deletions.
60 changes: 42 additions & 18 deletions tiled/_tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import numpy as np
import pandas as pd

from ..client import from_config, from_tree
from ..validation_registration import ValidationError, ValidationRegistry
from ..client import from_config
from ..config import merge
from ..validation_registration import ValidationError
from .utils import fail_with_status_code
from .writable_adapters import WritableMapAdapter

Expand Down Expand Up @@ -43,33 +44,42 @@ def validate_foo(metadata, structure_family, structure, spec, references):
return metadata


tree = WritableMapAdapter({})


def test_validators():

validation_registry = ValidationRegistry()
validation_registry.register("foo", validate_foo)
config = {
"trees": [{"tree": f"{__name__}:tree", "path": "/"}],
"specs": [{"spec": "foo", "validator": f"{__name__}:validate_foo"}],
"authentication": {"single_user_api_key": API_KEY},
}
# Check that specs propagate correctly through merging configs.
merged_config = merge({"filepath_placeholder": config})
assert merged_config["specs"]
client = from_config(merged_config)

tree = WritableMapAdapter({})
client = from_tree(
tree,
api_key=API_KEY,
authentication={"single_user_api_key": API_KEY},
validation_registry=validation_registry,
)
# valid example
df = pd.DataFrame({"a": np.zeros(10), "b": np.zeros(10)})
client.write_dataframe(df, metadata={"foo": 1}, specs=["foo"])

with fail_with_status_code(400):
# not expected structure family
a = np.ones((5, 7))
client.write_array(a, metadata={}, specs=["foo"])

with fail_with_status_code(400):
df = pd.DataFrame({"x": np.zeros(100), "y": np.zeros(100)})
# column names are not expected
df = pd.DataFrame({"x": np.zeros(10), "y": np.zeros(10)})
client.write_dataframe(df, metadata={}, specs=["foo"])

with fail_with_status_code(400):
df = pd.DataFrame({"a": np.zeros(100), "b": np.zeros(100)})
# missing expected metadata
df = pd.DataFrame({"a": np.zeros(10), "b": np.zeros(10)})
client.write_dataframe(df, metadata={}, specs=["foo"])

metadata = {"id": 1, "foo": "bar"}
df = pd.DataFrame({"a": np.zeros(100), "b": np.zeros(100)})
df = pd.DataFrame({"a": np.zeros(10), "b": np.zeros(10)})
result = client.write_dataframe(df, metadata=metadata, specs=["foo"])
assert result.metadata == metadata
result_df = result.read()
Expand All @@ -83,19 +93,33 @@ def test_validators():
pd.testing.assert_frame_equal(result_df, df)


tree = WritableMapAdapter({})


def test_unknown_spec():
def test_unknown_spec_strict():
"Test unknown spec rejected for upload."
config = {
"trees": [{"tree": f"{__name__}:tree", "path": "/"}],
"specs": [{"spec": "a"}],
"authentication": {"single_user_api_key": API_KEY},
"reject_undeclared_specs": True,
}
client = from_config(config, api_key=API_KEY)
a = np.ones((5, 7))
client.write_array(a, metadata={}, specs=["a"])

with fail_with_status_code(400):
# unknown spec 'b' should be rejected
client.write_array(a, metadata={}, specs=["b"])


def test_unknown_spec_permissive():
"Test unknown spec rejected for upload."
config = {
"trees": [{"tree": f"{__name__}:tree", "path": "/"}],
"specs": [{"spec": "a"}],
"authentication": {"single_user_api_key": API_KEY},
# "reject_undeclared_specs": false, # the default
}
client = from_config(config, api_key=API_KEY)
a = np.ones((5, 7))
client.write_array(a, metadata={}, specs=["a"])
# unknown spec 'b' should be accepted
client.write_array(a, metadata={}, specs=["b"])
20 changes: 16 additions & 4 deletions tiled/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ def construct_build_app_kwargs(
"response_bytesize_limit"
)
server_settings["database"] = config.get("database", {})
server_settings["reject_undeclared_specs"] = config.get(
"reject_undeclared_specs"
)
server_settings["metrics"] = config.get("metrics", {})
for structure_family, values in config.get("media_types", {}).items():
for media_type, import_path in values.items():
Expand Down Expand Up @@ -216,16 +219,15 @@ def merge(configs):
response_bytesize_limit_config_source = None
allow_origins = []
media_types = defaultdict(dict)
validation = {}
specs = []
reject_undeclared_specs_source = None
file_extensions = {}
paths = {} # map each item's path to config file that specified it

for filepath, config in configs.items():
for structure_family, values in config.get("media_types", {}).items():
media_types[structure_family].update(values)

validation.update(config.get("validation", {}))

file_extensions.update(config.get("file_extensions", {}))
allow_origins.extend(config.get("allow_origins", []))
if "access_control" in config:
Expand Down Expand Up @@ -291,6 +293,15 @@ def merge(configs):
)
database_config_source = filepath
merged["database"] = config["database"]
if "reject_undeclared_specs" in config:
if "reject_undeclared_specs" in merged:
raise ConfigError(
"'reject_undeclared_specs' can only be specified in one file. "
f"It was found in both {reject_undeclared_specs_source} and "
f"{filepath}"
)
reject_undeclared_specs_source = filepath
merged["reject_undeclared_specs"] = config["reject_undeclared_specs"]
for item in config.get("trees", []):
if item["path"] in paths:
msg = "A given path may be only be specified once."
Expand All @@ -302,10 +313,11 @@ def merge(configs):
raise ConfigError(msg)
paths[item["path"]] = filepath
merged["trees"].append(item)
specs.extend(config.get("specs", []))
merged["media_types"] = dict(media_types) # convert from defaultdict
merged["file_extensions"] = file_extensions
merged["allow_origins"] = allow_origins
merged["validation"] = validation
merged["specs"] = specs
return merged


Expand Down
6 changes: 6 additions & 0 deletions tiled/config_schemas/service_configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,9 @@ properties:
success or raise a `tiled.validation_registration.ValidationError`
exception to indicate failure. If a metadata object is returned it
will be sent back to client in the response to the POST.
reject_undeclared_specs:
type: boolean
description: |
If true, any data uploaded with a "spec" that is not declared in this
configuration file will be rejected. By default, this is set to false
---i.e., permissive.
6 changes: 5 additions & 1 deletion tiled/server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,11 @@ def override_get_settings():
]:
if authentication.get(item) is not None:
setattr(settings, item, authentication[item])
for item in ["allow_origins", "response_bytesize_limit"]:
for item in [
"allow_origins",
"response_bytesize_limit",
"reject_undeclared_specs",
]:
if server_settings.get(item) is not None:
setattr(settings, item, server_settings[item])
database = server_settings.get("database", {})
Expand Down
65 changes: 38 additions & 27 deletions tiled/server/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ def post_metadata(
path: str,
body: schemas.PostMetadataRequest,
validation_registry=Depends(get_validation_registry),
settings: BaseSettings = Depends(get_settings),
entry=SecureEntry(scopes=["write:metadata", "create"]),
):
if not hasattr(entry, "post_metadata"):
Expand Down Expand Up @@ -627,19 +628,24 @@ def post_metadata(
# won't step on each other in this way, but this may need revisiting.
for spec in reversed(specs):
if spec.name not in validation_registry:
raise HTTPException(
status_code=400, detail=f"Unrecognized spec: {spec.name}"
)
validator = validation_registry(spec.name)
try:
result = validator(metadata, structure_family, structure, spec, references)
except ValidationError as e:
raise HTTPException(
status_code=400, detail=f"failed validation for spec {spec.name}:\n{e}"
)
if result is not None:
metadata_modified = True
metadata = result
if settings.reject_undeclared_specs:
raise HTTPException(
status_code=400, detail=f"Unrecognized spec: {spec.name}"
)
else:
validator = validation_registry(spec.name)
try:
result = validator(
metadata, structure_family, structure, spec, references
)
except ValidationError as e:
raise HTTPException(
status_code=400,
detail=f"failed validation for spec {spec.name}:\n{e}",
)
if result is not None:
metadata_modified = True
metadata = result

key = entry.post_metadata(
metadata=body.metadata,
Expand Down Expand Up @@ -764,6 +770,7 @@ async def put_metadata(
request: Request,
body: schemas.PutMetadataRequest,
validation_registry=Depends(get_validation_registry),
settings: BaseSettings = Depends(get_settings),
entry=SecureEntry(scopes=["write:metadata"]),
):
if not hasattr(entry, "put_metadata"):
Expand Down Expand Up @@ -791,20 +798,24 @@ async def put_metadata(
# won't step on each other in this way, but this may need revisiting.
for spec in reversed(specs):
if spec.name not in validation_registry:
raise HTTPException(
status_code=400, detail=f"Unrecognized spec: {spec.name}"
)
validator = validation_registry(spec.name)
try:
result = validator(metadata, structure_family, structure, spec, references)
except ValidationError as e:
raise HTTPException(
status_code=400,
detail=f"failed validation for spec {spec.name}:\n{e}",
)
if result is not None:
metadata_modified = True
metadata = result
if settings.reject_undeclared_specs:
raise HTTPException(
status_code=400, detail=f"Unrecognized spec: {spec.name}"
)
else:
validator = validation_registry(spec.name)
try:
result = validator(
metadata, structure_family, structure, spec, references
)
except ValidationError as e:
raise HTTPException(
status_code=400,
detail=f"failed validation for spec {spec.name}:\n{e}",
)
if result is not None:
metadata_modified = True
metadata = result

entry.put_metadata(metadata=metadata, specs=specs, references=references)

Expand Down
1 change: 1 addition & 0 deletions tiled/server/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class Settings(BaseSettings):
response_bytesize_limit = int(
os.getenv("TILED_RESPONSE_BYTESIZE_LIMIT", 300_000_000)
) # 300 MB
reject_undeclared_specs = bool(int(os.getenv("TILED_REJECT_UNDECLARED_SPECS", 0)))
database_uri: Optional[str] = os.getenv("TILED_DATABASE_URI")
database_pool_size: Optional[int] = int(os.getenv("TILED_DATABASE_POOL_SIZE", 5))
database_pool_pre_ping: Optional[bool] = bool(
Expand Down

0 comments on commit f92b158

Please sign in to comment.