Skip to content

Commit

Permalink
Issue #129 add collection whitelist config
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Oct 24, 2023
1 parent 0c4dfbf commit f33904d
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 10 deletions.
13 changes: 6 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,19 @@ All notable changes to this project will be documented in this file.

The format is roughly based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Current: 0.11.x]
## [Current: 0.12.x]

### Added
- Add (optional) config for collection id whitelisting.
Keep union of all "upstream" collections as default.
([#129](https://github.com/Open-EO/openeo-aggregator/issues/129))

### Changed

## [0.11.x]

- Dockerfile: switch to `python:3.9-slim-bullseye` base image
- Parallelize `/jobs` requests to upstream back-ends ([#28](https://github.com/Open-EO/openeo-aggregator/issues/28))
- Increase timeout on getting batch job logs to 120s ([#128](https://github.com/Open-EO/openeo-aggregator/issues/128))

### Fixed

### Removed


## [0.10.x]

Expand Down
2 changes: 1 addition & 1 deletion src/openeo_aggregator/about.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import sys

__version__ = "0.11.1a1"
__version__ = "0.12.0a1"


def log_version_info():
Expand Down
10 changes: 9 additions & 1 deletion src/openeo_aggregator/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def __init__(self, backends: MultiBackendConnection, config: AggregatorConfig):
self.backends = backends
self._memoizer = memoizer_from_config(config=config, namespace="CollectionCatalog")
self.backends.on_connections_change.add(self._memoizer.invalidate)
self._collection_whitelist: Optional[List[str]] = config.collection_whitelist

def get_all_metadata(self) -> List[dict]:
metadata, internal = self._get_all_metadata_cached()
Expand Down Expand Up @@ -172,7 +173,14 @@ def _get_all_metadata(self) -> Tuple[List[dict], _InternalCollectionMetadata]:
continue
for collection_metadata in backend_collections:
if "id" in collection_metadata:
grouped[collection_metadata["id"]][con.id] = collection_metadata
collection_id = collection_metadata["id"]
if self._collection_whitelist:
if collection_id not in self._collection_whitelist:
_log.debug(f"Skipping non-whitelisted {collection_id=} from {con.id=}")
continue
else:
_log.debug(f"Preserving whitelisted {collection_id=} from {con.id=}")
grouped[collection_id][con.id] = collection_metadata
# TODO: support a trigger to create a collection alias under other name?
else:
# TODO: there must be something seriously wrong with this backend: skip all its results?
Expand Down
3 changes: 3 additions & 0 deletions src/openeo_aggregator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class AggregatorConfig(dict):
# TTL for connection caching.
connections_cache_ttl = dict_item(default=5 * 60.0)

# List of collection ids to cover with the aggregator (when None: support union of all upstream collections)
collection_whitelist = dict_item(default=None)

@staticmethod
def from_py_file(path: Union[str, Path]) -> 'AggregatorConfig':
"""Load config from Python file."""
Expand Down
11 changes: 10 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,15 @@ def connections_cache_ttl() -> float:
return 1.0


@pytest.fixture
def config_override() -> dict:
    """Per-test hook for aggregator config overrides.

    Returns an empty mapping by default; individual tests supply overrides
    by parametrizing this fixture name (pytest substitutes the parametrized
    value for the fixture), and `base_config` merges the result in.
    """
    return dict()


@pytest.fixture
def base_config(
configured_oidc_providers, zk_client, memoizer_config, connections_cache_ttl
configured_oidc_providers, zk_client, memoizer_config, connections_cache_ttl, config_override: dict
) -> AggregatorConfig:
"""Base config for tests (without any configured backends)."""
conf = AggregatorConfig()
Expand All @@ -123,6 +129,9 @@ def base_config(
conf.partitioned_job_tracking = {
"zk_client": zk_client,
}

conf.update(config_override)

return conf


Expand Down
38 changes: 38 additions & 0 deletions tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,44 @@ def test_collections_links(self, api100, requests_mock, backend1, backend2):
"links": [],
}

@pytest.mark.parametrize(
    ["config_override", "expected"],
    [
        ({"collection_whitelist": None}, {"S1", "S2", "S3", "S4"}),
        ({"collection_whitelist": []}, {"S1", "S2", "S3", "S4"}),
        ({"collection_whitelist": ["S2"]}, {"S2"}),
        ({"collection_whitelist": ["S4"]}, {"S4"}),
        ({"collection_whitelist": ["S2", "S3"]}, {"S2", "S3"}),
        ({"collection_whitelist": ["S2", "S999"]}, {"S2"}),
        ({"collection_whitelist": ["S999"]}, set()),
    ],
)
def test_collections_whitelist(self, api100, requests_mock, backend1, backend2, expected):
    """Collection whitelist config filters both the /collections listing and per-id lookup.

    NOTE: `config_override` is parametrized by fixture name — pytest injects each
    parametrize value in place of the `config_override` fixture consumed by
    `base_config`, so every case runs against a differently-configured aggregator.
    """
    # Mock the two upstream back-ends: listing endpoint plus per-collection metadata.
    # S3 is offered by both back-ends; S4 only by backend2.
    upstream = [(backend1, "b1", ["S1", "S2", "S3"]), (backend2, "b2", ["S3", "S4"])]
    for base_url, label, cids in upstream:
        requests_mock.get(base_url + "/collections", json={"collections": [{"id": c} for c in cids]})
        for cid in cids:
            requests_mock.get(base_url + f"/collections/{cid}", json={"id": cid, "title": f"{label} {cid}"})

    # Listing only exposes the whitelisted subset (empty/None whitelist: union of all).
    listing = api100.get("/collections").assert_status_code(200).json
    assert {c["id"] for c in listing["collections"]} == expected

    # Per-id lookup follows the same filtering; for shared S3, backend1's metadata wins.
    for cid in ["S2", "S3"]:
        resp = api100.get(f"/collections/{cid}")
        if cid in expected:
            assert resp.assert_status_code(200).json == DictSubSet({"id": cid, "title": f"b1 {cid}"})
        else:
            resp.assert_error(404, "CollectionNotFound")

    # An id unknown to every back-end is always a 404, whitelisted or not.
    api100.get("/collections/S999").assert_error(404, "CollectionNotFound")


class TestAuthentication:
def test_credentials_oidc_default(self, api100, backend1, backend2):
Expand Down

0 comments on commit f33904d

Please sign in to comment.