From 2c4aac747c76f534e9fa6b54d439d2584389ef56 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 5 Jul 2023 13:03:06 -0400 Subject: [PATCH] Rename 'node' to 'container' and shorten URLs. (#478) * Rename structure family 'node' to 'container', shorten URLs. * Update tree util. * Make test of inlined HFD5 content more thorough. * Update URLs. * Satisfy linters. * Update renamed queries in docs. * Rename client node module and Node class. --- docs/source/explanations/compression.md | 4 +- docs/source/explanations/structures.md | 18 +- docs/source/how-to/api-keys.md | 6 +- docs/source/how-to/client-logger.md | 2 +- docs/source/how-to/custom-clients.md | 30 +- docs/source/reference/authentication.md | 6 +- docs/source/reference/http-api-overview.md | 2 +- docs/source/reference/python-client.md | 28 +- docs/source/reference/queries.md | 9 +- tiled/_tests/test_catalog.py | 10 +- tiled/_tests/test_distinct.py | 4 +- tiled/_tests/test_hdf5.py | 11 +- tiled/_tests/test_pickle.py | 5 + tiled/_tests/test_queries.py | 18 +- tiled/adapters/array.py | 3 +- tiled/adapters/dataframe.py | 3 +- tiled/adapters/hdf5.py | 3 +- tiled/adapters/mapping.py | 11 +- tiled/adapters/sparse.py | 3 +- tiled/adapters/tiff.py | 3 +- tiled/catalog/adapter.py | 6 +- tiled/client/__init__.py | 2 +- tiled/client/base.py | 4 +- tiled/client/constructors.py | 8 +- tiled/client/container.py | 1003 +++++++++++++++++ tiled/client/context.py | 6 +- tiled/client/node.py | 1003 +---------------- tiled/client/xarray.py | 4 +- tiled/queries.py | 12 +- tiled/serialization/__init__.py | 4 +- tiled/serialization/{node.py => container.py} | 11 +- tiled/serialization/dataframe.py | 2 +- tiled/serialization/xarray.py | 2 +- tiled/server/app.py | 12 +- tiled/server/authentication.py | 2 +- tiled/server/core.py | 42 +- tiled/server/router.py | 36 +- tiled/server/schemas.py | 6 +- tiled/structures/core.py | 2 +- tiled/utils.py | 4 +- web-frontend/src/client.ts | 4 +- web-frontend/src/openapi_schemas.ts | 4 +- 42 files changed, 1196 insertions(+), 1162 deletions(-) create mode 100644 tiled/client/container.py rename tiled/serialization/{node.py => container.py} (91%) diff --git a/docs/source/explanations/compression.md b/docs/source/explanations/compression.md index f22b423b1..12e115c58 100644 --- a/docs/source/explanations/compression.md +++ b/docs/source/explanations/compression.md @@ -142,8 +142,8 @@ Finally, in this example. the server decides that the raw, compressed content is so small (304 bytes) that is isn't not worth compressing. ``` -$ http -p Hh :8000/node/metadata/ -GET /node/metadata/ HTTP/1.1 +$ http -p Hh :8000/metadata/ +GET /metadata/ HTTP/1.1 Accept: */* Accept-Encoding: gzip, deflate Connection: keep-alive diff --git a/docs/source/explanations/structures.md b/docs/source/explanations/structures.md index ac34e1de8..017b901a4 100644 --- a/docs/source/explanations/structures.md +++ b/docs/source/explanations/structures.md @@ -65,7 +65,7 @@ a string label for each dimension. This `(10, 10)`-shaped array fits in a single `(10, 10)`-shaped chunk. ``` -$ http :8000/node/metadata/small_image | jq .data.attributes.structure +$ http :8000/metadata/small_image | jq .data.attributes.structure ``` ```json @@ -101,7 +101,7 @@ This `(10000, 10000)`-shaped array is subdivided into 4 × 4 = 16 chunks, which is why the size of each chunk is given explicitly. 
``` -$ http :8000/node/metadata/big_image | jq .data.attributes.structure +$ http :8000/metadata/big_image | jq .data.attributes.structure ``` ```json @@ -142,7 +142,7 @@ This is a 1D array where each item has internal structure, as in numpy's [strucuted data types](https://numpy.org/doc/stable/user/basics.rec.html) ``` -$ http :8000/node/metadata/structured_data/pets | jq .data.attributes.structure +$ http :8000/metadata/structured_data/pets | jq .data.attributes.structure ``` ```json @@ -241,7 +241,7 @@ order, but we cannot make requests like "rows 100-200". (Dask has the same limitation, for the same reason.) ``` -$ http :8000/node/metadata/long_table | jq .data.attributes.structure +$ http :8000/metadata/long_table | jq .data.attributes.structure ``` ```json @@ -354,7 +354,7 @@ In certain cases, it is efficient to in-line all the information about the node' "links": { "block": "http://localhost:8000/api/v1/array/block/structured_data/xarray_dataset/lat?block={index_0},{index_1}", "full": "http://localhost:8000/api/v1/array/full/structured_data/xarray_dataset/lat", - "self": "http://localhost:8000/api/v1/node/metadata/structured_data/xarray_dataset/lat" + "self": "http://localhost:8000/api/v1/metadata/structured_data/xarray_dataset/lat" }, "meta": null }, @@ -401,7 +401,7 @@ In certain cases, it is efficient to in-line all the information about the node' "links": { "block": "http://localhost:8000/api/v1/array/block/structured_data/xarray_dataset/lon?block={index_0},{index_1}", "full": "http://localhost:8000/api/v1/array/full/structured_data/xarray_dataset/lon", - "self": "http://localhost:8000/api/v1/node/metadata/structured_data/xarray_dataset/lon" + "self": "http://localhost:8000/api/v1/metadata/structured_data/xarray_dataset/lon" }, "meta": null }, @@ -453,7 +453,7 @@ In certain cases, it is efficient to in-line all the information about the node' "links": { "block": "http://localhost:8000/api/v1/array/block/structured_data/xarray_dataset/precipitation?block={index_0},{index_1},{index_2}", "full": "http://localhost:8000/api/v1/array/full/structured_data/xarray_dataset/precipitation", - "self": "http://localhost:8000/api/v1/node/metadata/structured_data/xarray_dataset/precipitation" + "self": "http://localhost:8000/api/v1/metadata/structured_data/xarray_dataset/precipitation" }, "meta": null }, @@ -505,7 +505,7 @@ In certain cases, it is efficient to in-line all the information about the node' "links": { "block": "http://localhost:8000/api/v1/array/block/structured_data/xarray_dataset/temperature?block={index_0},{index_1},{index_2}", "full": "http://localhost:8000/api/v1/array/full/structured_data/xarray_dataset/temperature", - "self": "http://localhost:8000/api/v1/node/metadata/structured_data/xarray_dataset/temperature" + "self": "http://localhost:8000/api/v1/metadata/structured_data/xarray_dataset/temperature" }, "meta": null }, @@ -547,7 +547,7 @@ In certain cases, it is efficient to in-line all the information about the node' "links": { "block": "http://localhost:8000/api/v1/array/block/structured_data/xarray_dataset/time?block={index_0}", "full": "http://localhost:8000/api/v1/array/full/structured_data/xarray_dataset/time", - "self": "http://localhost:8000/api/v1/node/metadata/structured_data/xarray_dataset/time" + "self": "http://localhost:8000/api/v1/metadata/structured_data/xarray_dataset/time" }, "meta": null } diff --git a/docs/source/how-to/api-keys.md b/docs/source/how-to/api-keys.md index e6727b8f6..fedc023c4 100644 --- a/docs/source/how-to/api-keys.md +++ 
b/docs/source/how-to/api-keys.md @@ -98,7 +98,7 @@ We can use in other web clients as well. For example, using [HTTPie](https://htt we can see that unauthenticated requests are refused ``` -$ http http://localhost:8000/api/v1/node/metadata/ +$ http http://localhost:8000/api/v1/metadata/ HTTP/1.1 401 Unauthorized content-length: 30 content-type: application/json @@ -117,7 +117,7 @@ but passing the API key in the `Authorization` header as `Apikey YOUR_KEY_HERE` (Note the use of `'` quotes.) ``` -$ http http://localhost:8000/api/v1/node/metadata/ 'Authorization:Apikey 48e8f8598940fa0f3e80b406def606e17e815a2c76fe21350a99d6d9935371d11533b318' +$ http http://localhost:8000/api/v1/metadata/ 'Authorization:Apikey 48e8f8598940fa0f3e80b406def606e17e815a2c76fe21350a99d6d9935371d11533b318' HTTP/1.1 200 OK content-length: 320 content-type: application/json @@ -130,7 +130,7 @@ set-cookie: tiled_csrf=InE4mplUO0goPxf4V07tVuLSLUvDqhgtALTHYoC3T3s; HttpOnly; Pa ``` The API key can also be passed in the URL like -`http://localhost:8000/api/v1/node/metadata/?api_key=YOUR_KEY_HERE`. Using the +`http://localhost:8000/api/v1/metadata/?api_key=YOUR_KEY_HERE`. Using the `Authorization` header is preferred (more secure) but in some situations, as in pasting a link into a web browser, the URL is the only option. diff --git a/docs/source/how-to/client-logger.md b/docs/source/how-to/client-logger.md index ed9b8ad92..e34cdfc07 100644 --- a/docs/source/how-to/client-logger.md +++ b/docs/source/how-to/client-logger.md @@ -29,7 +29,7 @@ Requests (`->`) and responses (`<-`) will now be logged to the console, like so. 16:49:22.486 <- 200 server:nginx/1.18.0 (Ubuntu) date:Tue, 01 Feb 2022 21:49:22 GMT content-type:application/json content-length:761 connection:keep-alive etag:35b70c6412c39db8b7b5132ddf61973c expires:Tue, 01 Feb 2022 21:59:22 GMT content-encoding:gzip vary:Accept-Encoding server-timing:tok;dur=0.1, pack;dur=0.0, compress;dur=0.1;ratio=3.1, app;dur=3.9 set-cookie:tiled_csrf=-fyaLez0YkradgcEVYBJh4QotR5MNyzouV0SV0NWHmM; HttpOnly; Path=/; SameSite=lax 16:49:22.492 -> GET 'https://tiled-demo.blueskyproject.io/?root_path=true' 'host:tiled-demo.blueskyproject.io' 'accept:application/x-msgpack' 'accept-encoding:gzip,blosc' 'connection:keep-alive' 'user-agent:python-tiled/0.1.0a49' 'cookie:tiled_csrf=-fyaLez0YkradgcEVYBJh4QotR5MNyzouV0SV0NWHmM' 16:49:22.531 <- 200 server:nginx/1.18.0 (Ubuntu) date:Tue, 01 Feb 2022 21:49:22 GMT content-type:application/x-msgpack content-length:773 connection:keep-alive etag:35b70c6412c39db8b7b5132ddf61973c expires:Tue, 01 Feb 2022 21:59:22 GMT content-encoding:gzip vary:Accept-Encoding server-timing:tok;dur=0.1, pack;dur=0.0, compress;dur=0.1;ratio=2.7, app;dur=4.5 -16:49:22.535 -> GET 'https://tiled-demo.blueskyproject.io/node/metadata/' 'host:tiled-demo.blueskyproject.io' 'accept:application/x-msgpack' 'accept-encoding:gzip,blosc' 'connection:keep-alive' 'user-agent:python-tiled/0.1.0a49' 'cookie:tiled_csrf=-fyaLez0YkradgcEVYBJh4QotR5MNyzouV0SV0NWHmM' +16:49:22.535 -> GET 'https://tiled-demo.blueskyproject.io//metadata/' 'host:tiled-demo.blueskyproject.io' 'accept:application/x-msgpack' 'accept-encoding:gzip,blosc' 'connection:keep-alive' 'user-agent:python-tiled/0.1.0a49' 'cookie:tiled_csrf=-fyaLez0YkradgcEVYBJh4QotR5MNyzouV0SV0NWHmM' 16:49:22.572 <- 200 server:nginx/1.18.0 (Ubuntu) date:Tue, 01 Feb 2022 21:49:22 GMT content-type:application/x-msgpack content-length:292 connection:keep-alive etag:821dd2a8b431ecd016f94cacd44af74f server-timing:tok;dur=0.0, pack;dur=0.0, app;dur=3.8 
>>> t = c['generated']['short_table'] diff --git a/docs/source/how-to/custom-clients.md b/docs/source/how-to/custom-clients.md index 1a5a832fc..41032365d 100644 --- a/docs/source/how-to/custom-clients.md +++ b/docs/source/how-to/custom-clients.md @@ -1,7 +1,7 @@ # Custom Python Client Objects To provide an "upgraded" and more finely-tuned user experience for certain -kinds of dataset, TIled can be configured to use custom Python objects. +kinds of dataset, Tiled can be configured to use custom Python objects. This is transparent and automatic from the point view of the user. In the Python client, when a user accesses a given item, Tiled inspects the @@ -9,20 +9,20 @@ item to decide what type of object to use to represent it. In simple cases, this is just based on the `structure_family`: `"array"` goes to `tiled.client.array.ArrayClient`; `"dataframe"` goes to `tiled.client.dataframe.DataFrameClient`; `"node"` goes to -`tiled.clide.node.Node`. Those classes then manage further commication with -Tiled server to access their contents. +`tiled.clide.node.Container`. Those classes then manage further communication +with Tiled server to access their contents. Each item always has exactly one `structure_family`, and it's always from a fixed list. In addition, it may have a list of `specs`, labels which are meant to communicate some more specific expectations about the data that may or may not have meaning to a given client. If a client does not recognize some spec, it can still access the metadata and data and performed Tiled's essential -fucntions. If it does recognize a spec, it can provide an upgraded user +functions. If it does recognize a spec, it can provide an upgraded user experience. ## Example -Supose data labeled with the `xdi` spec is guaranteed to have a metadata +Suppose data labeled with the `xdi` spec is guaranteed to have a metadata dictionary containing the following two entries: ```py @@ -64,7 +64,7 @@ display as in: <...> ```` -It is conventional to include angle brakets `<>` when the string is not valid +It is conventional to include angle brackets `<>` when the string is not valid Python code, as opposed to ````py @@ -85,7 +85,7 @@ data labeled with the `xdi` spec. We'll register it manually for development and testing. Then we'll see how to configure it seamlessly for the user. ```py -from tiled.client.node import DEFAULT_STRUCTURE_CLIENT_DISPATCH +from tiled.client.container import DEFAULT_STRUCTURE_CLIENT_DISPATCH from tiled.client import from_uri custom = dict(DEFAULT_STRUCTURE_CLIENT_DISPATCH["numpy"]) @@ -149,7 +149,7 @@ recognizes none of the specs, or if there are no specs, it falls back to using t structure family. Specs should generally be sorted from most specific to least specific, so that Tiled uses the most finely-tuned client object available. -## More Possiblities and Design Guidelines +## More Possibilities and Design Guidelines There are many other useful things we could do with a custom client that is purpose-built for a specific kinds of data and/or metadata. We can add convenience properties @@ -179,13 +179,13 @@ with other scientific Python libraries. _change_ the behavior of the existing methods, attributes, and properties in the base class. This is a well-known [principle](https://en.wikipedia.org/wiki/Liskov_substitution_principle) in - software design generally, and it is especialy crucial here. 
If a user runs code - in a software environment where the library with the custom objects happens - to be missing, we want the user to immediate notice the missing methods, not - get confusingly different results from the "standard" method and the - customized one. In addition, it is helpful if the "vanilla" Tiled documentation - and user knowledge transfers to the custom classes with _additions_ but no - confusing _changes_. + software design generally, and it is especially crucial here. If a user runs + code in a software environment where the library with the custom objects + happens to be missing, we want the user to immediate notice the missing + methods, not get confusingly different results from the "standard" method + and the customized one. In addition, it is helpful if the "vanilla" Tiled + documentation and user knowledge transfers to the custom classes with + _additions_ but no confusing _changes_. 2. If something custom will do I/O (i.e. download metadata or data from the server) make it method, not a property. Properties that do "surprise" I/O may block over a slow network and can be very confusing. The same guideline diff --git a/docs/source/reference/authentication.md b/docs/source/reference/authentication.md index b66e80f9a..1c153ba91 100644 --- a/docs/source/reference/authentication.md +++ b/docs/source/reference/authentication.md @@ -85,7 +85,7 @@ The content of `tokens.json` looks like Make an authenticated request using that access token. ``` -$ http GET :8000/api/v1/node/metadata/ "Authorization:Bearer `jq -r .access_token tokens.json`" +$ http GET :8000/api/v1//metadata/ "Authorization:Bearer `jq -r .access_token tokens.json`" HTTP/1.1 200 OK content-length: 239 content-type: application/json @@ -106,7 +106,7 @@ set-cookie: tiled_csrf=1-Cpa1WcwggakZ91FtNsscjM8VO1N1znmuILlL5hGY8; HttpOnly; Pa "id": "", "links": { "search": "http://localhost:8000/api/v1/node/search/", - "self": "http://localhost:8000/api/v1/node/metadata/" + "self": "http://localhost:8000/api/v1//metadata/" }, "meta": null, "type": "tree" @@ -121,7 +121,7 @@ When the access token expires (after 15 minutes, by default) requests will be rejected like this. ``` -$ http GET :8000/api/v1/node/metadata/ "Authorization:Bearer `jq -r .access_token tokens.json`" +$ http GET :8000/api/v1//metadata/ "Authorization:Bearer `jq -r .access_token tokens.json`" HTTP/1.1 401 Unauthorized content-length: 53 content-type: application/json diff --git a/docs/source/reference/http-api-overview.md b/docs/source/reference/http-api-overview.md index 026062c14..8e9eba731 100644 --- a/docs/source/reference/http-api-overview.md +++ b/docs/source/reference/http-api-overview.md @@ -18,7 +18,7 @@ The routes are generally spelled like ``GET /{action}/{path}/``, like GitHub repository URLs, with the path following the structure of the Tree entries. -The ``GET /node/metadata/{path}`` route provides the metadata about one node. +The ``GET //metadata/{path}`` route provides the metadata about one node. The ``GET /node/search/{path}`` route provides paginated access to the children of a given node, with optional filtering (search). The ``GET /node/full/{path}`` route provides all the metadata and data below a given node. diff --git a/docs/source/reference/python-client.md b/docs/source/reference/python-client.md index d7b2e63e9..cbb2e8492 100644 --- a/docs/source/reference/python-client.md +++ b/docs/source/reference/python-client.md @@ -27,10 +27,10 @@ as well as: .. 
autosummary:: :toctree: generated - tiled.client.node.Node.get - tiled.client.node.Node.keys - tiled.client.node.Node.items - tiled.client.node.Node.values + tiled.client.container.Container.get + tiled.client.container.Container.keys + tiled.client.container.Container.items + tiled.client.container.Container.values ``` The views returned by `.keys()`, `.items()`, and `.values()` @@ -63,11 +63,11 @@ Beyond the Mapping interface, Node adds the following attributes .. autosummary:: :toctree: generated - tiled.client.node.Node.metadata - tiled.client.node.Node.references - tiled.client.node.Node.sorting - tiled.client.node.Node.uri - tiled.client.node.Node.specs + tiled.client.container.Container.metadata + tiled.client.container.Container.references + tiled.client.container.Container.sorting + tiled.client.container.Container.uri + tiled.client.container.Container.specs ``` It adds these methods, which return a new Node instance. @@ -76,8 +76,8 @@ It adds these methods, which return a new Node instance. .. autosummary:: :toctree: generated - tiled.client.node.Node.search - tiled.client.node.Node.sort + tiled.client.container.Container.search + tiled.client.container.Container.sort ``` It adds these methods for downloading and refreshing cached data. @@ -86,8 +86,8 @@ It adds these methods for downloading and refreshing cached data. .. autosummary:: :toctree: generated - tiled.client.node.Node.download - tiled.client.node.Node.refresh + tiled.client.container.Container.download + tiled.client.container.Container.refresh ``` It adds this method, which returns the unique metadata keys, @@ -97,7 +97,7 @@ structure_families, and specs of its children along with their counts. .. autosummary:: :toctree: generated - tiled.client.node.Node.distinct + tiled.client.container.Container.distinct ``` ## Structure Clients diff --git a/docs/source/reference/queries.md b/docs/source/reference/queries.md index 1104eaddd..ed025eff4 100644 --- a/docs/source/reference/queries.md +++ b/docs/source/reference/queries.md @@ -22,11 +22,14 @@ Follow the links in the table below for examples specific to each query. tiled.queries.In tiled.queries.NotIn tiled.queries.Regex - tiled.queries.Spec - tiled.queries.Specs - tiled.queries.StructureFamily + tiled.queries.SpecQuery + tiled.queries.SpecsQuery + tiled.queries.StructureFamilyQuery ``` +(Some have the word `Query` at the end of their name to avoid confusion with +other objects in the Tiled codebase.) 
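+
+For example, with a Tiled server running at `http://localhost:8000` and some
+entries labeled with a spec such as `xdi` (both purely illustrative
+assumptions), the renamed queries are used like any other query:
+
+```py
+from tiled.client import from_uri
+from tiled.queries import SpecsQuery, StructureFamilyQuery
+
+client = from_uri("http://localhost:8000")
+
+# All entries whose structure family is "array".
+arrays = client.search(StructureFamilyQuery("array"))
+
+# All entries that carry the spec "xdi" (an example spec name).
+xdi_entries = client.search(SpecsQuery(include=["xdi"]))
+```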
+ ## Query expressions The `Key` object can be used to construct queries in a readable way using diff --git a/tiled/_tests/test_catalog.py b/tiled/_tests/test_catalog.py index a3cfe9ef8..1937912da 100644 --- a/tiled/_tests/test_catalog.py +++ b/tiled/_tests/test_catalog.py @@ -45,7 +45,7 @@ async def test_nested_node_creation(a): await a.create_node( key="b", metadata={}, - structure_family=StructureFamily.node, + structure_family=StructureFamily.container, specs=[], references=[], ) @@ -53,7 +53,7 @@ async def test_nested_node_creation(a): await b.create_node( key="c", metadata={}, - structure_family=StructureFamily.node, + structure_family=StructureFamily.container, specs=[], references=[], ) @@ -84,7 +84,7 @@ async def test_sorting(a): await a.create_node( key=letter, metadata={"letter": letter, "number": number}, - structure_family=StructureFamily.node, + structure_family=StructureFamily.container, specs=[], references=[], ) @@ -130,7 +130,7 @@ async def test_search(a): await a.create_node( key=letter, metadata={"letter": letter, "number": number, "x": {"y": {"z": letter}}}, - structure_family=StructureFamily.node, + structure_family=StructureFamily.container, specs=[], references=[], ) @@ -152,7 +152,7 @@ async def test_search(a): await d.create_node( key=letter, metadata={"letter": letter, "number": number}, - structure_family=StructureFamily.node, + structure_family=StructureFamily.container, specs=[], references=[], ) diff --git a/tiled/_tests/test_distinct.py b/tiled/_tests/test_distinct.py index 6d75ffc2a..1d676003d 100644 --- a/tiled/_tests/test_distinct.py +++ b/tiled/_tests/test_distinct.py @@ -82,7 +82,7 @@ def test_distinct(context): {"value": ["MyDataFrame"], "count": None}, ], "structure_families": [ - {"value": "node", "count": None}, + {"value": "container", "count": None}, {"value": "array", "count": None}, {"value": "dataframe", "count": None}, ], @@ -107,7 +107,7 @@ def test_distinct(context): {"value": ["MyDataFrame"], "count": 5}, ], "structure_families": [ - {"value": "node", "count": 22}, + {"value": "container", "count": 22}, {"value": "array", "count": 10}, {"value": "dataframe", "count": 10}, ], diff --git a/tiled/_tests/test_hdf5.py b/tiled/_tests/test_hdf5.py index d8937a8ed..d662c20ff 100644 --- a/tiled/_tests/test_hdf5.py +++ b/tiled/_tests/test_hdf5.py @@ -104,12 +104,15 @@ def test_from_multiple(example_file): def test_inlined_contents(example_file): - """Serve a Group within an HDF5 file.""" - tree = HDF5Adapter(example_file["a"]["b"]) - assert hdf5_adapters.INLINED_DEPTH > 0 + """Test that the recursive structure and metadata are inlined into one request.""" + tree = HDF5Adapter(example_file) + assert hdf5_adapters.INLINED_DEPTH > 1 original = hdf5_adapters.INLINED_DEPTH try: with Context.from_app(build_app(tree)) as context: + with record_history() as hN: + client = from_context(context) + tree_util(client) hdf5_adapters.INLINED_DEPTH = 1 with record_history() as h1: client = from_context(context) @@ -118,6 +121,6 @@ def test_inlined_contents(example_file): with record_history() as h0: client = from_context(context) tree_util(client) - assert len(h0.requests) > len(h1.requests) + assert len(h0.requests) > len(h1.requests) > len(hN.requests) finally: hdf5_adapters.INLINED_DEPTH = original diff --git a/tiled/_tests/test_pickle.py b/tiled/_tests/test_pickle.py index 540ee5b70..710f0c054 100644 --- a/tiled/_tests/test_pickle.py +++ b/tiled/_tests/test_pickle.py @@ -5,6 +5,7 @@ import httpx import pytest +from packaging.version import parse from ..client 
import from_context from ..client.context import Context @@ -28,6 +29,10 @@ def test_pickle_clients(structure_clients): except Exception: raise pytest.skip(f"Could not connect to {API_URL}") with Context(API_URL) as context: + if parse(context.server_info["library_version"]) < parse("0.1.0a98"): + raise pytest.skip( + f"Server at {API_URL} is running too old a version to test against." + ) client = from_context(context, structure_clients) pickle.loads(pickle.dumps(client)) for segements in [ diff --git a/tiled/_tests/test_queries.py b/tiled/_tests/test_queries.py index 28aead8bd..b2407efce 100644 --- a/tiled/_tests/test_queries.py +++ b/tiled/_tests/test_queries.py @@ -21,8 +21,8 @@ NotEq, NotIn, Regex, - Specs, - StructureFamily, + SpecsQuery, + StructureFamilyQuery, ) from ..server.app import build_app from .conftest import TILED_TEST_POSTGRESQL_URI @@ -258,24 +258,24 @@ def cm(): else: cm = nullcontext with pytest.raises(TypeError): - Specs("foo") + SpecsQuery("foo") with cm(): - assert sorted(list(client.search(Specs(include=include_values)))) == sorted( - ["specs_foo_bar", "specs_foo_bar_baz"] - ) + assert sorted( + list(client.search(SpecsQuery(include=include_values))) + ) == sorted(["specs_foo_bar", "specs_foo_bar_baz"]) with cm(): assert list( - client.search(Specs(include=include_values, exclude=exclude_values)) + client.search(SpecsQuery(include=include_values, exclude=exclude_values)) ) == ["specs_foo_bar"] def test_structure_families(client): with pytest.raises(ValueError): - StructureFamily("foo") + StructureFamilyQuery("foo") - assert set(client.search(StructureFamily("array"))) == set(mapping) + assert set(client.search(StructureFamilyQuery("array"))) == set(mapping) def test_keys_filter(client): diff --git a/tiled/adapters/array.py b/tiled/adapters/array.py index a01f78d3b..28f3e3396 100644 --- a/tiled/adapters/array.py +++ b/tiled/adapters/array.py @@ -4,6 +4,7 @@ from ..server.object_cache import get_object_cache from ..structures.array import ArrayMacroStructure, BuiltinDtype, StructDtype +from ..structures.core import StructureFamily from ..utils import DictView, ListView @@ -22,7 +23,7 @@ class ArrayAdapter: """ - structure_family = "array" + structure_family = StructureFamily.array def __init__( self, diff --git a/tiled/adapters/dataframe.py b/tiled/adapters/dataframe.py index 2649ca30b..3bd274ea5 100644 --- a/tiled/adapters/dataframe.py +++ b/tiled/adapters/dataframe.py @@ -4,6 +4,7 @@ from ..serialization.dataframe import serialize_arrow from ..server.object_cache import NO_CACHE, get_object_cache +from ..structures.core import StructureFamily from ..structures.dataframe import DataFrameMacroStructure, DataFrameMicroStructure from ..utils import DictView from .array import ArrayAdapter @@ -25,7 +26,7 @@ class DataFrameAdapter: >>> DataFrameAdapter.read_csv("s3://bucket/myfiles.*.csv") """ - structure_family = "dataframe" + structure_family = StructureFamily.dataframe @classmethod def from_pandas(cls, *args, metadata=None, specs=None, references=None, **kwargs): diff --git a/tiled/adapters/hdf5.py b/tiled/adapters/hdf5.py index 2222e9996..171d42880 100644 --- a/tiled/adapters/hdf5.py +++ b/tiled/adapters/hdf5.py @@ -7,6 +7,7 @@ from ..adapters.utils import IndexersMixin from ..iterviews import ItemsView, KeysView, ValuesView +from ..structures.core import StructureFamily from ..utils import DictView, node_repr from .array import ArrayAdapter @@ -47,7 +48,7 @@ class HDF5Adapter(collections.abc.Mapping, IndexersMixin): """ - structure_family = "node" + 
structure_family = StructureFamily.container def __init__(self, node, *, specs=None, references=None, access_policy=None): self._node = node diff --git a/tiled/adapters/mapping.py b/tiled/adapters/mapping.py index 60f05eab5..8ff40800d 100644 --- a/tiled/adapters/mapping.py +++ b/tiled/adapters/mapping.py @@ -16,10 +16,11 @@ NotEq, NotIn, Regex, - Specs, - StructureFamily, + SpecsQuery, + StructureFamilyQuery, ) from ..query_registration import QueryTranslationRegistry +from ..structures.core import StructureFamily from ..utils import UNCHANGED, DictView from .utils import IndexersMixin @@ -43,7 +44,7 @@ class MapAdapter(collections.abc.Mapping, IndexersMixin): "references", ) - structure_family = "node" + structure_family = StructureFamily.container # Define classmethods for managing what queries this Adapter knows. query_registry = QueryTranslationRegistry() @@ -461,7 +462,7 @@ def specs(query, tree): return tree.new_variation(mapping=matches) -MapAdapter.register_query(Specs, specs) +MapAdapter.register_query(SpecsQuery, specs) def structure_family(query, tree): @@ -473,7 +474,7 @@ def structure_family(query, tree): return tree.new_variation(mapping=matches) -MapAdapter.register_query(StructureFamily, structure_family) +MapAdapter.register_query(StructureFamilyQuery, structure_family) def keys_filter(query, tree): diff --git a/tiled/adapters/sparse.py b/tiled/adapters/sparse.py index ef7e8305a..04f050694 100644 --- a/tiled/adapters/sparse.py +++ b/tiled/adapters/sparse.py @@ -1,13 +1,14 @@ import numpy import sparse +from ..structures.core import StructureFamily from ..structures.sparse import COOStructure from .array import slice_and_shape_from_block_and_chunks class COOAdapter: "Wrap sparse Coordinate List (COO) arrays." - structure_family = "sparse" + structure_family = StructureFamily.sparse @classmethod def from_arrays( diff --git a/tiled/adapters/tiff.py b/tiled/adapters/tiff.py index ee99f157a..b42fd1493 100644 --- a/tiled/adapters/tiff.py +++ b/tiled/adapters/tiff.py @@ -5,6 +5,7 @@ from ..server.object_cache import with_object_cache from ..structures.array import ArrayMacroStructure, BuiltinDtype +from ..structures.core import StructureFamily class TiffAdapter: @@ -17,7 +18,7 @@ class TiffAdapter: >>> TiffAdapter("path/to/file.tiff") """ - structure_family = "array" + structure_family = StructureFamily.array def __init__( self, diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index fabff10b6..88e4c80d2 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -26,8 +26,8 @@ NotEq, NotIn, Operator, + StructureFamilyQuery, ) -from tiled.queries import StructureFamily as StructureFamilyQuery from ..query_registration import QueryTranslationRegistry from ..serialization.dataframe import XLSX_MIME_TYPE @@ -120,7 +120,7 @@ class RootNode: It mocks the relevant part of the interface of .orm.Node. 
""" - structure_family = StructureFamily.node + structure_family = StructureFamily.container def __init__(self, metadata, specs, references, access_policy): self.metadata = metadata or {} @@ -429,7 +429,7 @@ def adapter_from_node(self, node): adapter.access_policy = self.access_policy # HACK return adapter else: # num_data_sources == 0 - if node.structure_family != StructureFamily.node: + if node.structure_family != StructureFamily.container: raise NotImplementedError # array or dataframe that is uninitialized # A node with no underlying data source return type(self)(self.context, node, access_policy=self.access_policy) diff --git a/tiled/client/__init__.py b/tiled/client/__init__.py index 6a380c31a..721429207 100644 --- a/tiled/client/__init__.py +++ b/tiled/client/__init__.py @@ -1,4 +1,4 @@ from .constructors import from_context, from_profile, from_uri # noqa: F401 +from .container import ASCENDING, DESCENDING # noqa: F401 from .context import Context # noqa: F401 -from .node import ASCENDING, DESCENDING # noqa: F401 from .utils import hide_logs, record_history, show_logs # noqa: F401 diff --git a/tiled/client/base.py b/tiled/client/base.py index e1a837cff..6246b500d 100644 --- a/tiled/client/base.py +++ b/tiled/client/base.py @@ -233,9 +233,7 @@ def update_metadata(self, metadata=None, specs=None, references=None): @property def metadata_revisions(self): if self._metadata_revisions is None: - link = self.item["links"]["self"].replace( - "/node/metadata", "/node/revisions", 1 - ) + link = self.item["links"]["self"].replace("/metadata", "/revisions", 1) self._metadata_revisions = MetadataRevisions(self.context, link) return self._metadata_revisions diff --git a/tiled/client/constructors.py b/tiled/client/constructors.py index 3a67cd464..2aaf89efc 100644 --- a/tiled/client/constructors.py +++ b/tiled/client/constructors.py @@ -4,8 +4,8 @@ import httpx from ..utils import import_object, prepend_to_sys_path +from .container import DEFAULT_STRUCTURE_CLIENT_DISPATCH, Container from .context import DEFAULT_TIMEOUT_PARAMS, DEFAULT_TOKEN_CACHE, UNSET, Context -from .node import DEFAULT_STRUCTURE_CLIENT_DISPATCH, Node from .utils import ClientError, client_for_item @@ -111,8 +111,8 @@ def from_context( raise ValueError("Use api_key or username/auth_provider, not both.") node_path_parts = node_path_parts or [] # Do entrypoint discovery if it hasn't yet been done. - if Node.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS is None: - Node.discover_clients_from_entrypoints() + if Container.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS is None: + Container.discover_clients_from_entrypoints() # Interpret structure_clients="numpy" and structure_clients="dask" shortcuts. if isinstance(structure_clients, str): structure_clients = DEFAULT_STRUCTURE_CLIENT_DISPATCH[structure_clients] @@ -136,7 +136,7 @@ def from_context( prompt_for_reauthentication=prompt_for_reauthentication, ) # Context ensures that context.api_uri has a trailing slash. 
- item_uri = f"{context.api_uri}node/metadata/{'/'.join(node_path_parts)}" + item_uri = f"{context.api_uri}metadata/{'/'.join(node_path_parts)}" try: content = context.get_json(item_uri) except ClientError as err: diff --git a/tiled/client/container.py b/tiled/client/container.py new file mode 100644 index 000000000..d5684900e --- /dev/null +++ b/tiled/client/container.py @@ -0,0 +1,1003 @@ +import base64 +import collections +import collections.abc +import importlib +import itertools +import time +import warnings +from dataclasses import asdict + +import entrypoints + +from ..adapters.utils import IndexersMixin +from ..iterviews import ItemsView, KeysView, ValuesView +from ..queries import KeyLookup +from ..query_registration import query_registry +from ..structures.core import Spec, StructureFamily +from ..utils import UNCHANGED, OneShotCachedMap, Sentinel, node_repr +from .base import BaseClient +from .cache import Revalidate, verify_cache +from .utils import ClientError, client_for_item, export_util + + +class Container(BaseClient, collections.abc.Mapping, IndexersMixin): + # This maps the structure_family sent by the server to a client-side object that + # can interpret the structure_family's structure and content. OneShotCachedMap is used to + # defer imports. + + # This is populated when the first instance is created. + STRUCTURE_CLIENTS_FROM_ENTRYPOINTS = None + + @classmethod + def _discover_entrypoints(cls, entrypoint_name): + return OneShotCachedMap( + { + name: entrypoint.load + for name, entrypoint in entrypoints.get_group_named( + entrypoint_name + ).items() + } + ) + + @classmethod + def discover_clients_from_entrypoints(cls): + """ + Search the software environment for libraries that register structure clients. + + This is called once automatically the first time Node.from_uri + is called. It is idempotent. + """ + if cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS is not None: + # short-circuit + return + # The modules associated with these entrypoints will be imported + # lazily, only when the item is first accessed. + cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS = OneShotCachedMap() + # Check old name (special_client) and new name (structure_client). + for entrypoint_name in ["tiled.special_client", "tiled.structure_client"]: + for name, entrypoint in entrypoints.get_group_named( + entrypoint_name + ).items(): + cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS.set(name, entrypoint.load) + DEFAULT_STRUCTURE_CLIENT_DISPATCH["numpy"].set(name, entrypoint.load) + DEFAULT_STRUCTURE_CLIENT_DISPATCH["dask"].set(name, entrypoint.load) + + def __init__( + self, + context, + *, + item, + structure_clients, + queries=None, + sorting=None, + structure=None, + ): + "This is not user-facing. Use Node.from_uri." + + self.structure_clients = structure_clients + self._queries = list(queries or []) + self._queries_as_params = _queries_to_params(*self._queries) + # If the user has not specified a sorting, give the server the opportunity + # to tell us the default sorting. + if sorting: + self._sorting = sorting + else: + # In the Python API we encode sorting as (key, direction). + # This order-based "record" notion does not play well with OpenAPI. + # In the HTTP API, therefore, we use {"key": key, "direction": direction}. 
+ self._sorting = [ + (s["key"], int(s["direction"])) + for s in (item["attributes"].get("sorting") or []) + ] + sorting = sorting or item["attributes"].get("sorting") + self._sorting_params = { + "sort": ",".join( + f"{'-' if item[1] < 0 else ''}{item[0]}" for item in self._sorting + ) + } + self._reversed_sorting_params = { + "sort": ",".join( + f"{'-' if item[1] > 0 else ''}{item[0]}" for item in self._sorting + ) + } + super().__init__( + context=context, + item=item, + structure_clients=structure_clients, + ) + + def __repr__(self): + # Display up to the first N keys to avoid making a giant service + # request. Use _keys_slicer because it is unauthenticated. + N = 10 + return node_repr(self, self._keys_slice(0, N, direction=1)) + + @property + def sorting(self): + """ + The current sorting of this Node + + Given as a list of tuples where the first entry is the sorting key + and the second entry indicates ASCENDING (or 1) or DESCENDING (or -1). + """ + return list(self._sorting) + + def download(self): + """ + Access all the data in this Node. + + This causes it to be cached if the context is configured with a cache. + """ + verify_cache(self.context.cache) + self.context.get_json(self.uri) + repr(self) + for key in self: + entry = self[key] + entry.download() + + def refresh(self, force=False): + """ + Refresh cached data for this node. + + Parameters + ---------- + force: bool + If False, (default) refresh only expired cache entries. + If True, refresh all cache entries. + """ + if force: + revalidate = Revalidate.FORCE + else: + revalidate = Revalidate.IF_EXPIRED + with self.context.revalidation(revalidate): + self.download() + + def new_variation( + self, + *, + structure_clients=UNCHANGED, + queries=UNCHANGED, + sorting=UNCHANGED, + **kwargs, + ): + """ + Create a copy of this Node, optionally varying some parameters. + + This is intended primarily for internal use and use by subclasses. + """ + if isinstance(structure_clients, str): + structure_clients = DEFAULT_STRUCTURE_CLIENT_DISPATCH[structure_clients] + if structure_clients is UNCHANGED: + structure_clients = self.structure_clients + if queries is UNCHANGED: + queries = self._queries + if sorting is UNCHANGED: + sorting = self._sorting + return super().new_variation( + context=self.context, + structure_clients=structure_clients, + queries=queries, + sorting=sorting, + **kwargs, + ) + + def __len__(self): + # If the contents of this node was provided in-line, there is an + # implication that the contents are not expected to be dynamic. Used the + # count provided in the structure. + structure = self.item["attributes"]["structure"] + if structure["contents"]: + return structure["count"] + now = time.monotonic() + if self._cached_len is not None: + length, deadline = self._cached_len + if now < deadline: + # Used the cached value and do not make any request. + return length + content = self.context.get_json( + self.item["links"]["search"], + params={ + "fields": "", + **self._queries_as_params, + **self._sorting_params, + }, + ) + length = content["meta"]["count"] + self._cached_len = (length, now + LENGTH_CACHE_TTL) + return length + + def __length_hint__(self): + # TODO The server should provide an estimated count. + # https://www.python.org/dev/peps/pep-0424/ + return len(self) + + def __iter__(self, _ignore_inlined_contents=False): + # If the contents of this node was provided in-line, and we don't need + # to apply any filtering or sorting, we can slice the in-lined data + # without fetching anything from the server. 
+ contents = self.item["attributes"]["structure"]["contents"] + if ( + (contents is not None) + and (not self._queries) + and ((not self.sorting) or (self.sorting == [("_", 1)])) + and (not _ignore_inlined_contents) + ): + return (yield from contents) + next_page_url = self.item["links"]["search"] + while next_page_url is not None: + content = self.context.get_json( + next_page_url, + params={ + "fields": "", + **self._queries_as_params, + **self._sorting_params, + }, + ) + self._cached_len = ( + content["meta"]["count"], + time.monotonic() + LENGTH_CACHE_TTL, + ) + for item in content["data"]: + yield item["id"] + next_page_url = content["links"]["next"] + + def __getitem__(self, keys, _ignore_inlined_contents=False): + # These are equivalent: + # + # >>> node['a']['b']['c'] + # >>> node[('a', 'b', 'c')] + # >>> node['a', 'b', 'c'] + # + # The last two are equivalent at a Python level; + # both call node.__getitem__(('a', 'b', 'c')). + # + # We elide this into a single request to the server rather than + # a chain of requests. This is not totally straightforward because + # of this use case: + # + # >>> node.search(...)['a', 'b'] + # + # which must only return a result if 'a' is contained in the search results. + if not isinstance(keys, tuple): + keys = (keys,) + if self._queries: + # Lookup this key *within the search results* of this Node. + key, *tail = keys + tail = tuple(tail) # list -> tuple + content = self.context.get_json( + self.item["links"]["search"], + params={ + **_queries_to_params(KeyLookup(key)), + **self._queries_as_params, + **self._sorting_params, + }, + ) + self._cached_len = ( + content["meta"]["count"], + time.monotonic() + LENGTH_CACHE_TTL, + ) + data = content["data"] + if not data: + raise KeyError(key) + assert ( + len(data) == 1 + ), "The key lookup query must never result more than one result." + (item,) = data + result = client_for_item(self.context, self.structure_clients, item) + if tail: + result = result[tail] + else: + # Straightforwardly look up the keys under this node. + # There is no search filter in place, so if it is there + # then we want it. + + # The server may greedily send nested information about children + # ("inlined contents") to reduce latency. This is how we handle + # xarray Datasets efficiently, for example. + + # In a loop, walk the key(s). Use inlined contents if we have it. + # When we reach a key that we don't have inlined contents for, send + # out a single request with all the rest of the keys, and break + # the keys-walking loop. We are effectively "jumping" down the tree + # to the node of interest without downloading information about + # intermediate parents. + for i, key in enumerate(keys): + item = (self.item["attributes"]["structure"]["contents"] or {}).get(key) + if (item is None) or _ignore_inlined_contents: + # The item was not inlined, either because nothing was inlined + # or because it was added after we fetched the inlined contents. + # Make a request for it. + try: + self_link = self.item["links"]["self"] + if self_link.endswith("/"): + self_link = self_link[:-1] + content = self.context.get_json( + self_link + "".join(f"/{key}" for key in keys[i:]), + ) + except ClientError as err: + if err.response.status_code == 404: + # If this is a scalar lookup, raise KeyError("X") not KeyError(("X",)). 
+ err_arg = keys[i:] + if len(err_arg) == 1: + (err_arg,) = err_arg + raise KeyError(err_arg) + raise + item = content["data"] + break + result = client_for_item(self.context, self.structure_clients, item) + return result + + def delete(self, key): + self._cached_len = None + self.context.delete_content(f"{self.uri}/{key}", None) + + # The following two methods are used by keys(), values(), items(). + + def _keys_slice(self, start, stop, direction, _ignore_inlined_contents=False): + # If the contents of this node was provided in-line, and we don't need + # to apply any filtering or sorting, we can slice the in-lined data + # without fetching anything from the server. + contents = self.item["attributes"]["structure"]["contents"] + if ( + (contents is not None) + and (not self._queries) + and ((not self.sorting) or (self.sorting == [("_", 1)])) + and (not _ignore_inlined_contents) + ): + keys = list(contents) + if direction < 0: + keys = list(reversed(keys)) + return (yield from keys[start:stop]) + if direction > 0: + sorting_params = self._sorting_params + else: + sorting_params = self._reversed_sorting_params + assert start >= 0 + assert (stop is None) or (stop >= 0) + next_page_url = f"{self.item['links']['search']}?page[offset]={start}" + item_counter = itertools.count(start) + while next_page_url is not None: + content = self.context.get_json( + next_page_url, + params={ + "fields": "", + **self._queries_as_params, + **sorting_params, + }, + ) + self._cached_len = ( + content["meta"]["count"], + time.monotonic() + LENGTH_CACHE_TTL, + ) + for item in content["data"]: + if stop is not None and next(item_counter) == stop: + return + yield item["id"] + next_page_url = content["links"]["next"] + + def _items_slice(self, start, stop, direction, _ignore_inlined_contents=False): + # If the contents of this node was provided in-line, and we don't need + # to apply any filtering or sorting, we can slice the in-lined data + # without fetching anything from the server. + contents = self.item["attributes"]["structure"]["contents"] + if ( + (contents is not None) + and (not self._queries) + and ((not self.sorting) or (self.sorting == [("_", 1)])) + and (not _ignore_inlined_contents) + ): + items = list(contents.items()) + if direction < 0: + items = list(reversed(items)) + for key, item in items[start:stop]: + yield key, client_for_item( + self.context, + self.structure_clients, + item, + ) + return + if direction > 0: + sorting_params = self._sorting_params + else: + sorting_params = self._reversed_sorting_params + assert start >= 0 + assert (stop is None) or (stop >= 0) + next_page_url = f"{self.item['links']['search']}?page[offset]={start}" + item_counter = itertools.count(start) + while next_page_url is not None: + content = self.context.get_json( + next_page_url, + params={**self._queries_as_params, **sorting_params}, + ) + self._cached_len = ( + content["meta"]["count"], + time.monotonic() + LENGTH_CACHE_TTL, + ) + + for item in content["data"]: + if stop is not None and next(item_counter) == stop: + return + key = item["id"] + yield key, client_for_item( + self.context, + self.structure_clients, + item, + ) + next_page_url = content["links"]["next"] + + def keys(self): + return KeysView(lambda: len(self), self._keys_slice) + + def values(self): + return ValuesView(lambda: len(self), self._items_slice) + + def items(self): + return ItemsView(lambda: len(self), self._items_slice) + + def search(self, query): + """ + Make a Node with a subset of this Node's entries, filtered by query. 
+ + Examples + -------- + + >>> from tiled.queries import FullText + >>> tree.search(FullText("hello")) + """ + return self.new_variation(queries=self._queries + [query]) + + def distinct( + self, *metadata_keys, structure_families=False, specs=False, counts=False + ): + """ + Get the unique values and optionally counts of metadata_keys, + structure_families, and specs in this Node's entries + + Examples + -------- + + Query all the distinct values of a key. + + >>> tree.distinct("foo", counts=True) + + Query for multiple keys at once. + + >>> tree.distinct("foo", "bar", counts=True) + """ + + link = self.item["links"]["self"].replace("/metadata", "/distinct", 1) + distinct = self.context.get_json( + link, + params={ + "metadata": metadata_keys, + "structure_families": structure_families, + "specs": specs, + "counts": counts, + **self._queries_as_params, + }, + ) + return distinct + + def sort(self, *sorting): + """ + Make a Node with the same entries but sorted according to `sorting`. + + Examples + -------- + + Sort by "color" in ascending order, and then by "height" in descending order. + + >>> from tiled.client import ASCENDING, DESCENDING + >>> tree.sort(("color", ASCENDING), ("height", DESCENDING)) + + Note that ``1`` may be used as a synonym for ``ASCENDING``, and ``-1`` + may be used as a synonym for ``DESCENDING``. + """ + return self.new_variation(sorting=sorting) + + def export(self, filepath, fields=None, *, format=None): + """ + Download metadata and data below this node in some format and write to a file. + + Parameters + ---------- + file: str or buffer + Filepath or writeable buffer. + fields: List[str], optional + Filter which items in this node to export. + format : str, optional + If format is None and `file` is a filepath, the format is inferred + from the name, like 'table.h5' implies format="application/x-hdf5". The format + may be given as a file extension ("h5") or a media type ("application/x-hdf5"). + + Examples + -------- + + Export all. + + >>> a.export("everything.h5") + + """ + params = {} + if fields is not None: + params["field"] = fields + return export_util( + filepath, + format, + self.context.get_content, + self.item["links"]["full"], + params=params, + ) + + def _ipython_key_completions_(self): + """ + Provide method for the key-autocompletions in IPython. + + See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion + """ + MAX_ENTRIES_SUPPORTED = 40 + try: + if len(self) > MAX_ENTRIES_SUPPORTED: + MSG = ( + "Tab-completition is not supported on this particular Node " + "because it has a large number of entries." + ) + warnings.warn(MSG) + return [] + else: + return list(self) + except Exception: + # Do not print messy traceback from thread. Just fail silently. + return [] + + def new( + self, + structure_family, + structure, + *, + key=None, + metadata=None, + specs=None, + references=None, + ): + """ + Create a new item within this Node. + + This is a low-level method. See high-level convenience methods listed below. + + See Also + -------- + write_array + write_dataframe + write_coo_array + """ + self._cached_len = None + metadata = metadata or {} + specs = specs or [] + normalized_specs = [] + for spec in specs: + if isinstance(spec, str): + spec = Spec(spec) + normalized_specs.append(asdict(spec)) + references = references or [] + data_sources = [] + if structure_family != StructureFamily.container: + # TODO Handle multiple data sources. 
+ data_sources.append({"structure": asdict(structure)}) + item = { + "attributes": { + "metadata": metadata, + "structure_family": StructureFamily(structure_family), + "specs": normalized_specs, + "references": references, + "data_sources": data_sources, + } + } + + if structure_family == StructureFamily.dataframe: + # send bytes base64 encoded + item["attributes"]["data_sources"][0]["structure"]["micro"][ + "meta" + ] = base64.b64encode( + item["attributes"]["data_sources"][0]["structure"]["micro"]["meta"] + ).decode() + item["attributes"]["data_sources"][0]["structure"]["micro"][ + "divisions" + ] = base64.b64encode( + item["attributes"]["data_sources"][0]["structure"]["micro"]["divisions"] + ).decode() + + body = dict(item["attributes"]) + if key is not None: + body["id"] = key + document = self.context.post_json(self.uri, body) + item["attributes"]["structure"] = structure + + # if server returned modified metadata update the local copy + if "metadata" in document: + item["attributes"]["metadata"] = document.pop("metadata") + # Ditto for structure + if "structure" in document: + item["attributes"]["structure"] = document.pop("structure") + + # Merge in "id" and "links" returned by the server. + item.update(document) + + return client_for_item( + self.context, + self.structure_clients, + item, + structure=structure, + ) + + # When (re)chunking arrays for upload, we use this limit + # to attempt to avoid bumping into size limits. + _SUGGESTED_MAX_UPLOAD_SIZE = 100_000_000 # 100 MB + + def create_node( + self, key=None, *, metadata=None, dims=None, specs=None, references=None + ): + """ + EXPERIMENTAL: Write an array. + + Parameters + ---------- + key : str, optional + Key (name) for this new node. If None, the server will provide a unique key. + metadata : dict, optional + User metadata. May be nested. Must contain only basic types + (e.g. numbers, strings, lists, dicts) that are JSON-serializable. + dims : List[str], optional + A label for each dimension of the array. + specs : List[Spec], optional + List of names that are used to label that the data and/or metadata + conform to some named standard specification. + references : List[Dict[str, URL]], optional + References (e.g. links) to related information. This may include + links into other Tiled data sets, search results, or external + resources unrelated to Tiled. + + """ + return self.new( + StructureFamily.container, + {"contents": None, "count": None}, + key=key, + metadata=metadata, + specs=specs, + references=references, + ) + + def write_array( + self, array, *, key=None, metadata=None, dims=None, specs=None, references=None + ): + """ + EXPERIMENTAL: Write an array. + + Parameters + ---------- + array : array-like + key : str, optional + Key (name) for this new node. If None, the server will provide a unique key. + metadata : dict, optional + User metadata. May be nested. Must contain only basic types + (e.g. numbers, strings, lists, dicts) that are JSON-serializable. + dims : List[str], optional + A label for each dimension of the array. + specs : List[Spec], optional + List of names that are used to label that the data and/or metadata + conform to some named standard specification. + references : List[Dict[str, URL]], optional + References (e.g. links) to related information. This may include + links into other Tiled data sets, search results, or external + resources unrelated to Tiled. 
+ + """ + import dask.array + import numpy + from dask.array.core import normalize_chunks + + from ..structures.array import ArrayMacroStructure, ArrayStructure, BuiltinDtype + + if not (hasattr(array, "shape") and hasattr(array, "dtype")): + # This does not implement enough of the array-like interface. + # Coerce to numpy. + array = numpy.asarray(array) + + # Determine chunks such that each chunk is not too large to upload. + # Any existing chunking will be taken into account. + # If the array is small, there will be only one chunk. + if hasattr(array, "chunks"): + chunks = normalize_chunks( + array.chunks, + limit=self._SUGGESTED_MAX_UPLOAD_SIZE, + dtype=array.dtype, + shape=array.shape, + ) + else: + chunks = normalize_chunks( + tuple("auto" for _ in array.shape), + limit=self._SUGGESTED_MAX_UPLOAD_SIZE, + dtype=array.dtype, + shape=array.shape, + ) + + structure = ArrayStructure( + macro=ArrayMacroStructure( + shape=array.shape, + chunks=chunks, + dims=dims, + ), + micro=BuiltinDtype.from_numpy_dtype(array.dtype), + ) + client = self.new( + StructureFamily.array, + structure, + key=key, + metadata=metadata, + specs=specs, + references=references, + ) + chunked = any(len(dim) > 1 for dim in chunks) + if not chunked: + client.write(array) + else: + # Fan out client.write_block over each chunk using dask. + if isinstance(array, dask.array.Array): + da = array.rechunk(chunks) + else: + da = dask.array.from_array(array, chunks=chunks) + + # Dask inspects the signature and passes block_id in if present. + # It also apparently calls it with an empty array and block_id + # once, so we catch that call and become a no-op. + def write_block(x, block_id, client): + if len(block_id): + client.write_block(x, block=block_id) + return x + + # TODO Is there a fire-and-forget analogue such that we don't need + # to bother with the return type? + da.map_blocks(write_block, dtype=da.dtype, client=client).compute() + return client + + def write_sparse( + self, + coords, + data, + shape, + *, + key=None, + metadata=None, + dims=None, + specs=None, + references=None, + ): + """ + EXPERIMENTAL: Write a sparse array. + + Parameters + ---------- + coords : array-like + data : array-like + shape : tuple + key : str, optional + Key (name) for this new node. If None, the server will provide a unique key. + metadata : dict, optional + User metadata. May be nested. Must contain only basic types + (e.g. numbers, strings, lists, dicts) that are JSON-serializable. + dims : List[str], optional + A label for each dimension of the array. + specs : List[Spec], optional + List of names that are used to label that the data and/or metadata + conform to some named standard specification. + references : List[Dict[str, URL]], optional + References (e.g. links) to related information. This may include + links into other Tiled data sets, search results, or external + resources unrelated to Tiled. + + Examples + -------- + + Write a sparse.COO array. + + >>> import sparse + >>> coo = sparse.COO(coords=[[2, 5]], data=[1.3, 7.5], shape=(10,)) + >>> c.write_sparse(coords=coo.coords, data=coo.data, shape=coo.shape) + + This only supports a single chunk. For chunked upload, use lower-level methods. + + # Define the overall shape and the dimensions of each chunk. + >>> from tiled.structures.sparse import COOStructure + >>> x = c.new("sparse", COOStructure(shape=(10,), chunks=((5, 5),))) + # Upload the data in each chunk. + # Coords are given with in the reference frame of each chunk. 
+ >>> x.write_block(coords=[[2, 4]], data=[3.1, 2.8], block=(0,)) + >>> x.write_block(coords=[[0, 1]], data=[6.7, 1.2], block=(1,)) + """ + from ..structures.sparse import COOStructure + + structure = COOStructure( + shape=shape, + # This method only supports single-chunk COO arrays. + chunks=tuple((dim,) for dim in shape), + dims=dims, + ) + client = self.new( + StructureFamily.sparse, + structure, + key=key, + metadata=metadata, + specs=specs, + references=references, + ) + client.write(coords, data) + return client + + def write_dataframe( + self, dataframe, *, key=None, metadata=None, specs=None, references=None + ): + """ + EXPERIMENTAL: Write a DataFrame. + + This is subject to change or removal without notice + + Parameters + ---------- + dataframe : pandas.DataFrame + key : str, optional + Key (name) for this new node. If None, the server will provide a unique key. + metadata : dict, optional + User metadata. May be nested. Must contain only basic types + (e.g. numbers, strings, lists, dicts) that are JSON-serializable. + specs : List[Spec], optional + List of names that are used to label that the data and/or metadata + conform to some named standard specification. + references : List[Dict[str, URL]], optional + References (e.g. links) to related information. This may include + links into other Tiled data sets, search results, or external + resources unrelated to Tiled. + """ + import dask.dataframe + import pandas + + from ..serialization.dataframe import serialize_arrow + from ..structures.dataframe import ( + DataFrameMacroStructure, + DataFrameMicroStructure, + DataFrameStructure, + ) + + metadata = metadata or {} + specs = specs or [] + + if isinstance(dataframe, dask.dataframe.DataFrame): + meta = bytes(serialize_arrow(dataframe._meta, {})) + divisions = bytes( + serialize_arrow( + pandas.DataFrame({"divisions": list(dataframe.divisions)}), {} + ) + ) + micro = DataFrameMicroStructure(meta=meta, divisions=divisions) + npartitions = dataframe.npartitions + else: + micro = DataFrameMicroStructure.from_dataframe(dataframe) + npartitions = 1 + + structure = DataFrameStructure( + micro=micro, + macro=DataFrameMacroStructure( + npartitions=npartitions, columns=list(dataframe.columns) + ), + ) + + client = self.new( + StructureFamily.dataframe, + structure, + key=key, + metadata=metadata, + specs=specs, + references=references, + ) + + if hasattr(dataframe, "partitions"): + if isinstance(dataframe, dask.dataframe.DataFrame): + ddf = dataframe + else: + raise NotImplementedError( + f"Unsure how to handle type {type(dataframe)}" + ) + + def write_partition(x, partition_info): + client.write_partition(x, partition_info["number"]) + return x + + ddf.map_partitions(write_partition, meta=dataframe._meta).compute() + else: + client.write(dataframe) + + return client + + +def _queries_to_params(*queries): + "Compute GET params from the queries." + params = collections.defaultdict(list) + for query in queries: + name = query_registry.query_type_to_name[type(query)] + for field, value in query.encode().items(): + if value is not None: + params[f"filter[{name}][condition][{field}]"].append(value) + return dict(params) + + +LENGTH_CACHE_TTL = 1 # second + + +class Ascending(Sentinel): + "Intended for more readable sorting operations. An alias for 1." + + def __index__(self): + return 1 + + +class Descending(Sentinel): + "Intended for more readable sorting operations. An alias for -1." + + def __index__(self): + return -1 + + +ASCENDING = Ascending("ASCENDING") +"Ascending sort order. 
An alias for 1."
+DESCENDING = Descending("DESCENDING")
+"Descending sort order. An alias for -1."
+
+
+class _LazyLoad:
+    # This exists because lambdas and closures cannot be pickled.
+    def __init__(self, import_module_args, attr_name):
+        self.import_module_args = import_module_args
+        self.attr_name = attr_name
+
+    def __call__(self):
+        return getattr(
+            importlib.import_module(*self.import_module_args), self.attr_name
+        )
+
+
+class _Wrap:
+    # This exists because lambdas and closures cannot be pickled.
+    def __init__(self, obj):
+        self.obj = obj
+
+    def __call__(self):
+        return self.obj
+
+
+DEFAULT_STRUCTURE_CLIENT_DISPATCH = {
+    "numpy": OneShotCachedMap(
+        {
+            "container": _Wrap(Container),
+            "array": _LazyLoad(("..array", Container.__module__), "ArrayClient"),
+            "dataframe": _LazyLoad(
+                ("..dataframe", Container.__module__), "DataFrameClient"
+            ),
+            "sparse": _LazyLoad(("..sparse", Container.__module__), "SparseClient"),
+            "xarray_dataset": _LazyLoad(
+                ("..xarray", Container.__module__), "DatasetClient"
+            ),
+        }
+    ),
+    "dask": OneShotCachedMap(
+        {
+            "container": _Wrap(Container),
+            "array": _LazyLoad(("..array", Container.__module__), "DaskArrayClient"),
+            "dataframe": _LazyLoad(
+                ("..dataframe", Container.__module__), "DaskDataFrameClient"
+            ),
+            "sparse": _LazyLoad(("..sparse", Container.__module__), "SparseClient"),
+            "xarray_dataset": _LazyLoad(
+                ("..xarray", Container.__module__), "DaskDatasetClient"
+            ),
+        }
+    ),
+}
diff --git a/tiled/client/context.py b/tiled/client/context.py
index 8101166f5..eaabc02c9 100644
--- a/tiled/client/context.py
+++ b/tiled/client/context.py
@@ -247,14 +247,14 @@ def from_any_uri(
     """
     Accept a URI to a specific node.
 
-    For example, given URI "https://example.com/api/v1/node/metadata/a/b/c"
+    For example, given URI "https://example.com/api/v1/metadata/a/b/c"
     return a Context connected to "https://examples/api/v1"
     and the list ["a", "b", "c"].
     """
     uri = httpx.URL(uri)
     node_path_parts = []
-    if "/node/metadata" in uri.path:
-        api_path, _, node_path = uri.path.partition("/node/metadata")
+    if "/metadata" in uri.path:
+        api_path, _, node_path = uri.path.partition("/metadata")
         api_uri = uri.copy_with(path=api_path)
         node_path_parts.extend(
             [segment for segment in node_path.split("/") if segment]
diff --git a/tiled/client/node.py b/tiled/client/node.py
index d30ccbdef..5fef0978a 100644
--- a/tiled/client/node.py
+++ b/tiled/client/node.py
@@ -1,999 +1,10 @@
-import base64
-import collections
-import collections.abc
-import importlib
-import itertools
-import time
 import warnings
-from dataclasses import asdict
 
-import entrypoints
+warnings.warn(
+    """The module 'tiled.client.node' has been moved to 'tiled.client.container' and
+the object 'Node' has been renamed 'Container'.""",
+    DeprecationWarning,
+)
+from .container import *  # noqa
 
-from ..adapters.utils import IndexersMixin
-from ..iterviews import ItemsView, KeysView, ValuesView
-from ..queries import KeyLookup
-from ..query_registration import query_registry
-from ..structures.core import Spec, StructureFamily
-from ..utils import UNCHANGED, OneShotCachedMap, Sentinel, node_repr
-from .base import BaseClient
-from .cache import Revalidate, verify_cache
-from .utils import ClientError, client_for_item, export_util
-
-
-class Node(BaseClient, collections.abc.Mapping, IndexersMixin):
-    # This maps the structure_family sent by the server to a client-side object that
-    # can interpret the structure_family's structure and content. OneShotCachedMap is used to
-    # defer imports.
- - # This is populated when the first instance is created. - STRUCTURE_CLIENTS_FROM_ENTRYPOINTS = None - - @classmethod - def _discover_entrypoints(cls, entrypoint_name): - return OneShotCachedMap( - { - name: entrypoint.load - for name, entrypoint in entrypoints.get_group_named( - entrypoint_name - ).items() - } - ) - - @classmethod - def discover_clients_from_entrypoints(cls): - """ - Search the software environment for libraries that register structure clients. - - This is called once automatically the first time Node.from_uri - is called. It is idempotent. - """ - if cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS is not None: - # short-circuit - return - # The modules associated with these entrypoints will be imported - # lazily, only when the item is first accessed. - cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS = OneShotCachedMap() - # Check old name (special_client) and new name (structure_client). - for entrypoint_name in ["tiled.special_client", "tiled.structure_client"]: - for name, entrypoint in entrypoints.get_group_named( - entrypoint_name - ).items(): - cls.STRUCTURE_CLIENTS_FROM_ENTRYPOINTS.set(name, entrypoint.load) - DEFAULT_STRUCTURE_CLIENT_DISPATCH["numpy"].set(name, entrypoint.load) - DEFAULT_STRUCTURE_CLIENT_DISPATCH["dask"].set(name, entrypoint.load) - - def __init__( - self, - context, - *, - item, - structure_clients, - queries=None, - sorting=None, - structure=None, - ): - "This is not user-facing. Use Node.from_uri." - - self.structure_clients = structure_clients - self._queries = list(queries or []) - self._queries_as_params = _queries_to_params(*self._queries) - # If the user has not specified a sorting, give the server the opportunity - # to tell us the default sorting. - if sorting: - self._sorting = sorting - else: - # In the Python API we encode sorting as (key, direction). - # This order-based "record" notion does not play well with OpenAPI. - # In the HTTP API, therefore, we use {"key": key, "direction": direction}. - self._sorting = [ - (s["key"], int(s["direction"])) - for s in (item["attributes"].get("sorting") or []) - ] - sorting = sorting or item["attributes"].get("sorting") - self._sorting_params = { - "sort": ",".join( - f"{'-' if item[1] < 0 else ''}{item[0]}" for item in self._sorting - ) - } - self._reversed_sorting_params = { - "sort": ",".join( - f"{'-' if item[1] > 0 else ''}{item[0]}" for item in self._sorting - ) - } - super().__init__( - context=context, - item=item, - structure_clients=structure_clients, - ) - - def __repr__(self): - # Display up to the first N keys to avoid making a giant service - # request. Use _keys_slicer because it is unauthenticated. - N = 10 - return node_repr(self, self._keys_slice(0, N, direction=1)) - - @property - def sorting(self): - """ - The current sorting of this Node - - Given as a list of tuples where the first entry is the sorting key - and the second entry indicates ASCENDING (or 1) or DESCENDING (or -1). - """ - return list(self._sorting) - - def download(self): - """ - Access all the data in this Node. - - This causes it to be cached if the context is configured with a cache. - """ - verify_cache(self.context.cache) - self.context.get_json(self.uri) - repr(self) - for key in self: - entry = self[key] - entry.download() - - def refresh(self, force=False): - """ - Refresh cached data for this node. - - Parameters - ---------- - force: bool - If False, (default) refresh only expired cache entries. - If True, refresh all cache entries. 
- """ - if force: - revalidate = Revalidate.FORCE - else: - revalidate = Revalidate.IF_EXPIRED - with self.context.revalidation(revalidate): - self.download() - - def new_variation( - self, - *, - structure_clients=UNCHANGED, - queries=UNCHANGED, - sorting=UNCHANGED, - **kwargs, - ): - """ - Create a copy of this Node, optionally varying some parameters. - - This is intended primarily for internal use and use by subclasses. - """ - if isinstance(structure_clients, str): - structure_clients = DEFAULT_STRUCTURE_CLIENT_DISPATCH[structure_clients] - if structure_clients is UNCHANGED: - structure_clients = self.structure_clients - if queries is UNCHANGED: - queries = self._queries - if sorting is UNCHANGED: - sorting = self._sorting - return super().new_variation( - context=self.context, - structure_clients=structure_clients, - queries=queries, - sorting=sorting, - **kwargs, - ) - - def __len__(self): - # If the contents of this node was provided in-line, there is an - # implication that the contents are not expected to be dynamic. Used the - # count provided in the structure. - structure = self.item["attributes"]["structure"] - if structure["contents"]: - return structure["count"] - now = time.monotonic() - if self._cached_len is not None: - length, deadline = self._cached_len - if now < deadline: - # Used the cached value and do not make any request. - return length - content = self.context.get_json( - self.item["links"]["search"], - params={ - "fields": "", - **self._queries_as_params, - **self._sorting_params, - }, - ) - length = content["meta"]["count"] - self._cached_len = (length, now + LENGTH_CACHE_TTL) - return length - - def __length_hint__(self): - # TODO The server should provide an estimated count. - # https://www.python.org/dev/peps/pep-0424/ - return len(self) - - def __iter__(self, _ignore_inlined_contents=False): - # If the contents of this node was provided in-line, and we don't need - # to apply any filtering or sorting, we can slice the in-lined data - # without fetching anything from the server. - contents = self.item["attributes"]["structure"]["contents"] - if ( - (contents is not None) - and (not self._queries) - and ((not self.sorting) or (self.sorting == [("_", 1)])) - and (not _ignore_inlined_contents) - ): - return (yield from contents) - next_page_url = self.item["links"]["search"] - while next_page_url is not None: - content = self.context.get_json( - next_page_url, - params={ - "fields": "", - **self._queries_as_params, - **self._sorting_params, - }, - ) - self._cached_len = ( - content["meta"]["count"], - time.monotonic() + LENGTH_CACHE_TTL, - ) - for item in content["data"]: - yield item["id"] - next_page_url = content["links"]["next"] - - def __getitem__(self, keys, _ignore_inlined_contents=False): - # These are equivalent: - # - # >>> node['a']['b']['c'] - # >>> node[('a', 'b', 'c')] - # >>> node['a', 'b', 'c'] - # - # The last two are equivalent at a Python level; - # both call node.__getitem__(('a', 'b', 'c')). - # - # We elide this into a single request to the server rather than - # a chain of requests. This is not totally straightforward because - # of this use case: - # - # >>> node.search(...)['a', 'b'] - # - # which must only return a result if 'a' is contained in the search results. - if not isinstance(keys, tuple): - keys = (keys,) - if self._queries: - # Lookup this key *within the search results* of this Node. 
- key, *tail = keys - tail = tuple(tail) # list -> tuple - content = self.context.get_json( - self.item["links"]["search"], - params={ - **_queries_to_params(KeyLookup(key)), - **self._queries_as_params, - **self._sorting_params, - }, - ) - self._cached_len = ( - content["meta"]["count"], - time.monotonic() + LENGTH_CACHE_TTL, - ) - data = content["data"] - if not data: - raise KeyError(key) - assert ( - len(data) == 1 - ), "The key lookup query must never result more than one result." - (item,) = data - result = client_for_item(self.context, self.structure_clients, item) - if tail: - result = result[tail] - else: - # Straightforwardly look up the keys under this node. - # There is no search filter in place, so if it is there - # then we want it. - - # The server may greedily send nested information about children - # ("inlined contents") to reduce latency. This is how we handle - # xarray Datasets efficiently, for example. - - # In a loop, walk the key(s). Use inlined contents if we have it. - # When we reach a key that we don't have inlined contents for, send - # out a single request with all the rest of the keys, and break - # the keys-walking loop. We are effectively "jumping" down the tree - # to the node of interest without downloading information about - # intermediate parents. - for i, key in enumerate(keys): - item = (self.item["attributes"]["structure"]["contents"] or {}).get(key) - if (item is None) or _ignore_inlined_contents: - # The item was not inlined, either because nothing was inlined - # or because it was added after we fetched the inlined contents. - # Make a request for it. - try: - self_link = self.item["links"]["self"] - if self_link.endswith("/"): - self_link = self_link[:-1] - content = self.context.get_json( - self_link + "".join(f"/{key}" for key in keys[i:]), - ) - except ClientError as err: - if err.response.status_code == 404: - # If this is a scalar lookup, raise KeyError("X") not KeyError(("X",)). - err_arg = keys[i:] - if len(err_arg) == 1: - (err_arg,) = err_arg - raise KeyError(err_arg) - raise - item = content["data"] - break - result = client_for_item(self.context, self.structure_clients, item) - return result - - def delete(self, key): - self._cached_len = None - self.context.delete_content(f"{self.uri}/{key}", None) - - # The following two methods are used by keys(), values(), items(). - - def _keys_slice(self, start, stop, direction, _ignore_inlined_contents=False): - # If the contents of this node was provided in-line, and we don't need - # to apply any filtering or sorting, we can slice the in-lined data - # without fetching anything from the server. 
- contents = self.item["attributes"]["structure"]["contents"] - if ( - (contents is not None) - and (not self._queries) - and ((not self.sorting) or (self.sorting == [("_", 1)])) - and (not _ignore_inlined_contents) - ): - keys = list(contents) - if direction < 0: - keys = list(reversed(keys)) - return (yield from keys[start:stop]) - if direction > 0: - sorting_params = self._sorting_params - else: - sorting_params = self._reversed_sorting_params - assert start >= 0 - assert (stop is None) or (stop >= 0) - next_page_url = f"{self.item['links']['search']}?page[offset]={start}" - item_counter = itertools.count(start) - while next_page_url is not None: - content = self.context.get_json( - next_page_url, - params={ - "fields": "", - **self._queries_as_params, - **sorting_params, - }, - ) - self._cached_len = ( - content["meta"]["count"], - time.monotonic() + LENGTH_CACHE_TTL, - ) - for item in content["data"]: - if stop is not None and next(item_counter) == stop: - return - yield item["id"] - next_page_url = content["links"]["next"] - - def _items_slice(self, start, stop, direction, _ignore_inlined_contents=False): - # If the contents of this node was provided in-line, and we don't need - # to apply any filtering or sorting, we can slice the in-lined data - # without fetching anything from the server. - contents = self.item["attributes"]["structure"]["contents"] - if ( - (contents is not None) - and (not self._queries) - and ((not self.sorting) or (self.sorting == [("_", 1)])) - and (not _ignore_inlined_contents) - ): - items = list(contents.items()) - if direction < 0: - items = list(reversed(items)) - for key, item in items[start:stop]: - yield key, client_for_item( - self.context, - self.structure_clients, - item, - ) - return - if direction > 0: - sorting_params = self._sorting_params - else: - sorting_params = self._reversed_sorting_params - assert start >= 0 - assert (stop is None) or (stop >= 0) - next_page_url = f"{self.item['links']['search']}?page[offset]={start}" - item_counter = itertools.count(start) - while next_page_url is not None: - content = self.context.get_json( - next_page_url, - params={**self._queries_as_params, **sorting_params}, - ) - self._cached_len = ( - content["meta"]["count"], - time.monotonic() + LENGTH_CACHE_TTL, - ) - - for item in content["data"]: - if stop is not None and next(item_counter) == stop: - return - key = item["id"] - yield key, client_for_item( - self.context, - self.structure_clients, - item, - ) - next_page_url = content["links"]["next"] - - def keys(self): - return KeysView(lambda: len(self), self._keys_slice) - - def values(self): - return ValuesView(lambda: len(self), self._items_slice) - - def items(self): - return ItemsView(lambda: len(self), self._items_slice) - - def search(self, query): - """ - Make a Node with a subset of this Node's entries, filtered by query. - - Examples - -------- - - >>> from tiled.queries import FullText - >>> tree.search(FullText("hello")) - """ - return self.new_variation(queries=self._queries + [query]) - - def distinct( - self, *metadata_keys, structure_families=False, specs=False, counts=False - ): - """ - Get the unique values and optionally counts of metadata_keys, - structure_families, and specs in this Node's entries - - Examples - -------- - - Query all the distinct values of a key. - - >>> tree.distinct("foo", counts=True) - - Query for multiple keys at once. 
- - >>> tree.distinct("foo", "bar", counts=True) - """ - - link = self.item["links"]["self"].replace("/node/metadata", "/node/distinct", 1) - distinct = self.context.get_json( - link, - params={ - "metadata": metadata_keys, - "structure_families": structure_families, - "specs": specs, - "counts": counts, - **self._queries_as_params, - }, - ) - return distinct - - def sort(self, *sorting): - """ - Make a Node with the same entries but sorted according to `sorting`. - - Examples - -------- - - Sort by "color" in ascending order, and then by "height" in descending order. - - >>> from tiled.client import ASCENDING, DESCENDING - >>> tree.sort(("color", ASCENDING), ("height", DESCENDING)) - - Note that ``1`` may be used as a synonym for ``ASCENDING``, and ``-1`` - may be used as a synonym for ``DESCENDING``. - """ - return self.new_variation(sorting=sorting) - - def export(self, filepath, fields=None, *, format=None): - """ - Download metadata and data below this node in some format and write to a file. - - Parameters - ---------- - file: str or buffer - Filepath or writeable buffer. - fields: List[str], optional - Filter which items in this node to export. - format : str, optional - If format is None and `file` is a filepath, the format is inferred - from the name, like 'table.h5' implies format="application/x-hdf5". The format - may be given as a file extension ("h5") or a media type ("application/x-hdf5"). - - Examples - -------- - - Export all. - - >>> a.export("everything.h5") - - """ - params = {} - if fields is not None: - params["field"] = fields - return export_util( - filepath, - format, - self.context.get_content, - self.item["links"]["full"], - params=params, - ) - - def _ipython_key_completions_(self): - """ - Provide method for the key-autocompletions in IPython. - - See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion - """ - MAX_ENTRIES_SUPPORTED = 40 - try: - if len(self) > MAX_ENTRIES_SUPPORTED: - MSG = ( - "Tab-completition is not supported on this particular Node " - "because it has a large number of entries." - ) - warnings.warn(MSG) - return [] - else: - return list(self) - except Exception: - # Do not print messy traceback from thread. Just fail silently. - return [] - - def new( - self, - structure_family, - structure, - *, - key=None, - metadata=None, - specs=None, - references=None, - ): - """ - Create a new item within this Node. - - This is a low-level method. See high-level convenience methods listed below. - - See Also - -------- - write_array - write_dataframe - write_coo_array - """ - self._cached_len = None - metadata = metadata or {} - specs = specs or [] - normalized_specs = [] - for spec in specs: - if isinstance(spec, str): - spec = Spec(spec) - normalized_specs.append(asdict(spec)) - references = references or [] - data_sources = [] - if structure_family != StructureFamily.node: - # TODO Handle multiple data sources. 
- data_sources.append({"structure": asdict(structure)}) - item = { - "attributes": { - "metadata": metadata, - "structure_family": StructureFamily(structure_family), - "specs": normalized_specs, - "references": references, - "data_sources": data_sources, - } - } - - if structure_family == StructureFamily.dataframe: - # send bytes base64 encoded - item["attributes"]["data_sources"][0]["structure"]["micro"][ - "meta" - ] = base64.b64encode( - item["attributes"]["data_sources"][0]["structure"]["micro"]["meta"] - ).decode() - item["attributes"]["data_sources"][0]["structure"]["micro"][ - "divisions" - ] = base64.b64encode( - item["attributes"]["data_sources"][0]["structure"]["micro"]["divisions"] - ).decode() - - body = dict(item["attributes"]) - if key is not None: - body["id"] = key - document = self.context.post_json(self.uri, body) - item["attributes"]["structure"] = structure - - # if server returned modified metadata update the local copy - if "metadata" in document: - item["attributes"]["metadata"] = document.pop("metadata") - # Ditto for structure - if "structure" in document: - item["attributes"]["structure"] = document.pop("structure") - - # Merge in "id" and "links" returned by the server. - item.update(document) - - return client_for_item( - self.context, - self.structure_clients, - item, - structure=structure, - ) - - # When (re)chunking arrays for upload, we use this limit - # to attempt to avoid bumping into size limits. - _SUGGESTED_MAX_UPLOAD_SIZE = 100_000_000 # 100 MB - - def create_node( - self, key=None, *, metadata=None, dims=None, specs=None, references=None - ): - """ - EXPERIMENTAL: Write an array. - - Parameters - ---------- - key : str, optional - Key (name) for this new node. If None, the server will provide a unique key. - metadata : dict, optional - User metadata. May be nested. Must contain only basic types - (e.g. numbers, strings, lists, dicts) that are JSON-serializable. - dims : List[str], optional - A label for each dimension of the array. - specs : List[Spec], optional - List of names that are used to label that the data and/or metadata - conform to some named standard specification. - references : List[Dict[str, URL]], optional - References (e.g. links) to related information. This may include - links into other Tiled data sets, search results, or external - resources unrelated to Tiled. - - """ - return self.new( - StructureFamily.node, - {"contents": None, "count": None}, - key=key, - metadata=metadata, - specs=specs, - references=references, - ) - - def write_array( - self, array, *, key=None, metadata=None, dims=None, specs=None, references=None - ): - """ - EXPERIMENTAL: Write an array. - - Parameters - ---------- - array : array-like - key : str, optional - Key (name) for this new node. If None, the server will provide a unique key. - metadata : dict, optional - User metadata. May be nested. Must contain only basic types - (e.g. numbers, strings, lists, dicts) that are JSON-serializable. - dims : List[str], optional - A label for each dimension of the array. - specs : List[Spec], optional - List of names that are used to label that the data and/or metadata - conform to some named standard specification. - references : List[Dict[str, URL]], optional - References (e.g. links) to related information. This may include - links into other Tiled data sets, search results, or external - resources unrelated to Tiled. 
- - """ - import dask.array - import numpy - from dask.array.core import normalize_chunks - - from ..structures.array import ArrayMacroStructure, ArrayStructure, BuiltinDtype - - if not (hasattr(array, "shape") and hasattr(array, "dtype")): - # This does not implement enough of the array-like interface. - # Coerce to numpy. - array = numpy.asarray(array) - - # Determine chunks such that each chunk is not too large to upload. - # Any existing chunking will be taken into account. - # If the array is small, there will be only one chunk. - if hasattr(array, "chunks"): - chunks = normalize_chunks( - array.chunks, - limit=self._SUGGESTED_MAX_UPLOAD_SIZE, - dtype=array.dtype, - shape=array.shape, - ) - else: - chunks = normalize_chunks( - tuple("auto" for _ in array.shape), - limit=self._SUGGESTED_MAX_UPLOAD_SIZE, - dtype=array.dtype, - shape=array.shape, - ) - - structure = ArrayStructure( - macro=ArrayMacroStructure( - shape=array.shape, - chunks=chunks, - dims=dims, - ), - micro=BuiltinDtype.from_numpy_dtype(array.dtype), - ) - client = self.new( - StructureFamily.array, - structure, - key=key, - metadata=metadata, - specs=specs, - references=references, - ) - chunked = any(len(dim) > 1 for dim in chunks) - if not chunked: - client.write(array) - else: - # Fan out client.write_block over each chunk using dask. - if isinstance(array, dask.array.Array): - da = array.rechunk(chunks) - else: - da = dask.array.from_array(array, chunks=chunks) - - # Dask inspects the signature and passes block_id in if present. - # It also apparently calls it with an empty array and block_id - # once, so we catch that call and become a no-op. - def write_block(x, block_id, client): - if len(block_id): - client.write_block(x, block=block_id) - return x - - # TODO Is there a fire-and-forget analogue such that we don't need - # to bother with the return type? - da.map_blocks(write_block, dtype=da.dtype, client=client).compute() - return client - - def write_sparse( - self, - coords, - data, - shape, - *, - key=None, - metadata=None, - dims=None, - specs=None, - references=None, - ): - """ - EXPERIMENTAL: Write a sparse array. - - Parameters - ---------- - coords : array-like - data : array-like - shape : tuple - key : str, optional - Key (name) for this new node. If None, the server will provide a unique key. - metadata : dict, optional - User metadata. May be nested. Must contain only basic types - (e.g. numbers, strings, lists, dicts) that are JSON-serializable. - dims : List[str], optional - A label for each dimension of the array. - specs : List[Spec], optional - List of names that are used to label that the data and/or metadata - conform to some named standard specification. - references : List[Dict[str, URL]], optional - References (e.g. links) to related information. This may include - links into other Tiled data sets, search results, or external - resources unrelated to Tiled. - - Examples - -------- - - Write a sparse.COO array. - - >>> import sparse - >>> coo = sparse.COO(coords=[[2, 5]], data=[1.3, 7.5], shape=(10,)) - >>> c.write_sparse(coords=coo.coords, data=coo.data, shape=coo.shape) - - This only supports a single chunk. For chunked upload, use lower-level methods. - - # Define the overall shape and the dimensions of each chunk. - >>> from tiled.structures.sparse import COOStructure - >>> x = c.new("sparse", COOStructure(shape=(10,), chunks=((5, 5),))) - # Upload the data in each chunk. - # Coords are given with in the reference frame of each chunk. 
- >>> x.write_block(coords=[[2, 4]], data=[3.1, 2.8], block=(0,)) - >>> x.write_block(coords=[[0, 1]], data=[6.7, 1.2], block=(1,)) - """ - from ..structures.sparse import COOStructure - - structure = COOStructure( - shape=shape, - # This method only supports single-chunk COO arrays. - chunks=tuple((dim,) for dim in shape), - dims=dims, - ) - client = self.new( - StructureFamily.sparse, - structure, - key=key, - metadata=metadata, - specs=specs, - references=references, - ) - client.write(coords, data) - return client - - def write_dataframe( - self, dataframe, *, key=None, metadata=None, specs=None, references=None - ): - """ - EXPERIMENTAL: Write a DataFrame. - - This is subject to change or removal without notice - - Parameters - ---------- - dataframe : pandas.DataFrame - key : str, optional - Key (name) for this new node. If None, the server will provide a unique key. - metadata : dict, optional - User metadata. May be nested. Must contain only basic types - (e.g. numbers, strings, lists, dicts) that are JSON-serializable. - specs : List[Spec], optional - List of names that are used to label that the data and/or metadata - conform to some named standard specification. - references : List[Dict[str, URL]], optional - References (e.g. links) to related information. This may include - links into other Tiled data sets, search results, or external - resources unrelated to Tiled. - """ - import dask.dataframe - import pandas - - from ..serialization.dataframe import serialize_arrow - from ..structures.dataframe import ( - DataFrameMacroStructure, - DataFrameMicroStructure, - DataFrameStructure, - ) - - metadata = metadata or {} - specs = specs or [] - - if isinstance(dataframe, dask.dataframe.DataFrame): - meta = bytes(serialize_arrow(dataframe._meta, {})) - divisions = bytes( - serialize_arrow( - pandas.DataFrame({"divisions": list(dataframe.divisions)}), {} - ) - ) - micro = DataFrameMicroStructure(meta=meta, divisions=divisions) - npartitions = dataframe.npartitions - else: - micro = DataFrameMicroStructure.from_dataframe(dataframe) - npartitions = 1 - - structure = DataFrameStructure( - micro=micro, - macro=DataFrameMacroStructure( - npartitions=npartitions, columns=list(dataframe.columns) - ), - ) - - client = self.new( - StructureFamily.dataframe, - structure, - key=key, - metadata=metadata, - specs=specs, - references=references, - ) - - if hasattr(dataframe, "partitions"): - if isinstance(dataframe, dask.dataframe.DataFrame): - ddf = dataframe - else: - raise NotImplementedError( - f"Unsure how to handle type {type(dataframe)}" - ) - - def write_partition(x, partition_info): - client.write_partition(x, partition_info["number"]) - return x - - ddf.map_partitions(write_partition, meta=dataframe._meta).compute() - else: - client.write(dataframe) - - return client - - -def _queries_to_params(*queries): - "Compute GET params from the queries." - params = collections.defaultdict(list) - for query in queries: - name = query_registry.query_type_to_name[type(query)] - for field, value in query.encode().items(): - if value is not None: - params[f"filter[{name}][condition][{field}]"].append(value) - return dict(params) - - -LENGTH_CACHE_TTL = 1 # second - - -class Ascending(Sentinel): - "Intended for more readable sorting operations. An alias for 1." - - def __index__(self): - return 1 - - -class Descending(Sentinel): - "Intended for more readable sorting operations. An alias for -1." - - def __index__(self): - return -1 - - -ASCENDING = Ascending("ASCENDING") -"Ascending sort order. 
An alias for 1." -DESCENDING = Descending("DESCENDING") -"Decending sort order. An alias for -1." - - -class _LazyLoad: - # This exists because lambdas and closures cannot be pickled. - def __init__(self, import_module_args, attr_name): - self.import_module_args = import_module_args - self.attr_name = attr_name - - def __call__(self): - return getattr( - importlib.import_module(*self.import_module_args), self.attr_name - ) - - -class _Wrap: - # This exists because lambdas and closures cannot be pickled. - def __init__(self, obj): - self.obj = obj - - def __call__(self): - return self.obj - - -DEFAULT_STRUCTURE_CLIENT_DISPATCH = { - "numpy": OneShotCachedMap( - { - "node": _Wrap(Node), - "array": _LazyLoad(("..array", Node.__module__), "ArrayClient"), - "dataframe": _LazyLoad(("..dataframe", Node.__module__), "DataFrameClient"), - "sparse": _LazyLoad(("..sparse", Node.__module__), "SparseClient"), - "xarray_dataset": _LazyLoad(("..xarray", Node.__module__), "DatasetClient"), - } - ), - "dask": OneShotCachedMap( - { - "node": _Wrap(Node), - "array": _LazyLoad(("..array", Node.__module__), "DaskArrayClient"), - "dataframe": _LazyLoad( - ("..dataframe", Node.__module__), "DaskDataFrameClient" - ), - "sparse": _LazyLoad(("..sparse", Node.__module__), "SparseClient"), - "xarray_dataset": _LazyLoad( - ("..xarray", Node.__module__), "DaskDatasetClient" - ), - } - ), -} +Node = Container # noqa diff --git a/tiled/client/xarray.py b/tiled/client/xarray.py index 1059a2aab..d6be791f9 100644 --- a/tiled/client/xarray.py +++ b/tiled/client/xarray.py @@ -9,12 +9,12 @@ from ..serialization.dataframe import deserialize_arrow from ..structures.core import Spec from ..utils import APACHE_ARROW_FILE_MIME_TYPE -from .node import Node +from .container import Container LENGTH_LIMIT_FOR_WIDE_TABLE_OPTIMIZATION = 1_000_000 -class DaskDatasetClient(Node): +class DaskDatasetClient(Container): def _repr_pretty_(self, p, cycle): """ Provide "pretty" display in IPython/Jupyter. diff --git a/tiled/queries.py b/tiled/queries.py index 720cdece8..af5851a5b 100644 --- a/tiled/queries.py +++ b/tiled/queries.py @@ -383,7 +383,7 @@ def decode(cls, *, key, value): @register(name="specs") @dataclass(init=False) -class Specs: +class SpecsQuery: """ Query if specs list matches all elements in include list and does not match any element in exclude list @@ -397,7 +397,7 @@ class Specs: Search for specs ["foo", "bar"] and NOT "baz" - >>> c.search(Specs(include=["foo", "bar"], exclude=["baz"])) + >>> c.search(SpecsQuery(include=["foo", "bar"], exclude=["baz"])) """ include: List[str] @@ -426,11 +426,11 @@ def decode(cls, *, include, exclude): return cls(include=json.loads(include), exclude=json.loads(exclude)) -def Spec(spec): +def SpecQuery(spec): """ Convenience function for querying if specs list contains a given spec - Equivalent to Specs([spec]). + Equivalent to SpecsQuery([spec]). Parameters ---------- @@ -444,12 +444,12 @@ def Spec(spec): >>> c.search(Spec("foo")) """ - return Specs([spec]) + return SpecsQuery([spec]) @register(name="structure_family") @dataclass(init=False) -class StructureFamily: +class StructureFamilyQuery: """ Query if structure_families match value diff --git a/tiled/serialization/__init__.py b/tiled/serialization/__init__.py index b08351282..3aa3e08d6 100644 --- a/tiled/serialization/__init__.py +++ b/tiled/serialization/__init__.py @@ -7,9 +7,9 @@ def register_builtin_serializers(): """ # Each submodule in ..serialization registers serializers on import. 
# Some are conditional on the availability of particular I/O libraries. - from ..serialization import node as _node # noqa: F401 + from ..serialization import container as _container # noqa: F401 - del _node + del _container if modules_available("numpy", "dask.array"): from ..serialization import array as _array # noqa: F401 diff --git a/tiled/serialization/node.py b/tiled/serialization/container.py similarity index 91% rename from tiled/serialization/node.py rename to tiled/serialization/container.py index 65a767c3c..b7fbf2b9c 100644 --- a/tiled/serialization/node.py +++ b/tiled/serialization/container.py @@ -1,6 +1,7 @@ import io from ..media_type_registration import serialization_registry +from ..structures.core import StructureFamily from ..utils import SerializationError, modules_available, safe_json_dump @@ -17,7 +18,7 @@ async def walk(node, filter_for_access, pre=None): ] """ pre = pre[:] if pre else [] - if node.structure_family != "array": + if node.structure_family != StructureFamily.array: if hasattr(node, "items_range"): for key, value in await filter_for_access(node).items_range(0, None): async for d in walk(value, filter_for_access, pre + [key]): @@ -70,7 +71,9 @@ async def serialize_hdf5(node, metadata, filter_for_access): dataset.attrs.create(k, v) return buffer.getbuffer() - serialization_registry.register("node", "application/x-hdf5", serialize_hdf5) + serialization_registry.register( + StructureFamily.container, "application/x-hdf5", serialize_hdf5 + ) if modules_available("orjson"): @@ -91,4 +94,6 @@ async def serialize_json(node, metadata, filter_for_access): d = d[key]["contents"] return safe_json_dump(to_serialize) - serialization_registry.register("node", "application/json", serialize_json) + serialization_registry.register( + StructureFamily.container, "application/json", serialize_json + ) diff --git a/tiled/serialization/dataframe.py b/tiled/serialization/dataframe.py index c9269c8f3..99383fbaa 100644 --- a/tiled/serialization/dataframe.py +++ b/tiled/serialization/dataframe.py @@ -113,6 +113,6 @@ def json_sequence(df, metadata): ) if modules_available("h5py"): - from .node import serialize_hdf5 + from .container import serialize_hdf5 serialization_registry.register("dataframe", "application/x-hdf5", serialize_hdf5) diff --git a/tiled/serialization/xarray.py b/tiled/serialization/xarray.py index 86c756713..8301d0380 100644 --- a/tiled/serialization/xarray.py +++ b/tiled/serialization/xarray.py @@ -2,6 +2,7 @@ from ..media_type_registration import serialization_registry from ..utils import modules_available +from .container import walk from .dataframe import ( APACHE_ARROW_FILE_MIME_TYPE, XLSX_MIME_TYPE, @@ -11,7 +12,6 @@ serialize_html, serialize_parquet, ) -from .node import walk async def as_dataset(node): diff --git a/tiled/server/app.py b/tiled/server/app.py index edfe3b2b9..1cc69f32c 100644 --- a/tiled/server/app.py +++ b/tiled/server/app.py @@ -40,7 +40,7 @@ from .object_cache import NO_CACHE, ObjectCache from .object_cache import logger as object_cache_logger from .object_cache import set_object_cache -from .router import node_distinct, node_search, patch_route_signature, router +from .router import distinct, patch_route_signature, router, search from .settings import get_settings from .utils import ( API_KEY_COOKIE_NAME, @@ -213,7 +213,7 @@ async def lookup_file(path, try_app=True): raise HTTPException(status_code=401) except FileNotFoundError: # This may be a URL that has meaning to the client-side application, - # such as /ui/node/metadata/a/b/c. 
+                    # such as /ui/metadata/a/b/c.
                     # Serve index.html and let the client-side application sort it out.
                     if try_app:
                         response = await lookup_file("index.html", try_app=False)
@@ -369,17 +369,17 @@ async def unicorn_exception_handler(request: Request, exc: UnsupportedQueryType)
     # The /search route is defined after import time so that the user has the
     # opporunity to register custom query types before startup.
     app.get(
-        "/api/v1/node/search/{path:path}",
+        "/api/v1/search/{path:path}",
         response_model=schemas.Response[
             List[schemas.Resource[schemas.NodeAttributes, dict, dict]],
             schemas.PaginationLinks,
             dict,
         ],
-    )(patch_route_signature(node_search, query_registry))
+    )(patch_route_signature(search, query_registry))
     app.get(
-        "/api/v1/node/distinct/{path:path}",
+        "/api/v1/distinct/{path:path}",
         response_model=schemas.GetDistinctResponse,
-    )(patch_route_signature(node_distinct, query_registry))
+    )(patch_route_signature(distinct, query_registry))
 
     @lru_cache(1)
     def override_get_authenticators():
diff --git a/tiled/server/authentication.py b/tiled/server/authentication.py
index 5042f0185..996a2f67a 100644
--- a/tiled/server/authentication.py
+++ b/tiled/server/authentication.py
@@ -323,7 +323,7 @@ async def get_current_principal(
         if not set(security_scopes.scopes).issubset(scopes):
             # Include a link to the root page which provides a list of
             # authenticators. The use case here is:
-            # 1. User is emailed a link like https://example.com/subpath/node/metadata/a/b/c
+            # 1. User is emailed a link like https://example.com/subpath/metadata/a/b/c
             # 2. Tiled Client tries to connect to that and gets 401.
             # 3. Client can use this header to find its way to
             # https://examples.com/subpath/ and obtain a list of
diff --git a/tiled/server/core.py b/tiled/server/core.py
index f6d1521b2..45cef4805 100644
--- a/tiled/server/core.py
+++ b/tiled/server/core.py
@@ -25,7 +25,7 @@
 from ..adapters.mapping import MapAdapter
 from ..queries import KeyLookup, QueryValueError
 from ..serialization import register_builtin_serializers
-from ..structures.core import Spec
+from ..structures.core import Spec, StructureFamily
 from ..utils import (
     APACHE_ARROW_FILE_MIME_TYPE,
     SerializationError,
@@ -242,10 +242,10 @@ async def construct_entries_response(
 
 
 DEFAULT_MEDIA_TYPES = {
-    "array": {"*/*": "application/octet-stream", "image/*": "image/png"},
-    "dataframe": {"*/*": APACHE_ARROW_FILE_MIME_TYPE},
-    "node": {"*/*": "application/x-hdf5"},
-    "sparse": {"*/*": APACHE_ARROW_FILE_MIME_TYPE},
+    StructureFamily.array: {"*/*": "application/octet-stream", "image/*": "image/png"},
+    StructureFamily.dataframe: {"*/*": APACHE_ARROW_FILE_MIME_TYPE},
+    StructureFamily.container: {"*/*": "application/x-hdf5"},
+    StructureFamily.sparse: {"*/*": APACHE_ARROW_FILE_MIME_TYPE},
 }
 
 
@@ -319,7 +319,7 @@ async def construct_data_response(
     for media_type in media_types:
         if media_type == "*/*":
             media_type = DEFAULT_MEDIA_TYPES[structure_family]["*/*"]
-        elif structure_family == "array" and media_type == "image/*":
+        elif structure_family == StructureFamily.array and media_type == "image/*":
             media_type = DEFAULT_MEDIA_TYPES[structure_family]["image/*"]
         # Compare the request formats to the formats supported by each spec
         # name and, finally, by the structure family.
@@ -418,8 +418,8 @@ async def construct_resource( attributes["specs"] = specs if schemas.EntryFields.references in fields: attributes["references"] = getattr(entry, "references", []) - if (entry is not None) and entry.structure_family == "node": - attributes["structure_family"] = "node" + if (entry is not None) and entry.structure_family == StructureFamily.container: + attributes["structure_family"] = StructureFamily.container if schemas.EntryFields.structure in fields: if ( ((max_depth is None) or (depth < max_depth)) @@ -486,16 +486,16 @@ async def construct_resource( } if not omit_links: d["links"] = { - "self": f"{base_url}/node/metadata/{path_str}", - "search": f"{base_url}/node/search/{path_str}", + "self": f"{base_url}/metadata/{path_str}", + "search": f"{base_url}/search/{path_str}", "full": f"{base_url}/node/full/{path_str}", } resource = schemas.Resource[ - schemas.NodeAttributes, schemas.NodeLinks, schemas.NodeMeta + schemas.NodeAttributes, schemas.ContainerLinks, schemas.ContainerMeta ](**d) else: - links = {"self": f"{base_url}/node/metadata/{path_str}"} + links = {"self": f"{base_url}/metadata/{path_str}"} structure = {} if entry is not None: # entry is None when we are pulling just *keys* from the @@ -511,7 +511,7 @@ async def construct_resource( ) if schemas.EntryFields.structure_family in fields: attributes["structure_family"] = entry.structure_family - if entry.structure_family == "sparse": + if entry.structure_family == StructureFamily.sparse: # This arises from back-compat...needs revisiting. structure_maybe_method = entry.structure if callable(structure_maybe_method): @@ -538,10 +538,10 @@ async def construct_resource( if (schemas.EntryFields.microstructure in fields) or ( schemas.EntryFields.structure in fields ): - if entry.structure_family == "node": + if entry.structure_family == StructureFamily.container: assert False # not sure if this ever happens pass - elif entry.structure_family == "dataframe": + elif entry.structure_family == StructureFamily.dataframe: microstructure = entry.microstructure() meta = microstructure.meta divisions = microstructure.divisions @@ -562,7 +562,7 @@ async def construct_resource( microstructure = entry.microstructure() if microstructure is not None: structure["micro"] = asdict(microstructure) - if entry.structure_family == "array": + if entry.structure_family == StructureFamily.array: shape = structure.get("macro", {}).get("shape") if shape is None: # The client did not request structure so we have not yet @@ -574,7 +574,7 @@ async def construct_resource( links[ "block" ] = f"{base_url}/array/block/{path_str}?block={block_template}" - elif entry.structure_family == "dataframe": + elif entry.structure_family == StructureFamily.dataframe: links[ "partition" ] = f"{base_url}/dataframe/partition/{path_str}?partition={{index}}" @@ -759,10 +759,10 @@ class WrongTypeForRoute(Exception): FULL_LINKS = { - "node": {"full": "{base_url}/node/full/{path}"}, - "array": {"full": "{base_url}/array/full/{path}"}, - "dataframe": {"full": "{base_url}/node/full/{path}"}, - "sparse": {"full": "{base_url}/array/full/{path}"}, + StructureFamily.container: {"full": "{base_url}/node/full/{path}"}, + StructureFamily.array: {"full": "{base_url}/array/full/{path}"}, + StructureFamily.dataframe: {"full": "{base_url}/node/full/{path}"}, + StructureFamily.sparse: {"full": "{base_url}/array/full/{path}"}, } diff --git a/tiled/server/router.py b/tiled/server/router.py index 2a62395f4..c5ea0ba3e 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ 
-132,7 +132,7 @@ async def about( authentication=authentication, links={ "self": base_url, - "documentation": f"{base_url}docs", + "documentation": f"{base_url}/docs", }, meta={"root_path": request.scope.get("root_path") or "" + "/api"}, ).dict(), @@ -140,7 +140,7 @@ async def about( ) -async def node_search( +async def search( request: Request, path: str, fields: Optional[List[schemas.EntryFields]] = Query(list(schemas.EntryFields)), @@ -159,7 +159,7 @@ async def node_search( **filters, ): request.state.endpoint = "search" - if entry.structure_family != "node": + if entry.structure_family != StructureFamily.container: raise WrongTypeForRoute("This is not a Node; it cannot be searched or listed.") entry = filter_for_access( entry, principal, ["read:metadata"], request.state.metrics @@ -168,7 +168,7 @@ async def node_search( resource, metadata_stale_at, must_revalidate = await construct_entries_response( query_registry, entry, - "/node/search", + "/search", path, offset, limit, @@ -210,7 +210,7 @@ async def node_search( ) -async def node_distinct( +async def distinct( request: Request, structure_families: bool = False, specs: bool = False, @@ -298,12 +298,12 @@ async def route_with_sig(*args, **kwargs): @router.get( - "/node/metadata/{path:path}", + "/metadata/{path:path}", response_model=schemas.Response[ schemas.Resource[schemas.NodeAttributes, dict, dict], dict, dict ], ) -async def node_metadata( +async def metadata( request: Request, path: str, fields: Optional[List[schemas.EntryFields]] = Query(list(schemas.EntryFields)), @@ -583,7 +583,7 @@ async def node_full( "request a smaller chunks." ), ) - if entry.structure_family == "node": + if entry.structure_family == StructureFamily.container: curried_filter = partial( filter_for_access, principal=principal, @@ -611,7 +611,7 @@ async def node_full( raise HTTPException(status_code=406, detail=err.args[0]) -@router.post("/node/metadata/{path:path}", response_model=schemas.PostMetadataResponse) +@router.post("/metadata/{path:path}", response_model=schemas.PostMetadataResponse) async def post_metadata( request: Request, path: str, @@ -641,7 +641,7 @@ async def post_metadata( body.specs, body.references, ) - if structure_family == StructureFamily.node: + if structure_family == StructureFamily.container: structure = None else: if len(body.data_sources) != 1: @@ -691,7 +691,7 @@ async def post_metadata( base_url = get_base_url(request) path_parts = [segment for segment in path.split("/") if segment] + [key] path_str = "/".join(path_parts) - links["self"] = f"{base_url}/node/metadata/{path_str}" + links["self"] = f"{base_url}/metadata/{path_str}" if body.structure_family == StructureFamily.array: block_template = ",".join( f"{{{index}}}" for index in range(len(node.structure.macro.shape)) @@ -711,9 +711,9 @@ async def post_metadata( "partition" ] = f"{base_url}/dataframe/partition/{path_str}?partition={{index}}" links["full"] = f"{base_url}/node/full/{path_str}" - elif body.structure_family == StructureFamily.node: + elif body.structure_family == StructureFamily.container: links["full"] = f"{base_url}/node/full/{path_str}" - links["search"] = f"{base_url}/node/search/{path_str}" + links["search"] = f"{base_url}/search/{path_str}" else: raise NotImplementedError(body.structure_family) response_data = { @@ -726,7 +726,7 @@ async def post_metadata( return json_or_msgpack(request, response_data) -@router.delete("/node/metadata/{path:path}") +@router.delete("/metadata/{path:path}") async def delete( request: Request, 
entry=SecureEntry(scopes=["write:data", "write:metadata"], kind=EntryKind.node), @@ -834,7 +834,7 @@ async def put_dataframe_partition( return json_or_msgpack(request, None) -@router.put("/node/metadata/{path:path}", response_model=schemas.PutMetadataResponse) +@router.put("/metadata/{path:path}", response_model=schemas.PutMetadataResponse) async def put_metadata( request: Request, body: schemas.PutMetadataRequest, @@ -894,7 +894,7 @@ async def put_metadata( return json_or_msgpack(request, response_data) -@router.get("/node/revisions/{path:path}") +@router.get("/revisions/{path:path}") async def get_revisions( request: Request, path: str, @@ -913,7 +913,7 @@ async def get_revisions( resource = await construct_revisions_response( entry, base_url, - "/node/revisions", + "/revisions", path, offset, limit, @@ -922,7 +922,7 @@ async def get_revisions( return json_or_msgpack(request, resource.dict()) -@router.delete("/node/revisions/{path:path}") +@router.delete("/revisions/{path:path}") async def delete_revision( request: Request, number: int, diff --git a/tiled/server/schemas.py b/tiled/server/schemas.py index 11a777222..6896f2002 100644 --- a/tiled/server/schemas.py +++ b/tiled/server/schemas.py @@ -189,7 +189,7 @@ class SelfLinkOnly(pydantic.BaseModel): self: str -class NodeLinks(pydantic.BaseModel): +class ContainerLinks(pydantic.BaseModel): self: str search: str full: str @@ -214,7 +214,7 @@ class SparseLinks(pydantic.BaseModel): resource_links_type_by_structure_family = { - "node": NodeLinks, + "container": ContainerLinks, "array": ArrayLinks, "dataframe": DataFrameLinks, "sparse": SparseLinks, @@ -225,7 +225,7 @@ class EmptyDict(pydantic.BaseModel): pass -class NodeMeta(pydantic.BaseModel): +class ContainerMeta(pydantic.BaseModel): count: int diff --git a/tiled/structures/core.py b/tiled/structures/core.py index 2f08f68ef..0e07be2d2 100644 --- a/tiled/structures/core.py +++ b/tiled/structures/core.py @@ -10,7 +10,7 @@ class StructureFamily(str, enum.Enum): - node = "node" + container = "container" array = "array" dataframe = "dataframe" sparse = "sparse" diff --git a/tiled/utils.py b/tiled/utils.py index 5a4465ed9..71e8a0e18 100644 --- a/tiled/utils.py +++ b/tiled/utils.py @@ -325,13 +325,13 @@ def gen_tree(tree, nodes=None, last=None): # when this function is used in a CLI where import overhead can accumulate to # about 2 seconds, the bulk of the time. Therefore, we do something a bit # "clever" here to override the normal structure clients with dummy placeholders. 
- from .client.node import Node + from .client.container import Container def dummy_client(*args, **kwargs): return None structure_clients = collections.defaultdict(lambda: dummy_client) - structure_clients["node"] = Node + structure_clients["container"] = Container fast_tree = tree.new_variation(structure_clients=structure_clients) if nodes is None: last_index = len(fast_tree) - 1 diff --git a/web-frontend/src/client.ts b/web-frontend/src/client.ts index cad85db14..14baca1d5 100644 --- a/web-frontend/src/client.ts +++ b/web-frontend/src/client.ts @@ -14,7 +14,7 @@ export const search = async ( ): Promise< components["schemas"]["Response_List_tiled.server.router.Resource_NodeAttributes__dict__dict____PaginationLinks__dict_"] > => { - let url = `${apiURL}/node/search/${segments.join( + let url = `${apiURL}/search/${segments.join( "/" )}?page[offset]=${pageOffset}&page[limit]=${pageLimit}&fields=${fields.join( "&fields=" @@ -35,7 +35,7 @@ export const metadata = async ( components["schemas"]["Response_Resource_NodeAttributes__dict__dict___dict__dict_"] > => { const response = await axiosInstance.get( - `${apiURL}/node/metadata/${segments.join("/")}?fields=${fields.join("&fields=")}`, + `${apiURL}/metadata/${segments.join("/")}?fields=${fields.join("&fields=")}`, { signal: signal } ); return response.data; diff --git a/web-frontend/src/openapi_schemas.ts b/web-frontend/src/openapi_schemas.ts index c9485cf1f..b95faf61d 100644 --- a/web-frontend/src/openapi_schemas.ts +++ b/web-frontend/src/openapi_schemas.ts @@ -13,7 +13,7 @@ export interface paths { "/api/": { get: operations["about_api__get"]; }; - "/api/node/metadata/{path}": { + "/api/metadata/{path}": { /** Fetch the metadata and structure information for one entry. */ get: operations["node_metadata_api_node_metadata__path__get"]; }; @@ -33,7 +33,7 @@ export interface paths { /** Fetch the data below the given node. */ get: operations["full_xarray_Dataset_api_node_full__path__get"]; }; - "/api/node/search/{path}": { + "/api/search/{path}": { get: operations["node_search_api_node_search__path__get"]; }; }
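
Note for reviewers: the rename is mechanical but it touches the public client API, the query class names, and every URL. The sketch below illustrates what downstream code looks like after this patch. It is an illustrative sketch only: the server URL and the entry key "raw" are hypothetical and not part of this change, and it assumes a running Tiled server that serves a container at that key.

```python
# Minimal post-rename usage sketch. Assumes a Tiled server is reachable at the
# example URL below and serves a container under the (hypothetical) key "raw".
from tiled.client import from_uri
from tiled.client.container import Container  # new module introduced by this patch
from tiled.queries import SpecsQuery  # renamed from Specs in this patch

# URLs are shortened: ".../api/v1/node/metadata/raw" becomes ".../api/v1/metadata/raw".
client = from_uri("https://example.com/api/v1/metadata/raw")
assert isinstance(client, Container)

# Renamed query classes behave as before.
results = client.search(SpecsQuery(include=["foo"], exclude=["baz"]))

# The old import path still works but now emits a DeprecationWarning, and Node
# is simply an alias for Container (see the shim left in tiled/client/node.py).
from tiled.client.node import Node  # DeprecationWarning

assert Node is Container
```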