diff --git a/README.md b/README.md index 4655392..07dc02f 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,21 @@ from datatree import DataTree, open_datatree import json import numpy as np import os +``` + +### Create component shard stores +```python base_store = DirectoryStore("base.zarr") shard1 = DirectoryStore("shard1.zarr") shard2 = DirectoryStore("shard2.zarr") array_shards1 = array_shard_directory_store("array_shards1") array_shards2 = array_shard_directory_store("array_shards2") +``` + +### Generate data for the example +```python # xarray-datatree Quick Overview data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) # Sharded array dimensions must have a chunk shape of 1. @@ -53,30 +61,45 @@ ds3 = xr.Dataset( coords={"species": "human"}, ) dt = DataTree.from_dict({"simulation/coarse": ds, "simulation/fine": ds2, "/": ds3}) +``` + +### A monolithic store -# A monolithic store +```python single_store = DirectoryStore("single.zarr") dt.to_zarr(single_store) +``` +### A sharded store demonstrating sharding on groups and arrays. + +Arrays are sharded over 1 dimension. -# A sharded store demonstrating sharding on groups and arrays. -# Arrays are sharded over 1 dimension. +```python sharded_store = ShardedStore(base_store, {'people': shard1, 'species': shard2}, {'simulation/coarse/foo': (1, array_shards1), 'simulation/fine/foo': (1, array_shards2)}) dt.to_zarr(sharded_store) +``` + +### Serialize / deserialize -# Serialize / deserialize +```python config = sharded_store.get_config() config_str = json.dumps(config) config = json.loads(config_str) sharded_store = ShardedStore.from_config(config) +``` + +### Validate +```python from_single = open_datatree(single_store, engine='zarr').compute() from_sharded = open_datatree(sharded_store, engine='zarr').compute() - assert from_single.identical(from_sharded) +``` -# Run transformations over component shards with `map_shards` +### Run transformations over component shards with `map_shards` + +```python to_zip_stores = to_zip_store_with_prefix("zip_stores") zip_sharded_stores = sharded_store.map_shards(to_zip_stores) ```