Skip to content

Extend dataset_to_point_list to accept both a Dataset and a dict of DataArrays #7097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pymc/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,18 +239,18 @@ def enhanced(*args, **kwargs):


def dataset_to_point_list(
ds: xarray.Dataset, sample_dims: Sequence[str]
ds: Union[xarray.Dataset, dict[str, xarray.DataArray]], sample_dims: Sequence[str]
) -> Tuple[List[Dict[str, np.ndarray]], Dict[str, Any]]:
# All keys of the dataset must be a str
var_names = list(ds.keys())
for vn in var_names:
if not isinstance(vn, str):
raise ValueError(f"Variable names must be str, but dataset key {vn} is a {type(vn)}.")
num_sample_dims = len(sample_dims)
stacked_dims = {dim_name: ds[dim_name] for dim_name in sample_dims}
ds = ds.transpose(*sample_dims, ...)
stacked_dims = {dim_name: ds[var_names[0]][dim_name] for dim_name in sample_dims}
stacked_dict = {
vn: da.values.reshape((-1, *da.shape[num_sample_dims:])) for vn, da in ds.items()
vn: da.transpose(*sample_dims, ...).values.reshape((-1, *da.shape[num_sample_dims:]))
for vn, da in ds.items()
}
points = [
{vn: stacked_dict[vn][i, ...] for vn in var_names}
Expand Down
11 changes: 9 additions & 2 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,23 @@ def fn(a=UNSET):
assert "a=UNSET" in captured.out


def test_dataset_to_point_list():
ds = xarray.Dataset()
@pytest.mark.parametrize("input_type", ("dict", "Dataset"))
def test_dataset_to_point_list(input_type):
    """Check the point list built from either a plain dict or a Dataset.

    The same single variable ``A`` with dims ``("chain", "draw")`` and shape
    (2, 3) is stored in whichever container ``input_type`` selects; the
    conversion is expected to return one point per (chain, draw) pair.
    """
    # Pick the container under test; both support item assignment by name.
    if input_type == "Dataset":
        container = xarray.Dataset()
    elif input_type == "dict":
        container = {}
    container["A"] = xarray.DataArray([[1, 2, 3]] * 2, dims=("chain", "draw"))

    points, _ = dataset_to_point_list(container, sample_dims=["chain", "draw"])

    assert isinstance(points, list)
    # 2 chains x 3 draws flattened into 6 points.
    assert len(points) == 6
    first_point = points[0]
    assert isinstance(first_point, dict)
    assert isinstance(first_point["A"], np.ndarray)


def test_dataset_to_point_list_str_key():
    """Check that a dataset with a non-str variable name is rejected."""
    bad_key_dataset = xarray.Dataset()
    # An integer key is the invalid input this test is built around.
    bad_key_dataset[3] = xarray.DataArray([1, 2, 3])
    sample_dims = ["chain", "draw"]
    with pytest.raises(ValueError, match="must be str"):
        dataset_to_point_list(bad_key_dataset, sample_dims=sample_dims)
Expand Down