Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

[Full Changelog](https://github.com/intake/intake-esm/compare/v2025.2.3...v2025.7.9)

## vUNRELEASED

### New features added

- New `iterable_columns` field in the ESM catalog spec to specify which columns should be read as iterables (tuples). Deprecates the `columns_with_iterables` argument of `esm_datastore`. By @aulemahal in https://github.com/intake/intake-esm/pull/752

## v2025.7.9

### New features added
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/esm-catalog-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ They should be either [URIs](https://en.wikipedia.org/wiki/Uniform_Resource_Iden
| description | string | **REQUIRED.** Detailed multi-line description to fully explain the catalog. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. |
| catalog_file | string | **REQUIRED.** Path to the CSV file with the catalog contents. |
| catalog_dict | array | If specified, it is mutually exclusive with `catalog_file`. An array of dictionaries that represents the data that would otherwise be in the csv. |
| iterable_columns | array | A list of column names that contain iterable values instead of scalar ones. |
| attributes | [[Attribute Object](#attribute-object)] | **REQUIRED.** A list of attribute columns in the data set. |
| assets | [Assets Object](#assets-object) | **REQUIRED.** Description of how the assets (data files) are referenced in the CSV catalog file. |
| aggregation_control | [Aggregation Control Object](#aggregation-control-object) | **OPTIONAL.** Description of how to support aggregation of multiple assets into a single xarray data set. |
Expand Down
11 changes: 11 additions & 0 deletions intake_esm/cat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import ast
import builtins
import datetime
import enum
Expand Down Expand Up @@ -115,6 +116,7 @@ class ESMCatalogModel(pydantic.BaseModel):
id: str = ''
catalog_dict: list[dict] | None = None
catalog_file: pydantic.StrictStr | None = None
iterable_columns: set[pydantic.StrictStr] | None = None
description: pydantic.StrictStr | None = None
title: pydantic.StrictStr | None = None
last_updated: datetime.datetime | datetime.date | None = None
Expand Down Expand Up @@ -320,6 +322,15 @@ def _df_from_file(
csv_path = f'{os.path.dirname(_mapper.root)}/{cat.catalog_file}'
cat.catalog_file = csv_path

if self.iterable_columns:
converter = ast.literal_eval
read_kwargs.setdefault('converters', {})
for col in self.iterable_columns:
if read_kwargs['converters'].setdefault(col, converter) != converter:
raise ValueError(
f"Cannot provide converter for '{col}' via `read_kwargs` when '{col}' is also specified in `iterable_columns`"
)

reader = CatalogFileDataReader(cat.catalog_file, storage_options, **read_kwargs)
self._iterable_dtype_map = reader.dtype_map
return reader.frames
Expand Down
1 change: 1 addition & 0 deletions intake_esm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class esm_datastore(Catalog):
A list of columns in the csv file containing iterables. Values in columns specified here will be
converted with `ast.literal_eval` when :py:func:`~pandas.read_csv` is called (i.e., this is a
shortcut to passing converters to `read_kwargs`).
Catalogs might also have such columns configured in their ``iterable_columns`` field.
storage_options : dict, optional
Parameters passed to the backend file-system such as Google Cloud Storage,
Amazon Web Service S3.
Expand Down
55 changes: 55 additions & 0 deletions tests/sample-catalogs/multi-variable-hardcoded-catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"esmcat_version": "0.1.0",
"id": "sample-multi-variable-cesm1-lens",
"description": "This is a sample ESM catalog emulating multi variable/history files for CESM1-LENS",
"catalog_file": "multi-variable-catalog.csv",
"iterable_columns": ["variable"],
"attributes": [
{
"column_name": "experiment",
"vocabulary": ""
},
{
"column_name": "case",
"vocabulary": ""
},
{
"column_name": "component",
"vocabulary": ""
},
{
"column_name": "stream",
"vocabulary": ""
},
{ "column_name": "variable", "vocabulary": "" },
{
"column_name": "member_id",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "netcdf"
},

"aggregation_control": {
"variable_column_name": "variable",
"groupby_attrs": ["component", "experiment", "stream"],
"aggregations": [
{
"type": "join_new",
"attribute_name": "member_id",
"options": { "coords": "minimal", "compat": "override" }
},
{
"type": "join_existing",
"attribute_name": "time_range",
"options": { "dim": "time" }
},
{
"type": "union",
"attribute_name": "variable"
}
]
}
}
2 changes: 2 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
cdf_cat_sample_cmip6_noagg,
mixed_cat_sample_cmip6,
multi_variable_cat,
multi_variable_hard_cat,
opendap_cat_sample_noaa,
sample_df,
sample_esmcat_data,
Expand Down Expand Up @@ -158,6 +159,7 @@ def test_catalog_init_back_compat(capsys, obj, sep, read_kwargs, read_csv_kwargs
[
(multi_variable_cat, {'converters': {'variable': ast.literal_eval}}, None),
(multi_variable_cat, None, ['variable']),
(multi_variable_hard_cat, None, None),
],
)
def test_columns_with_iterables(capsys, obj, read_kwargs, columns_with_iterables):
Expand Down
3 changes: 3 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
zarr_cat_pangeo_cmip6 = 'https://storage.googleapis.com/cmip6/pangeo-cmip6.json'
cdf_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-netcdf.json')
multi_variable_cat = os.path.join(here, 'sample-catalogs/multi-variable-catalog.json')
multi_variable_hard_cat = os.path.join(
here, 'sample-catalogs/multi-variable-hardcoded-catalog.json'
)
cdf_cat_sample_cmip5 = os.path.join(here, 'sample-catalogs/cmip5-netcdf.json')
cdf_cat_sample_cmip5_pq = os.path.join(here, 'sample-catalogs/cmip5-netcdf-parquet.json')
cdf_cat_sample_cesmle = os.path.join(here, 'sample-catalogs/cesm1-lens-netcdf.json')
Expand Down
Loading