intake · aulemahal · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025
@@ -2,6 +2,12 @@
 
 [Full Changelog](https://github.com/intake/intake-esm/compare/v2025.2.3...v2025.7.9)
 
+## vUNRELEASED
+
+### New features added
+
+- New `iterable_columns` field in the ESM catalog spec to specify which columns should be read as iterables (tuples). Deprecates the `columns_with_iterables` argument of `esm_datastore` by @aulemahal in https://github.com/intake/intake-esm/pull/752
+
 ## v2025.7.9
 
 ### New features added

@@ -67,6 +67,7 @@ They should be either [URIs](https://en.wikipedia.org/wiki/Uniform_Resource_Iden
 | description         | string                                                    | **REQUIRED.** Detailed multi-line description to fully explain the catalog. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. |
 | catalog_file        | string                                                    | **REQUIRED.** Path to the CSV file with the catalog contents.                                                                                                          |
 | catalog_dict        | array                                                     | If specified, it is mutually exclusive with `catalog_file`. An array of dictionaries that represents the data that would otherwise be in the csv.                      |
+| iterable_columns    | array                                                     | A list of column names that contain iterable values instead of scalar ones.                                                                                            |
 | attributes          | [[Attribute Object](#attribute-object)]                   | **REQUIRED.** A list of attribute columns in the data set.                                                                                                             |
 | assets              | [Assets Object](#assets-object)                           | **REQUIRED.** Description of how the assets (data files) are referenced in the CSV catalog file.                                                                       |
 | aggregation_control | [Aggregation Control Object](#aggregation-control-object) | **OPTIONAL.** Description of how to support aggregation of multiple assets into a single xarray data set.                                                              |

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import ast
 import builtins
 import datetime
 import enum
@@ -115,6 +116,7 @@ class ESMCatalogModel(pydantic.BaseModel):
     id: str = ''
     catalog_dict: list[dict] | None = None
     catalog_file: pydantic.StrictStr | None = None
+    iterable_columns: set[pydantic.StrictStr] | None = None
     description: pydantic.StrictStr | None = None
     title: pydantic.StrictStr | None = None
     last_updated: datetime.datetime | datetime.date | None = None
@@ -320,6 +322,15 @@ def _df_from_file(
             csv_path = f'{os.path.dirname(_mapper.root)}/{cat.catalog_file}'
         cat.catalog_file = csv_path
 
+        if self.iterable_columns:
+            converter = ast.literal_eval
+            read_kwargs.setdefault('converters', {})
+            for col in self.iterable_columns:
+                if read_kwargs['converters'].setdefault(col, converter) != converter:
+                    raise ValueError(
+                        f"Cannot provide converter for '{col}' via `read_kwargs` when '{col}' is also specified in `iterable_columns`"
+                    )
+
         reader = CatalogFileDataReader(cat.catalog_file, storage_options, **read_kwargs)
         self._iterable_dtype_map = reader.dtype_map
         return reader.frames

@@ -59,6 +59,7 @@ class esm_datastore(Catalog):
         A list of columns in the csv file containing iterables. Values in columns specified here will be
         converted with `ast.literal_eval` when :py:func:`~pandas.read_csv` is called (i.e., this is a
         shortcut to passing converters to `read_kwargs`).
+        Catalogs might also have such columns configured in their ``iterable_columns`` field.
     storage_options : dict, optional
         Parameters passed to the backend file-system such as Google Cloud Storage,
         Amazon Web Service S3.

@@ -0,0 +1,55 @@
+{
+  "esmcat_version": "0.1.0",
+  "id": "sample-multi-variable-cesm1-lens",
+  "description": "This is a sample ESM catalog emulating multi variable/history files for CESM1-LENS",
+  "catalog_file": "multi-variable-catalog.csv",
+  "iterable_columns": ["variable"],
+  "attributes": [
+    {
+      "column_name": "experiment",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "case",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "component",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "stream",
+      "vocabulary": ""
+    },
+    { "column_name": "variable", "vocabulary": "" },
+    {
+      "column_name": "member_id",
+      "vocabulary": ""
+    }
+  ],
+  "assets": {
+    "column_name": "path",
+    "format": "netcdf"
+  },
+
+  "aggregation_control": {
+    "variable_column_name": "variable",
+    "groupby_attrs": ["component", "experiment", "stream"],
+    "aggregations": [
+      {
+        "type": "join_new",
+        "attribute_name": "member_id",
+        "options": { "coords": "minimal", "compat": "override" }
+      },
+      {
+        "type": "join_existing",
+        "attribute_name": "time_range",
+        "options": { "dim": "time" }
+      },
+      {
+        "type": "union",
+        "attribute_name": "variable"
+      }
+    ]
+  }
+}
@@ -28,6 +28,7 @@
     cdf_cat_sample_cmip6_noagg,
     mixed_cat_sample_cmip6,
     multi_variable_cat,
+    multi_variable_hard_cat,
     opendap_cat_sample_noaa,
     sample_df,
     sample_esmcat_data,
@@ -158,6 +159,7 @@ def test_catalog_init_back_compat(capsys, obj, sep, read_kwargs, read_csv_kwargs
     [
         (multi_variable_cat, {'converters': {'variable': ast.literal_eval}}, None),
         (multi_variable_cat, None, ['variable']),
+        (multi_variable_hard_cat, None, None),
     ],
 )
 def test_columns_with_iterables(capsys, obj, read_kwargs, columns_with_iterables):

@@ -7,6 +7,9 @@
 zarr_cat_pangeo_cmip6 = 'https://storage.googleapis.com/cmip6/pangeo-cmip6.json'
 cdf_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-netcdf.json')
 multi_variable_cat = os.path.join(here, 'sample-catalogs/multi-variable-catalog.json')
+multi_variable_hard_cat = os.path.join(
+    here, 'sample-catalogs/multi-variable-hardcoded-catalog.json'
+)
 cdf_cat_sample_cmip5 = os.path.join(here, 'sample-catalogs/cmip5-netcdf.json')
 cdf_cat_sample_cmip5_pq = os.path.join(here, 'sample-catalogs/cmip5-netcdf-parquet.json')
 cdf_cat_sample_cesmle = os.path.join(here, 'sample-catalogs/cesm1-lens-netcdf.json')