
Repo checker #9450

Merged · 11 commits · Sep 10, 2024
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -1,6 +1,7 @@
# https://pre-commit.com/
ci:
autoupdate_schedule: monthly
autoupdate_commit_msg: 'Update pre-commit hooks'
exclude: 'xarray/datatree_.*'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/dataset_io.py
@@ -724,7 +724,7 @@ class PerformanceBackend(xr.backends.BackendEntrypoint):
def open_dataset(
self,
filename_or_obj: str | os.PathLike | None,
drop_variables: tuple[str] = None,
drop_variables: tuple[str, ...] = None,
*,
mask_and_scale=True,
decode_times=True,
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -174,7 +174,7 @@ def setup(self, use_cftime, use_flox):
# GH9426 - deep-copying CFTime object arrays is weirdly slow
asda = xr.DataArray(time)
labeled_time = []
for year, month in zip(asda.dt.year, asda.dt.month):
for year, month in zip(asda.dt.year, asda.dt.month, strict=True):
labeled_time.append(cftime.datetime(year, month, 1))

self.da = xr.DataArray(
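Note: the recurring change across this PR is that bare `zip(...)` calls gain an explicit `strict=` argument. On Python 3.10+, `strict=True` raises `ValueError` when the iterables have different lengths instead of silently truncating. A minimal sketch of the difference (illustrative only, not part of the diff):

```python
dims = ("x", "y")
shape = (3, 4, 5)  # deliberately one entry too long

# Default zip silently drops the extra entry: {"x": 3, "y": 4}
print(dict(zip(dims, shape)))

# strict=True (Python 3.10+) turns the silent truncation into an error,
# which is the behaviour the PR opts into almost everywhere:
try:
    dict(zip(dims, shape, strict=True))
except ValueError as err:
    print(err)  # zip() argument 2 is longer than argument 1
```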
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/rolling.py
@@ -64,7 +64,7 @@ def time_rolling_long(self, func, pandas, use_bottleneck):
def time_rolling_np(self, window_, min_periods, use_bottleneck):
with xr.set_options(use_bottleneck=use_bottleneck):
self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
getattr(np, "nansum")
np.nansum
).load()

@parameterized(
2 changes: 1 addition & 1 deletion doc/user-guide/testing.rst
@@ -193,7 +193,7 @@ different type:

.. ipython:: python

def sparse_random_arrays(shape: tuple[int]) -> sparse._coo.core.COO:
def sparse_random_arrays(shape: tuple[int, ...]) -> sparse._coo.core.COO:
"""Strategy which generates random sparse.COO arrays"""
if shape is None:
shape = npst.array_shapes()
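Note: the `tuple[int]` → `tuple[int, ...]` edits here and in `dataset_io.py` are annotation fixes: `tuple[int]` describes a tuple of exactly one int, while `tuple[int, ...]` is the variadic form a shape needs. A small illustration with a hypothetical function, not taken from the diff:

```python
def ndim_of(shape: tuple[int, ...]) -> int:
    # tuple[int] would only accept one-element shapes like (3,);
    # tuple[int, ...] accepts (3,), (3, 4), (3, 4, 5), ...
    return len(shape)

print(ndim_of((3, 4, 5)))  # 3
```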
2 changes: 1 addition & 1 deletion properties/test_pandas_roundtrip.py
@@ -80,7 +80,7 @@ def test_roundtrip_dataarray(data, arr) -> None:
tuple
)
)
coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)}
coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape, strict=True)}
original = xr.DataArray(arr, dims=names, coords=coords)
roundtripped = xr.DataArray(original.to_pandas())
xr.testing.assert_identical(original, roundtripped)
8 changes: 3 additions & 5 deletions pyproject.toml
@@ -84,14 +84,13 @@ source = ["xarray"]
exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]

[tool.mypy]
enable_error_code = "redundant-self"
enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
exclude = [
'build',
'xarray/util/generate_.*\.py',
'xarray/datatree_/doc/.*\.py',
]
files = "xarray"
show_error_codes = true
show_error_context = true
warn_redundant_casts = true
warn_unused_configs = true
@@ -240,7 +239,6 @@ extend-exclude = [
"doc",
"_typed_ops.pyi",
]
target-version = "py310"

[tool.ruff.lint]
# E402: module level import not at top of file
@@ -249,13 +247,13 @@
extend-safe-fixes = [
"TID252", # absolute imports
]
ignore = [
extend-ignore = [
"E402",
"E501",
"E731",
"UP007",
]
select = [
extend-select = [
"F", # Pyflakes
"E", # Pycodestyle
"W",
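Note: the widened `enable_error_code` list ties into the `# type: ignore` → `# type: ignore[...]` edits later in the diff: with `ignore-without-code` enabled, mypy rejects bare ignores, and `redundant-expr` flags conditions it can prove always true or always false. A hedged sketch of what the first of these means at the call site:

```python
# mypy flags the bare form below once ignore-without-code is enabled ...
value: int = "oops"  # type: ignore

# ... and accepts the coded form, which documents exactly which error
# is being silenced and nothing more:
value2: int = "oops"  # type: ignore[assignment]
```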
16 changes: 10 additions & 6 deletions xarray/backends/api.py
@@ -51,7 +51,7 @@
try:
from dask.delayed import Delayed
except ImportError:
Delayed = None # type: ignore
Delayed = None # type: ignore[assignment, misc]
from io import BufferedIOBase

from xarray.backends.common import BackendEntrypoint
@@ -1113,7 +1113,7 @@ def open_mfdataset(
list(combined_ids_paths.keys()),
list(combined_ids_paths.values()),
)
elif combine == "by_coords" and concat_dim is not None:
elif concat_dim is not None:
raise ValueError(
"When combine='by_coords', passing a value for `concat_dim` has no "
"effect. To manually combine along a specific dimension you should "
@@ -1432,7 +1432,7 @@ def to_netcdf(
store.sync()
return target.getvalue()
finally:
if not multifile and compute:
if not multifile and compute: # type: ignore[redundant-expr]
store.close()

if not compute:
@@ -1585,8 +1585,9 @@ def save_mfdataset(
multifile=True,
**kwargs,
)
for ds, path, group in zip(datasets, paths, groups)
]
for ds, path, group in zip(datasets, paths, groups, strict=True)
],
strict=True,
)

try:
@@ -1600,7 +1601,10 @@
import dask

return dask.delayed(
[dask.delayed(_finalize_store)(w, s) for w, s in zip(writes, stores)]
[
dask.delayed(_finalize_store)(w, s)
for w, s in zip(writes, stores, strict=True)
]
)


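Note: the `elif` simplification in `open_mfdataset` above is the kind of thing `redundant-expr` catches. Assuming `combine` is annotated as a two-value `Literal` (which the surrounding code appears to rely on), once the `"nested"` case has been handled, re-testing `combine == "by_coords"` in the `elif` is provably always true. A minimal sketch of the pattern, with hypothetical names rather than the real function body:

```python
from typing import Literal


def validate(combine: Literal["nested", "by_coords"], concat_dim: str | None) -> None:
    if combine == "nested":
        pass  # nested combine handles concat_dim itself
    elif concat_dim is not None:
        # Here combine can only be "by_coords", so the old
        # `combine == "by_coords" and concat_dim is not None` test repeated
        # a fact the type checker already knows; redundant-expr flags that.
        raise ValueError(
            "When combine='by_coords', passing a value for `concat_dim` has no effect."
        )
```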
2 changes: 1 addition & 1 deletion xarray/backends/common.py
@@ -431,7 +431,7 @@ def set_dimensions(self, variables, unlimited_dims=None):
for v in unlimited_dims: # put unlimited_dims first
dims[v] = None
for v in variables.values():
dims.update(dict(zip(v.dims, v.shape)))
dims.update(dict(zip(v.dims, v.shape, strict=True)))

for dim, length in dims.items():
if dim in existing_dims and length != existing_dims[dim]:
2 changes: 1 addition & 1 deletion xarray/backends/file_manager.py
@@ -276,7 +276,7 @@ def __getstate__(self):
def __setstate__(self, state) -> None:
"""Restore from a pickle."""
opener, args, mode, kwargs, lock, manager_id = state
self.__init__( # type: ignore
self.__init__( # type: ignore[misc]
opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id
)

4 changes: 3 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -208,7 +208,9 @@ def open_store_variable(self, name, var):
"shuffle": var.shuffle,
}
if var.chunks:
encoding["preferred_chunks"] = dict(zip(var.dimensions, var.chunks))
encoding["preferred_chunks"] = dict(
zip(var.dimensions, var.chunks, strict=True)
)
# Convert h5py-style compression options to NetCDF4-Python
# style, if possible
if var.compression == "gzip":
8 changes: 6 additions & 2 deletions xarray/backends/netCDF4_.py
@@ -278,7 +278,9 @@ def _extract_nc4_variable_encoding(
chunksizes = encoding["chunksizes"]
chunks_too_big = any(
c > d and dim not in unlimited_dims
for c, d, dim in zip(chunksizes, variable.shape, variable.dims)
for c, d, dim in zip(
chunksizes, variable.shape, variable.dims, strict=False
)
)
has_original_shape = "original_shape" in encoding
changed_shape = (
@@ -446,7 +448,9 @@ def open_store_variable(self, name: str, var):
else:
encoding["contiguous"] = False
encoding["chunksizes"] = tuple(chunking)
encoding["preferred_chunks"] = dict(zip(var.dimensions, chunking))
encoding["preferred_chunks"] = dict(
zip(var.dimensions, chunking, strict=True)
)
# TODO: figure out how to round-trip "endian-ness" without raising
# warnings from netCDF4
# encoding['endian'] = var.endian()
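Note: one zip in `_extract_nc4_variable_encoding` gets `strict=False` rather than `strict=True`, i.e. the pre-existing truncating behaviour made explicit. Presumably that is because a stale `chunksizes` entry in `encoding` is allowed to disagree in length with the variable's current dims; that reading is an assumption, not something stated in the diff. The effect in isolation, with made-up values:

```python
chunksizes = (10, 10, 10)      # hypothetical stale encoding entry
shape = (100, 100)             # variable has since lost a dimension
dims = ("x", "y")

# strict=False keeps the old zip semantics: iterate to the shortest input,
# so the extra chunksize entry is simply ignored instead of raising.
chunks_too_big = any(
    c > d for c, d, _dim in zip(chunksizes, shape, dims, strict=False)
)
print(chunks_too_big)  # False
```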
2 changes: 1 addition & 1 deletion xarray/backends/plugins.py
@@ -199,7 +199,7 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint:
"https://docs.xarray.dev/en/stable/getting-started-guide/installing.html"
)
backend = engines[engine]
elif isinstance(engine, type) and issubclass(engine, BackendEntrypoint):
elif issubclass(engine, BackendEntrypoint):
backend = engine()
else:
raise TypeError(
8 changes: 4 additions & 4 deletions xarray/backends/zarr.py
@@ -186,7 +186,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
# TODO: incorporate synchronizer to allow writes from multiple dask
# threads
if var_chunks and enc_chunks_tuple:
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks):
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks, strict=True):
for dchunk in dchunks[:-1]:
if dchunk % zchunk:
base_error = (
@@ -548,13 +548,13 @@ def open_store_variable(self, name, zarr_array=None):

encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)),
"compressor": zarr_array.compressor,
"filters": zarr_array.filters,
}
# _FillValue needs to be in attributes, not encoding, so it will get
# picked up by decode_cf
if getattr(zarr_array, "fill_value") is not None:
if zarr_array.fill_value is not None:
attributes["_FillValue"] = zarr_array.fill_value

return Variable(dimensions, data, attributes, encoding)
@@ -576,7 +576,7 @@ def get_dimensions(self):
dimensions = {}
for k, v in self.zarr_group.arrays():
dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr)
for d, s in zip(dim_names, v.shape):
for d, s in zip(dim_names, v.shape, strict=True):
if d in dimensions and dimensions[d] != s:
raise ValueError(
f"found conflicting lengths for dimension {d} "
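Note: besides the stricter zips, the zarr hunks replace `getattr(zarr_array, "fill_value")` with plain attribute access; a getattr call with a constant string name is just a slower spelling of the same lookup. The point in one line, using a throwaway stand-in class rather than the real zarr API:

```python
class FakeZarrArray:
    fill_value = -999  # stand-in attribute for illustration only

arr = FakeZarrArray()
# Both expressions read the same attribute; the literal-string getattr
# only obscures that, which is why the diff drops it.
assert getattr(arr, "fill_value") == arr.fill_value
```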
2 changes: 1 addition & 1 deletion xarray/coding/calendar_ops.py
@@ -207,7 +207,7 @@ def convert_calendar(
_convert_to_new_calendar_with_new_day_of_year(
date, newdoy, calendar, use_cftime
)
for date, newdoy in zip(time.variable._data.array, new_doy)
for date, newdoy in zip(time.variable._data.array, new_doy, strict=True)
],
dims=(dim,),
name=dim,
14 changes: 7 additions & 7 deletions xarray/coding/times.py
@@ -204,7 +204,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:


def _decode_cf_datetime_dtype(
data, units: str, calendar: str, use_cftime: bool | None
data, units: str, calendar: str | None, use_cftime: bool | None
) -> np.dtype:
# Verify that at least the first and last date can be decoded
# successfully. Otherwise, tracebacks end up swallowed by
@@ -704,7 +704,7 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray:


def encode_cf_datetime(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str | None = None,
calendar: str | None = None,
dtype: np.dtype | None = None,
@@ -726,7 +726,7 @@


def _eagerly_encode_cf_datetime(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str | None = None,
calendar: str | None = None,
dtype: np.dtype | None = None,
@@ -809,7 +809,7 @@ def _eagerly_encode_cf_datetime(


def _encode_cf_datetime_within_map_blocks(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str,
calendar: str,
dtype: np.dtype,
@@ -859,7 +859,7 @@ def _lazily_encode_cf_datetime(


def encode_cf_timedelta(
timedeltas: T_DuckArray, # type: ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str | None = None,
dtype: np.dtype | None = None,
) -> tuple[T_DuckArray, str]:
@@ -871,7 +871,7 @@


def _eagerly_encode_cf_timedelta(
timedeltas: T_DuckArray, # type: ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str | None = None,
dtype: np.dtype | None = None,
allow_units_modification: bool = True,
@@ -923,7 +923,7 @@ def _eagerly_encode_cf_timedelta(


def _encode_cf_timedelta_within_map_blocks(
timedeltas: T_DuckArray, # type:ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str,
dtype: np.dtype,
) -> T_DuckArray:
7 changes: 4 additions & 3 deletions xarray/core/alignment.py
@@ -405,14 +405,15 @@ def align_indexes(self) -> None:
zip(
[joined_index] + matching_indexes,
[joined_index_vars] + matching_index_vars,
strict=True,
)
)
need_reindex = self._need_reindex(dims, cmp_indexes)
else:
if len(matching_indexes) > 1:
need_reindex = self._need_reindex(
dims,
list(zip(matching_indexes, matching_index_vars)),
list(zip(matching_indexes, matching_index_vars, strict=True)),
)
else:
need_reindex = False
@@ -557,7 +558,7 @@ def reindex_all(self) -> None:
self.results = tuple(
self._reindex_one(obj, matching_indexes)
for obj, matching_indexes in zip(
self.objects, self.objects_matching_indexes
self.objects, self.objects_matching_indexes, strict=True
)
)

@@ -952,7 +953,7 @@ def is_alignable(obj):
fill_value=fill_value,
)

for position, key, aligned_obj in zip(positions, keys, aligned):
for position, key, aligned_obj in zip(positions, keys, aligned, strict=True):
if key is no_key:
out[position] = aligned_obj
else:
7 changes: 4 additions & 3 deletions xarray/core/combine.py
@@ -139,7 +139,8 @@ def _infer_concat_order_from_coords(datasets):
# Append positions along extra dimension to structure which
# encodes the multi-dimensional concatenation order
tile_ids = [
tile_id + (position,) for tile_id, position in zip(tile_ids, order)
tile_id + (position,)
for tile_id, position in zip(tile_ids, order, strict=True)
]

if len(datasets) > 1 and not concat_dims:
@@ -148,7 +149,7 @@
"order the datasets for concatenation"
)

combined_ids = dict(zip(tile_ids, datasets))
combined_ids = dict(zip(tile_ids, datasets, strict=True))

return combined_ids, concat_dims

@@ -349,7 +350,7 @@ def _nested_combine(
combined_ids = _infer_concat_order_from_positions(datasets)
else:
# Already sorted so just use the ids already passed
combined_ids = dict(zip(ids, datasets))
combined_ids = dict(zip(ids, datasets, strict=True))

# Check that the inferred shape is combinable
_check_shape_tile_ids(combined_ids)
2 changes: 1 addition & 1 deletion xarray/core/common.py
@@ -254,7 +254,7 @@ def sizes(self: Any) -> Mapping[Hashable, int]:
--------
Dataset.sizes
"""
return Frozen(dict(zip(self.dims, self.shape)))
return Frozen(dict(zip(self.dims, self.shape, strict=True)))


class AttrAccessMixin: