Repo checker (pydata#9450)
* Remove default mypy option

* Implement mypy ignore-without-code option

* Enable mypy redundant-expr

* Fix erroneous tuple types

* Remove ruff target-version, redundant with project

* Use extend selections for ruff

* Fix B009 and B010 with ruff

* Fix test parametrization

* Fix FutureWarning

* Make zips strict unless doing so causes errors, in which case set strict=False explicitly (see the zip(strict=...) sketch below)

* Add a commit message for pre-commit autoupdate
Armavica authored and hollymandel committed Sep 23, 2024
1 parent a52f861 commit 9c02047
Showing 78 changed files with 421 additions and 313 deletions.
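Most of the changes below apply the "make zips strict" bullet: Python 3.10 added a `strict=` keyword to `zip()`, and with `strict=True` a length mismatch between the iterables raises instead of silently truncating. A minimal standalone sketch of that behavior (the dims/shape values are invented for illustration, not taken from the diff):

```python
dims = ("x", "y")
shape = (10, 20)

# strict=True raises ValueError on a length mismatch instead of silently
# truncating to the shortest iterable.
sizes = dict(zip(dims, shape, strict=True))
assert sizes == {"x": 10, "y": 20}

try:
    dict(zip(dims, (10, 20, 30), strict=True))
except ValueError as err:
    print(f"caught mismatch early: {err}")
```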
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -1,6 +1,7 @@
 # https://pre-commit.com/
 ci:
   autoupdate_schedule: monthly
+  autoupdate_commit_msg: 'Update pre-commit hooks'
 exclude: 'xarray/datatree_.*'
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/dataset_io.py
@@ -724,7 +724,7 @@ class PerformanceBackend(xr.backends.BackendEntrypoint):
     def open_dataset(
         self,
         filename_or_obj: str | os.PathLike | None,
-        drop_variables: tuple[str] = None,
+        drop_variables: tuple[str, ...] = None,
         *,
         mask_and_scale=True,
         decode_times=True,
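The annotation fix above is the "erroneous tuple types" bullet: `tuple[str]` is the type of a 1-tuple containing exactly one string, while `tuple[str, ...]` is a tuple of any length. A short illustration (variable names invented, not from the diff):

```python
names_one: tuple[str] = ("a",)           # tuple[str] means "exactly one str"
names_any: tuple[str, ...] = ("a", "b")  # tuple[str, ...] means "zero or more strs"

# mypy rejects the next line, because a two-element tuple is not a 1-tuple:
# names_wrong: tuple[str] = ("a", "b")
```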
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -174,7 +174,7 @@ def setup(self, use_cftime, use_flox):
         # GH9426 - deep-copying CFTime object arrays is weirdly slow
         asda = xr.DataArray(time)
         labeled_time = []
-        for year, month in zip(asda.dt.year, asda.dt.month):
+        for year, month in zip(asda.dt.year, asda.dt.month, strict=True):
             labeled_time.append(cftime.datetime(year, month, 1))

         self.da = xr.DataArray(
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/rolling.py
@@ -64,7 +64,7 @@ def time_rolling_long(self, func, pandas, use_bottleneck):
     def time_rolling_np(self, window_, min_periods, use_bottleneck):
         with xr.set_options(use_bottleneck=use_bottleneck):
             self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
-                getattr(np, "nansum")
+                np.nansum
             ).load()

     @parameterized(
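Replacing `getattr(np, "nansum")` with `np.nansum` is the "Fix B009 and B010 with ruff" bullet: B009 flags `getattr` with a literal attribute name, and B010 flags the matching `setattr` pattern. A small sketch of both rules (the `Config` class is invented for illustration):

```python
import numpy as np


class Config:
    pass


cfg = Config()

# B010: setattr with a constant name is just an attribute assignment.
# setattr(cfg, "threshold", 0.5)
cfg.threshold = 0.5

# B009: getattr with a constant name is just an attribute access.
# reducer = getattr(np, "nansum")
reducer = np.nansum
print(reducer([1.0, float("nan"), 2.0]))  # 3.0
```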
2 changes: 1 addition & 1 deletion doc/user-guide/testing.rst
@@ -193,7 +193,7 @@ different type:

 .. ipython:: python

-    def sparse_random_arrays(shape: tuple[int]) -> sparse._coo.core.COO:
+    def sparse_random_arrays(shape: tuple[int, ...]) -> sparse._coo.core.COO:
         """Strategy which generates random sparse.COO arrays"""
         if shape is None:
             shape = npst.array_shapes()
2 changes: 1 addition & 1 deletion properties/test_pandas_roundtrip.py
@@ -80,7 +80,7 @@ def test_roundtrip_dataarray(data, arr) -> None:
             tuple
         )
     )
-    coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)}
+    coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape, strict=True)}
    original = xr.DataArray(arr, dims=names, coords=coords)
    roundtripped = xr.DataArray(original.to_pandas())
    xr.testing.assert_identical(original, roundtripped)
8 changes: 3 additions & 5 deletions pyproject.toml
@@ -84,14 +84,13 @@ source = ["xarray"]
 exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]

 [tool.mypy]
-enable_error_code = "redundant-self"
+enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
 exclude = [
   'build',
   'xarray/util/generate_.*\.py',
   'xarray/datatree_/doc/.*\.py',
 ]
 files = "xarray"
-show_error_codes = true
 show_error_context = true
 warn_redundant_casts = true
 warn_unused_configs = true
@@ -240,7 +239,6 @@ extend-exclude = [
   "doc",
   "_typed_ops.pyi",
 ]
-target-version = "py310"

 [tool.ruff.lint]
 # E402: module level import not at top of file
@@ -249,13 +247,13 @@ target-version = "py310"
 extend-safe-fixes = [
   "TID252", # absolute imports
 ]
-ignore = [
+extend-ignore = [
   "E402",
   "E501",
   "E731",
   "UP007",
 ]
-select = [
+extend-select = [
   "F", # Pyflakes
   "E", # Pycodestyle
   "W",
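The new `enable_error_code` list is what drives the `# type: ignore[...]` edits throughout this commit: with `ignore-without-code` enabled, a bare `# type: ignore` is itself reported and must name the code(s) it suppresses. A hedged sketch of the difference (the assignments are deliberately ill-typed for illustration):

```python
# With ignore-without-code enabled, mypy reports the bare form below
# ("type: ignore" comment without error code) even though it silences the error:
port: int = "8080"  # type: ignore

# The accepted form names the suppressed code explicitly:
host: int = "localhost"  # type: ignore[assignment]
```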
16 changes: 10 additions & 6 deletions xarray/backends/api.py
@@ -51,7 +51,7 @@
     try:
         from dask.delayed import Delayed
     except ImportError:
-        Delayed = None # type: ignore
+        Delayed = None # type: ignore[assignment, misc]
     from io import BufferedIOBase

     from xarray.backends.common import BackendEntrypoint
@@ -1113,7 +1113,7 @@ def open_mfdataset(
             list(combined_ids_paths.keys()),
             list(combined_ids_paths.values()),
         )
-    elif combine == "by_coords" and concat_dim is not None:
+    elif concat_dim is not None:
         raise ValueError(
             "When combine='by_coords', passing a value for `concat_dim` has no "
             "effect. To manually combine along a specific dimension you should "
@@ -1432,7 +1432,7 @@ def to_netcdf(
             store.sync()
             return target.getvalue()
     finally:
-        if not multifile and compute:
+        if not multifile and compute: # type: ignore[redundant-expr]
             store.close()

     if not compute:
@@ -1585,8 +1585,9 @@ def save_mfdataset(
                 multifile=True,
                 **kwargs,
             )
-            for ds, path, group in zip(datasets, paths, groups)
-        ]
+            for ds, path, group in zip(datasets, paths, groups, strict=True)
+        ],
+        strict=True,
     )

     try:
@@ -1600,7 +1601,10 @@ def save_mfdataset(
         import dask

         return dask.delayed(
-            [dask.delayed(_finalize_store)(w, s) for w, s in zip(writes, stores)]
+            [
+                dask.delayed(_finalize_store)(w, s)
+                for w, s in zip(writes, stores, strict=True)
+            ]
         )


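Dropping `combine == "by_coords"` from the `elif` above is a `redundant-expr` cleanup: `combine` is typed as a two-value `Literal`, so once the `combine == "nested"` branch is handled, the remaining value can only be `"by_coords"`. A simplified sketch of the narrowing (signature reduced for illustration, not the real `open_mfdataset`):

```python
from typing import Literal


def check_combine(combine: Literal["by_coords", "nested"], concat_dim: str | None) -> str:
    if combine == "nested":
        return "nested"
    elif concat_dim is not None:
        # combine can only be "by_coords" here, so re-testing it would be
        # flagged by mypy's redundant-expr error code.
        raise ValueError("concat_dim has no effect with combine='by_coords'")
    return "by_coords"


print(check_combine("by_coords", None))  # by_coords
```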
2 changes: 1 addition & 1 deletion xarray/backends/common.py
@@ -431,7 +431,7 @@ def set_dimensions(self, variables, unlimited_dims=None):
         for v in unlimited_dims: # put unlimited_dims first
             dims[v] = None
         for v in variables.values():
-            dims.update(dict(zip(v.dims, v.shape)))
+            dims.update(dict(zip(v.dims, v.shape, strict=True)))

         for dim, length in dims.items():
             if dim in existing_dims and length != existing_dims[dim]:
2 changes: 1 addition & 1 deletion xarray/backends/file_manager.py
@@ -276,7 +276,7 @@ def __getstate__(self):
     def __setstate__(self, state) -> None:
         """Restore from a pickle."""
         opener, args, mode, kwargs, lock, manager_id = state
-        self.__init__( # type: ignore
+        self.__init__( # type: ignore[misc]
             opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id
         )

4 changes: 3 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -208,7 +208,9 @@ def open_store_variable(self, name, var):
             "shuffle": var.shuffle,
         }
         if var.chunks:
-            encoding["preferred_chunks"] = dict(zip(var.dimensions, var.chunks))
+            encoding["preferred_chunks"] = dict(
+                zip(var.dimensions, var.chunks, strict=True)
+            )
         # Convert h5py-style compression options to NetCDF4-Python
         # style, if possible
         if var.compression == "gzip":
8 changes: 6 additions & 2 deletions xarray/backends/netCDF4_.py
@@ -278,7 +278,9 @@ def _extract_nc4_variable_encoding(
         chunksizes = encoding["chunksizes"]
         chunks_too_big = any(
             c > d and dim not in unlimited_dims
-            for c, d, dim in zip(chunksizes, variable.shape, variable.dims)
+            for c, d, dim in zip(
+                chunksizes, variable.shape, variable.dims, strict=False
+            )
         )
         has_original_shape = "original_shape" in encoding
         changed_shape = (
@@ -446,7 +448,9 @@ def open_store_variable(self, name: str, var):
         else:
             encoding["contiguous"] = False
             encoding["chunksizes"] = tuple(chunking)
-            encoding["preferred_chunks"] = dict(zip(var.dimensions, chunking))
+            encoding["preferred_chunks"] = dict(
+                zip(var.dimensions, chunking, strict=True)
+            )
         # TODO: figure out how to round-trip "endian-ness" without raising
         # warnings from netCDF4
         # encoding['endian'] = var.endian()
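The first hunk above is one of the spots the commit message singles out as needing an explicit `strict=False`: presumably the encoded `chunksizes` can differ in length from the variable's current shape, and the check relies on the old truncating behavior. A standalone sketch of why spelling out `strict=False` keeps that working (values invented):

```python
chunksizes = (10, 10, 1)   # e.g. encoding carried over from a different file
shape = (100, 100)         # the variable currently has only two dimensions
dims = ("x", "y")

# strict=False is the default, but spelling it out documents that truncation
# to the shortest input is intentional here.
pairs = list(zip(chunksizes, shape, dims, strict=False))
assert pairs == [(10, 100, "x"), (10, 100, "y")]

# strict=True would raise ValueError instead of comparing what it can.
```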
2 changes: 1 addition & 1 deletion xarray/backends/plugins.py
@@ -199,7 +199,7 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint:
                 "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html"
             )
         backend = engines[engine]
-    elif isinstance(engine, type) and issubclass(engine, BackendEntrypoint):
+    elif issubclass(engine, BackendEntrypoint):
         backend = engine()
     else:
         raise TypeError(
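Removing `isinstance(engine, type)` is another `redundant-expr` cleanup: given the annotation `engine: str | type[BackendEntrypoint]`, once the string branch is handled mypy already knows `engine` is a class. A reduced sketch of the same narrowing (the lookup-by-name branch is elided, and the stub class stands in for xarray's real one):

```python
class BackendEntrypoint:
    """Stand-in for xarray's backend entry point base class."""


def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint:
    if isinstance(engine, str):
        raise NotImplementedError("name lookup elided in this sketch")
    elif issubclass(engine, BackendEntrypoint):
        # engine is already narrowed to type[BackendEntrypoint]; adding
        # isinstance(engine, type) here would be flagged as redundant.
        return engine()
    else:
        raise TypeError("engine must be a string or a BackendEntrypoint subclass")


print(get_backend(BackendEntrypoint))
```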
8 changes: 4 additions & 4 deletions xarray/backends/zarr.py
@@ -186,7 +186,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
     # TODO: incorporate synchronizer to allow writes from multiple dask
     # threads
     if var_chunks and enc_chunks_tuple:
-        for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks):
+        for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks, strict=True):
             for dchunk in dchunks[:-1]:
                 if dchunk % zchunk:
                     base_error = (
@@ -548,13 +548,13 @@ def open_store_variable(self, name, zarr_array=None):

         encoding = {
             "chunks": zarr_array.chunks,
-            "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
+            "preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)),
             "compressor": zarr_array.compressor,
             "filters": zarr_array.filters,
         }
         # _FillValue needs to be in attributes, not encoding, so it will get
         # picked up by decode_cf
-        if getattr(zarr_array, "fill_value") is not None:
+        if zarr_array.fill_value is not None:
             attributes["_FillValue"] = zarr_array.fill_value

         return Variable(dimensions, data, attributes, encoding)
@@ -576,7 +576,7 @@ def get_dimensions(self):
         dimensions = {}
         for k, v in self.zarr_group.arrays():
             dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr)
-            for d, s in zip(dim_names, v.shape):
+            for d, s in zip(dim_names, v.shape, strict=True):
                 if d in dimensions and dimensions[d] != s:
                     raise ValueError(
                         f"found conflicting lengths for dimension {d} "
2 changes: 1 addition & 1 deletion xarray/coding/calendar_ops.py
@@ -198,7 +198,7 @@ def convert_calendar(
                 _convert_to_new_calendar_with_new_day_of_year(
                     date, newdoy, calendar, use_cftime
                 )
-                for date, newdoy in zip(time.variable._data.array, new_doy)
+                for date, newdoy in zip(time.variable._data.array, new_doy, strict=True)
             ],
             dims=(dim,),
             name=dim,
14 changes: 7 additions & 7 deletions xarray/coding/times.py
@@ -204,7 +204,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:


 def _decode_cf_datetime_dtype(
-    data, units: str, calendar: str, use_cftime: bool | None
+    data, units: str, calendar: str | None, use_cftime: bool | None
 ) -> np.dtype:
     # Verify that at least the first and last date can be decoded
     # successfully. Otherwise, tracebacks end up swallowed by
@@ -704,7 +704,7 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray:


 def encode_cf_datetime(
-    dates: T_DuckArray, # type: ignore
+    dates: T_DuckArray, # type: ignore[misc]
     units: str | None = None,
     calendar: str | None = None,
     dtype: np.dtype | None = None,
@@ -726,7 +726,7 @@ def encode_cf_datetime(


 def _eagerly_encode_cf_datetime(
-    dates: T_DuckArray, # type: ignore
+    dates: T_DuckArray, # type: ignore[misc]
     units: str | None = None,
     calendar: str | None = None,
     dtype: np.dtype | None = None,
@@ -809,7 +809,7 @@ def _eagerly_encode_cf_datetime(


 def _encode_cf_datetime_within_map_blocks(
-    dates: T_DuckArray, # type: ignore
+    dates: T_DuckArray, # type: ignore[misc]
     units: str,
     calendar: str,
     dtype: np.dtype,
@@ -859,7 +859,7 @@ def _lazily_encode_cf_datetime(


 def encode_cf_timedelta(
-    timedeltas: T_DuckArray, # type: ignore
+    timedeltas: T_DuckArray, # type: ignore[misc]
     units: str | None = None,
     dtype: np.dtype | None = None,
 ) -> tuple[T_DuckArray, str]:
@@ -871,7 +871,7 @@ def encode_cf_timedelta(


 def _eagerly_encode_cf_timedelta(
-    timedeltas: T_DuckArray, # type: ignore
+    timedeltas: T_DuckArray, # type: ignore[misc]
     units: str | None = None,
     dtype: np.dtype | None = None,
     allow_units_modification: bool = True,
@@ -923,7 +923,7 @@ def _eagerly_encode_cf_timedelta(


 def _encode_cf_timedelta_within_map_blocks(
-    timedeltas: T_DuckArray, # type:ignore
+    timedeltas: T_DuckArray, # type: ignore[misc]
     units: str,
     dtype: np.dtype,
 ) -> T_DuckArray:
7 changes: 4 additions & 3 deletions xarray/core/alignment.py
@@ -405,14 +405,15 @@ def align_indexes(self) -> None:
                     zip(
                         [joined_index] + matching_indexes,
                         [joined_index_vars] + matching_index_vars,
+                        strict=True,
                     )
                 )
                 need_reindex = self._need_reindex(dims, cmp_indexes)
             else:
                 if len(matching_indexes) > 1:
                     need_reindex = self._need_reindex(
                         dims,
-                        list(zip(matching_indexes, matching_index_vars)),
+                        list(zip(matching_indexes, matching_index_vars, strict=True)),
                     )
                 else:
                     need_reindex = False
@@ -557,7 +558,7 @@ def reindex_all(self) -> None:
         self.results = tuple(
             self._reindex_one(obj, matching_indexes)
             for obj, matching_indexes in zip(
-                self.objects, self.objects_matching_indexes
+                self.objects, self.objects_matching_indexes, strict=True
             )
         )

@@ -952,7 +953,7 @@ def is_alignable(obj):
         fill_value=fill_value,
     )

-    for position, key, aligned_obj in zip(positions, keys, aligned):
+    for position, key, aligned_obj in zip(positions, keys, aligned, strict=True):
         if key is no_key:
             out[position] = aligned_obj
         else:
7 changes: 4 additions & 3 deletions xarray/core/combine.py
@@ -139,7 +139,8 @@ def _infer_concat_order_from_coords(datasets):
                 # Append positions along extra dimension to structure which
                 # encodes the multi-dimensional concatenation order
                 tile_ids = [
-                    tile_id + (position,) for tile_id, position in zip(tile_ids, order)
+                    tile_id + (position,)
+                    for tile_id, position in zip(tile_ids, order, strict=True)
                 ]

     if len(datasets) > 1 and not concat_dims:
@@ -148,7 +149,7 @@ def _infer_concat_order_from_coords(datasets):
             "order the datasets for concatenation"
         )

-    combined_ids = dict(zip(tile_ids, datasets))
+    combined_ids = dict(zip(tile_ids, datasets, strict=True))

     return combined_ids, concat_dims

@@ -349,7 +350,7 @@ def _nested_combine(
         combined_ids = _infer_concat_order_from_positions(datasets)
     else:
         # Already sorted so just use the ids already passed
-        combined_ids = dict(zip(ids, datasets))
+        combined_ids = dict(zip(ids, datasets, strict=True))

     # Check that the inferred shape is combinable
     _check_shape_tile_ids(combined_ids)
2 changes: 1 addition & 1 deletion xarray/core/common.py
@@ -254,7 +254,7 @@ def sizes(self: Any) -> Mapping[Hashable, int]:
         --------
         Dataset.sizes
         """
-        return Frozen(dict(zip(self.dims, self.shape)))
+        return Frozen(dict(zip(self.dims, self.shape, strict=True)))


 class AttrAccessMixin: