Skip to content

Commit 4d8bbee

Browse files
authored
Add typos check to pre-commit hooks (#10040)
* Add typos check to pre-commit hooks. Also fixes a bunch of typos. The work here was adding the exclusions (I had an LLM generate them, but I also checked them).
1 parent 2a8340d commit 4d8bbee

14 files changed

+93
-40
lines changed

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,9 @@ repos:
7373
hooks:
7474
- id: validate-pyproject
7575
additional_dependencies: ["validate-pyproject-schema-store[all]"]
76+
- repo: https://github.com/crate-ci/typos
77+
rev: dictgen-v0.3.1
78+
hooks:
79+
- id: typos
80+
# https://github.com/crate-ci/typos/issues/347
81+
pass_filenames: false

doc/whats-new.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class can be passed through the ``decode_times`` keyword argument (see also
111111
coder = xr.coders.CFDatetimeCoder(time_unit="s")
112112
ds = xr.open_dataset(filename, decode_times=coder)
113113
114-
Similar control of the resoution of decoded timedeltas can be achieved through
114+
Similar control of the resolution of decoded timedeltas can be achieved through
115115
passing a :py:class:`coders.CFTimedeltaCoder` instance to the
116116
``decode_timedelta`` keyword argument:
117117

pyproject.toml

Lines changed: 67 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
[project]
2-
authors = [
3-
{ name = "xarray Developers", email = "xarray@googlegroups.com" },
4-
]
2+
authors = [{ name = "xarray Developers", email = "xarray@googlegroups.com" }]
53
classifiers = [
64
"Development Status :: 5 - Production/Stable",
75
"License :: OSI Approved :: Apache Software License",
@@ -22,11 +20,7 @@ name = "xarray"
2220
readme = "README.md"
2321
requires-python = ">=3.10"
2422

25-
dependencies = [
26-
"numpy>=1.24",
27-
"packaging>=23.2",
28-
"pandas>=2.1",
29-
]
23+
dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"]
3024

3125
# We don't encode minimum requirements here (though if we can write a script to
3226
# generate the text from `min_deps_check.py`, that's welcome...). We do add
@@ -70,6 +64,7 @@ types = [
7064
"types-PyYAML",
7165
"types-Pygments",
7266
"types-colorama",
67+
"types-decorator",
7368
"types-defusedxml",
7469
"types-docutils",
7570
"types-networkx",
@@ -93,10 +88,7 @@ dask = "xarray.namedarray.daskmanager:DaskManager"
9388

9489
[build-system]
9590
build-backend = "setuptools.build_meta"
96-
requires = [
97-
"setuptools>=42",
98-
"setuptools-scm>=7",
99-
]
91+
requires = ["setuptools>=42", "setuptools-scm>=7"]
10092

10193
[tool.setuptools]
10294
packages = ["xarray"]
@@ -120,10 +112,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]
120112

121113
[tool.mypy]
122114
enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
123-
exclude = [
124-
'build',
125-
'xarray/util/generate_.*\.py',
126-
]
115+
exclude = ['build', 'xarray/util/generate_.*\.py']
127116
files = "xarray"
128117
show_error_context = true
129118
warn_redundant_casts = true
@@ -254,10 +243,7 @@ module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"]
254243
# reportMissingTypeStubs = false
255244

256245
[tool.ruff]
257-
extend-exclude = [
258-
"doc",
259-
"_typed_ops.pyi",
260-
]
246+
extend-exclude = ["doc", "_typed_ops.pyi"]
261247

262248
[tool.ruff.lint]
263249
extend-select = [
@@ -383,3 +369,64 @@ test = "pytest"
383369
ignore = [
384370
"PP308", # This option creates a large amount of log lines.
385371
]
372+
373+
[tool.typos]
374+
375+
[tool.typos.default]
376+
extend-ignore-identifiers-re = [
377+
# Variable names
378+
"nd_.*",
379+
".*_nd",
380+
"ba_.*",
381+
".*_ba",
382+
"ser_.*",
383+
".*_ser",
384+
# Function/class names
385+
"NDArray.*",
386+
".*NDArray.*",
387+
]
388+
389+
[tool.typos.default.extend-words]
390+
# NumPy function names
391+
arange = "arange"
392+
393+
# Technical terms
394+
nd = "nd"
395+
nin = "nin"
396+
397+
# Variable names
398+
ba = "ba"
399+
ser = "ser"
400+
fo = "fo"
401+
iy = "iy"
402+
vart = "vart"
403+
ede = "ede"
404+
405+
# Organization/Institution names
406+
Stichting = "Stichting"
407+
Mathematisch = "Mathematisch"
408+
409+
# People's names
410+
Soler = "Soler"
411+
Bruning = "Bruning"
412+
Tung = "Tung"
413+
Claus = "Claus"
414+
Celles = "Celles"
415+
slowy = "slowy"
416+
Commun = "Commun"
417+
418+
# Tests
419+
Ome = "Ome"
420+
SUR = "SUR"
421+
Tio = "Tio"
422+
Ono = "Ono"
423+
abl = "abl"
424+
425+
# Technical terms
426+
splitted = "splitted"
427+
childs = "childs"
428+
cutted = "cutted"
429+
LOCA = "LOCA"
430+
431+
[tool.typos.type.jupyter]
432+
extend-ignore-re = ["\"id\": \".*\""]

xarray/core/alignment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ def reindex(
979979
"""
980980

981981
# TODO: (benbovy - explicit indexes): uncomment?
982-
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
982+
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
983983
# bad_keys = [k for k in indexers if k not in obj._indexes and k not in obj.dims]
984984
# if bad_keys:
985985
# raise ValueError(

xarray/core/dataarray.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,8 +1973,8 @@ def reindex_like(
19731973
names to pandas.Index objects, which provides coordinates upon
19741974
which to index the variables in this dataset. The indexes on this
19751975
other object need not be the same as the indexes on this
1976-
dataset. Any mis-matched index values will be filled in with
1977-
NaN, and any mis-matched dimension names will simply be ignored.
1976+
dataset. Any mismatched index values will be filled in with
1977+
NaN, and any mismatched dimension names will simply be ignored.
19781978
method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional
19791979
Method to use for filling index values from other not found on this
19801980
data array:
@@ -2155,8 +2155,8 @@ def reindex(
21552155
----------
21562156
indexers : dict, optional
21572157
Dictionary with keys given by dimension names and values given by
2158-
arrays of coordinates tick labels. Any mis-matched coordinate
2159-
values will be filled in with NaN, and any mis-matched dimension
2158+
arrays of coordinates tick labels. Any mismatched coordinate
2159+
values will be filled in with NaN, and any mismatched dimension
21602160
names will simply be ignored.
21612161
One of indexers or indexers_kwargs must be provided.
21622162
copy : bool, optional

xarray/core/dataset.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3364,8 +3364,8 @@ def reindex_like(
33643364
names to pandas.Index objects, which provides coordinates upon
33653365
which to index the variables in this dataset. The indexes on this
33663366
other object need not be the same as the indexes on this
3367-
dataset. Any mis-matched index values will be filled in with
3368-
NaN, and any mis-matched dimension names will simply be ignored.
3367+
dataset. Any mismatched index values will be filled in with
3368+
NaN, and any mismatched dimension names will simply be ignored.
33693369
method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional
33703370
Method to use for filling index values from other not found in this
33713371
dataset:
@@ -3430,8 +3430,8 @@ def reindex(
34303430
----------
34313431
indexers : dict, optional
34323432
Dictionary with keys given by dimension names and values given by
3433-
arrays of coordinates tick labels. Any mis-matched coordinate
3434-
values will be filled in with NaN, and any mis-matched dimension
3433+
arrays of coordinates tick labels. Any mismatched coordinate
3434+
values will be filled in with NaN, and any mismatched dimension
34353435
names will simply be ignored.
34363436
One of indexers or indexers_kwargs must be provided.
34373437
method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional

xarray/core/indexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1464,7 +1464,7 @@ def sel(
14641464
if any(ds != dim_size0 for ds in dim_size):
14651465
raise ValueError(
14661466
"CoordinateTransformIndex only supports advanced (point-wise) indexing "
1467-
"with xarray.DataArray or xarray.Variable objects of macthing dimensions."
1467+
"with xarray.DataArray or xarray.Variable objects of matching dimensions."
14681468
)
14691469

14701470
coord_labels = {

xarray/testing/strategies.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ def unique_subset_of(
477477
)
478478

479479

480-
class CFTimeStategy(st.SearchStrategy):
480+
class CFTimeStrategy(st.SearchStrategy):
481481
def __init__(self, min_value, max_value):
482482
self.min_value = min_value
483483
self.max_value = max_value
@@ -506,5 +506,5 @@ def do_draw(self, data):
506506
daysinmonth = date_type(99999, 12, 1).daysinmonth
507507
min_value = date_type(-99999, 1, 1)
508508
max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999)
509-
strategy = CFTimeStategy(min_value, max_value)
509+
strategy = CFTimeStrategy(min_value, max_value)
510510
return strategy.do_draw(data)

xarray/tests/test_backends.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2576,7 +2576,7 @@ def test_chunk_encoding_with_dask(self) -> None:
25762576
with self.roundtrip(original) as actual:
25772577
assert_identical(original, actual)
25782578

2579-
# but itermediate unaligned chunks are bad
2579+
# but intermediate unaligned chunks are bad
25802580
badenc = ds.chunk({"x": (3, 5, 3, 1)})
25812581
badenc.var1.encoding["chunks"] = (3,)
25822582
with pytest.raises(ValueError, match=r"would overlap multiple dask chunks"):

xarray/tests/test_coding_times.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,7 @@ def test_encode_cf_timedelta_casting_overflow_error(use_dask, dtype) -> None:
18261826

18271827
_DECODE_TIMEDELTA_TESTS = {
18281828
"default": (True, None, np.dtype("timedelta64[ns]"), True),
1829-
"decode_timdelta=False": (True, False, np.dtype("int64"), False),
1829+
"decode_timedelta=False": (True, False, np.dtype("int64"), False),
18301830
"inherit-time_unit-from-decode_times": (
18311831
CFDatetimeCoder(time_unit="s"),
18321832
None,

xarray/tests/test_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,7 +1243,7 @@ def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> No
12431243
assert rechunked.chunksizes["time"] == expected
12441244
assert rechunked.chunksizes["x"] == (2,) * 5
12451245

1246-
def test_chunk_by_frequecy_errors(self):
1246+
def test_chunk_by_frequency_errors(self):
12471247
ds = Dataset({"foo": ("x", [1, 2, 3])})
12481248
with pytest.raises(ValueError, match="virtual variable"):
12491249
ds.chunk(x=TimeResampler("YE"))
@@ -2204,7 +2204,7 @@ def test_reindex(self) -> None:
22042204

22052205
# invalid dimension
22062206
# TODO: (benbovy - explicit indexes): uncomment?
2207-
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
2207+
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
22082208
# with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"):
22092209
# data.reindex(invalid=0)
22102210

xarray/tests/test_datatree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ def test_filter_like(self) -> None:
16021602
assert filtered_tree.equals(barren_tree)
16031603
assert "flowers" not in filtered_tree.children
16041604

1605-
# test symetrical pruning results in isomorphic trees
1605+
# test symmetrical pruning results in isomorphic trees
16061606
assert flower_tree.filter_like(fruit_tree).isomorphic(
16071607
fruit_tree.filter_like(flower_tree)
16081608
)

xarray/tests/test_rolling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ def test_rolling_construct_automatic_rechunk(self):
606606

607607
# Construct dataset with chunk size of (400, 400, 1) or 1.22 MiB
608608
da = DataArray(
609-
dims=["latitute", "longitude", "time"],
609+
dims=["latitude", "longitude", "time"],
610610
data=dask.array.random.random((400, 400, 400), chunks=(-1, -1, 1)),
611611
name="foo",
612612
)

xarray/tests/test_variable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ def test_getitem_error(self):
868868

869869
v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
870870
ind = Variable(["x"], [0, 1])
871-
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
871+
with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
872872
v[:, ind]
873873

874874
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)