Commit 1a7b285

andersy005 authored
Code cleanup (#5234)

Co-authored-by: keewis <keewis@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
Co-authored-by: Stephan Hoyer <shoyer@google.com>
1 parent 4067c01 commit 1a7b285

48 files changed: 377 additions & 514 deletions

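The diffs below apply a handful of recurring idioms: %-interpolation replaced by f-strings (keeping !r where repr formatting was intended), assign-then-return collapsed into a direct return, `not type(x) is T` rewritten as the idiomatic `type(x) is not T`, and build-a-dict loops turned into comprehensions. A minimal standalone sketch of these idioms (illustrative only; none of the names below are xarray code):

    engine = "scipy"

    # %-interpolation -> f-string; !r preserves the repr() formatting of %r
    assert "engine: %r" % engine == f"engine: {engine!r}"

    # assign-then-return -> direct return
    def get_encoding_old(dims):
        encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}}
        return encoding

    def get_encoding_new(dims):
        return {"unlimited_dims": {k for k, v in dims.items() if v is None}}

    assert get_encoding_old({"x": None, "y": 3}) == get_encoding_new({"x": None, "y": 3})

    # `not type(x) is T` -> `type(x) is not T` (same result, clearer intent)
    assert (not type(1) is str) == (type(1) is not str)
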
xarray/backends/api.py

Lines changed: 19 additions & 26 deletions
@@ -102,12 +102,11 @@ def _get_default_engine_netcdf():
 
 def _get_default_engine(path: str, allow_remote: bool = False):
     if allow_remote and is_remote_uri(path):
-        engine = _get_default_engine_remote_uri()
+        return _get_default_engine_remote_uri()
     elif path.endswith(".gz"):
-        engine = _get_default_engine_gz()
+        return _get_default_engine_gz()
     else:
-        engine = _get_default_engine_netcdf()
-    return engine
+        return _get_default_engine_netcdf()
 
 
 def _validate_dataset_names(dataset):
@@ -282,7 +281,7 @@ def _chunk_ds(
 
     mtime = _get_mtime(filename_or_obj)
     token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
-    name_prefix = "open_dataset-%s" % token
+    name_prefix = f"open_dataset-{token}"
 
     variables = {}
     for name, var in backend_ds.variables.items():
@@ -295,8 +294,7 @@ def _chunk_ds(
             name_prefix=name_prefix,
             token=token,
         )
-    ds = backend_ds._replace(variables)
-    return ds
+    return backend_ds._replace(variables)
 
 
 def _dataset_from_backend_dataset(
@@ -308,12 +306,10 @@ def _dataset_from_backend_dataset(
     overwrite_encoded_chunks,
     **extra_tokens,
 ):
-    if not (isinstance(chunks, (int, dict)) or chunks is None):
-        if chunks != "auto":
-            raise ValueError(
-                "chunks must be an int, dict, 'auto', or None. "
-                "Instead found %s. " % chunks
-            )
+    if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}:
+        raise ValueError(
+            f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
+        )
 
     _protect_dataset_variables_inplace(backend_ds, cache)
     if chunks is None:
@@ -331,9 +327,8 @@ def _dataset_from_backend_dataset(
     ds.set_close(backend_ds._close)
 
     # Ensure source filename always stored in dataset object (GH issue #2550)
-    if "source" not in ds.encoding:
-        if isinstance(filename_or_obj, str):
-            ds.encoding["source"] = filename_or_obj
+    if "source" not in ds.encoding and isinstance(filename_or_obj, str):
+        ds.encoding["source"] = filename_or_obj
 
     return ds
 
@@ -515,7 +510,6 @@ def open_dataset(
         **decoders,
         **kwargs,
     )
-
     return ds
 
 
@@ -1015,8 +1009,8 @@ def to_netcdf(
        elif engine != "scipy":
            raise ValueError(
                "invalid engine for creating bytes with "
-                "to_netcdf: %r. Only the default engine "
-                "or engine='scipy' is supported" % engine
+                f"to_netcdf: {engine!r}. Only the default engine "
+                "or engine='scipy' is supported"
            )
        if not compute:
            raise NotImplementedError(
@@ -1037,7 +1031,7 @@ def to_netcdf(
     try:
         store_open = WRITEABLE_STORES[engine]
     except KeyError:
-        raise ValueError("unrecognized engine for to_netcdf: %r" % engine)
+        raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}")
 
     if format is not None:
         format = format.upper()
@@ -1049,9 +1043,8 @@ def to_netcdf(
     autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"]
     if autoclose and engine == "scipy":
         raise NotImplementedError(
-            "Writing netCDF files with the %s backend "
-            "is not currently supported with dask's %s "
-            "scheduler" % (engine, scheduler)
+            f"Writing netCDF files with the {engine} backend "
+            f"is not currently supported with dask's {scheduler} scheduler"
         )
 
     target = path_or_file if path_or_file is not None else BytesIO()
@@ -1061,7 +1054,7 @@ def to_netcdf(
            kwargs["invalid_netcdf"] = invalid_netcdf
        else:
            raise ValueError(
-                "unrecognized option 'invalid_netcdf' for engine %s" % engine
+                f"unrecognized option 'invalid_netcdf' for engine {engine}"
            )
    store = store_open(target, mode, format, group, **kwargs)
 
@@ -1203,7 +1196,7 @@ def save_mfdataset(
    Data variables:
        a        (time) float64 0.0 0.02128 0.04255 0.06383 ... 0.9574 0.9787 1.0
    >>> years, datasets = zip(*ds.groupby("time.year"))
-    >>> paths = ["%s.nc" % y for y in years]
+    >>> paths = [f"{y}.nc" for y in years]
    >>> xr.save_mfdataset(datasets, paths)
    """
    if mode == "w" and len(set(paths)) < len(paths):
@@ -1215,7 +1208,7 @@ def save_mfdataset(
        if not isinstance(obj, Dataset):
            raise TypeError(
                "save_mfdataset only supports writing Dataset "
-                "objects, received type %s" % type(obj)
+                f"objects, received type {type(obj)}"
            )
 
    if groups is None:

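A standalone restatement of the simplified `chunks` check above; `validate_chunks` is a hypothetical wrapper used here only for illustration, not an xarray function:

    def validate_chunks(chunks):
        # `and` short-circuits: for int/dict the set-membership test is never
        # reached, which also avoids hashing an (unhashable) dict
        if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}:
            raise ValueError(
                f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
            )

    validate_chunks(10)        # ok: int
    validate_chunks({"x": 5})  # ok: dict
    validate_chunks("auto")    # ok: the literal "auto"
    validate_chunks(None)      # ok: None
    try:
        validate_chunks("5")   # any other value raises
    except ValueError as err:
        print(err)
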
xarray/backends/cfgrib_.py

Lines changed: 1 addition & 2 deletions
@@ -90,8 +90,7 @@ def get_dimensions(self):
 
     def get_encoding(self):
         dims = self.get_dimensions()
-        encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}}
-        return encoding
+        return {"unlimited_dims": {k for k, v in dims.items() if v is None}}
 
 
 class CfgribfBackendEntrypoint(BackendEntrypoint):

xarray/backends/common.py

Lines changed: 3 additions & 4 deletions
@@ -69,9 +69,8 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500):
             base_delay = initial_delay * 2 ** n
             next_delay = base_delay + np.random.randint(base_delay)
             msg = (
-                "getitem failed, waiting %s ms before trying again "
-                "(%s tries remaining). Full traceback: %s"
-                % (next_delay, max_retries - n, traceback.format_exc())
+                f"getitem failed, waiting {next_delay} ms before trying again "
+                f"({max_retries - n} tries remaining). Full traceback: {traceback.format_exc()}"
             )
             logger.debug(msg)
             time.sleep(1e-3 * next_delay)
@@ -336,7 +335,7 @@ def set_dimensions(self, variables, unlimited_dims=None):
             if dim in existing_dims and length != existing_dims[dim]:
                 raise ValueError(
                     "Unable to update size for existing dimension"
-                    "%r (%d != %d)" % (dim, length, existing_dims[dim])
+                    f"{dim!r} ({length} != {existing_dims[dim]})"
                 )
             elif dim not in existing_dims:
                 is_unlimited = dim in unlimited_dims

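The retry delays in robust_getitem grow exponentially with random jitter. A small sketch of the schedule the converted expressions produce, using the same arithmetic as the function (defaults max_retries=6, initial_delay=500):

    import numpy as np

    initial_delay = 500  # milliseconds
    for n in range(3):
        base_delay = initial_delay * 2 ** n
        # jitter of up to one extra base_delay, so retries don't synchronize
        next_delay = base_delay + np.random.randint(base_delay)
        print(f"retry {n}: sleeping {next_delay} ms")
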
xarray/backends/h5netcdf_.py

Lines changed: 6 additions & 9 deletions
@@ -37,8 +37,7 @@
 class H5NetCDFArrayWrapper(BaseNetCDF4Array):
     def get_array(self, needs_lock=True):
         ds = self.datastore._acquire(needs_lock)
-        variable = ds.variables[self.variable_name]
-        return variable
+        return ds.variables[self.variable_name]
 
     def __getitem__(self, key):
         return indexing.explicit_indexing_adapter(
@@ -102,7 +101,7 @@ def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
             if group is None:
                 root, group = find_root_and_group(manager)
             else:
-                if not type(manager) is h5netcdf.File:
+                if type(manager) is not h5netcdf.File:
                     raise ValueError(
                         "must supply a h5netcdf.File if the group "
                         "argument is provided"
@@ -233,11 +232,9 @@ def get_dimensions(self):
         return self.ds.dimensions
 
     def get_encoding(self):
-        encoding = {}
-        encoding["unlimited_dims"] = {
-            k for k, v in self.ds.dimensions.items() if v is None
+        return {
+            "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None}
         }
-        return encoding
 
     def set_dimension(self, name, length, is_unlimited=False):
         if is_unlimited:
@@ -266,9 +263,9 @@ def prepare_variable(
                 "h5netcdf does not yet support setting a fill value for "
                 "variable-length strings "
                 "(https://github.com/shoyer/h5netcdf/issues/37). "
-                "Either remove '_FillValue' from encoding on variable %r "
+                f"Either remove '_FillValue' from encoding on variable {name!r} "
                 "or set {'dtype': 'S1'} in encoding to use the fixed width "
-                "NC_CHAR type." % name
+                "NC_CHAR type."
             )
 
         if dtype is str:

xarray/backends/locks.py

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ def locked(self):
         return any(lock.locked for lock in self.locks)
 
     def __repr__(self):
-        return "CombinedLock(%r)" % list(self.locks)
+        return f"CombinedLock({list(self.locks)!r})"
 
 
 class DummyLock:

xarray/backends/netCDF4_.py

Lines changed: 28 additions & 35 deletions
@@ -122,25 +122,23 @@ def _encode_nc4_variable(var):
 def _check_encoding_dtype_is_vlen_string(dtype):
     if dtype is not str:
         raise AssertionError(  # pragma: no cover
-            "unexpected dtype encoding %r. This shouldn't happen: please "
-            "file a bug report at github.com/pydata/xarray" % dtype
+            f"unexpected dtype encoding {dtype!r}. This shouldn't happen: please "
+            "file a bug report at github.com/pydata/xarray"
         )
 
 
 def _get_datatype(var, nc_format="NETCDF4", raise_on_invalid_encoding=False):
     if nc_format == "NETCDF4":
-        datatype = _nc4_dtype(var)
-    else:
-        if "dtype" in var.encoding:
-            encoded_dtype = var.encoding["dtype"]
-            _check_encoding_dtype_is_vlen_string(encoded_dtype)
-            if raise_on_invalid_encoding:
-                raise ValueError(
-                    "encoding dtype=str for vlen strings is only supported "
-                    "with format='NETCDF4'."
-                )
-        datatype = var.dtype
-    return datatype
+        return _nc4_dtype(var)
+    if "dtype" in var.encoding:
+        encoded_dtype = var.encoding["dtype"]
+        _check_encoding_dtype_is_vlen_string(encoded_dtype)
+        if raise_on_invalid_encoding:
+            raise ValueError(
+                "encoding dtype=str for vlen strings is only supported "
+                "with format='NETCDF4'."
+            )
+    return var.dtype
 
 
 def _nc4_dtype(var):
@@ -178,7 +176,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
                     ds = create_group(ds, key)
                 else:
                     # wrap error to provide slightly more helpful message
-                    raise OSError("group not found: %s" % key, e)
+                    raise OSError(f"group not found: {key}", e)
         return ds
 
 
@@ -203,7 +201,7 @@ def _force_native_endianness(var):
         # if endian exists, remove it from the encoding.
         var.encoding.pop("endian", None)
     # check to see if encoding has a value for endian its 'native'
-    if not var.encoding.get("endian", "native") == "native":
+    if var.encoding.get("endian", "native") != "native":
         raise NotImplementedError(
             "Attempt to write non-native endian type, "
             "this is not supported by the netCDF4 "
@@ -270,8 +268,8 @@ def _extract_nc4_variable_encoding(
         invalid = [k for k in encoding if k not in valid_encodings]
         if invalid:
             raise ValueError(
-                "unexpected encoding parameters for %r backend: %r. Valid "
-                "encodings are: %r" % (backend, invalid, valid_encodings)
+                f"unexpected encoding parameters for {backend!r} backend: {invalid!r}. Valid "
+                f"encodings are: {valid_encodings!r}"
             )
     else:
         for k in list(encoding):
@@ -282,10 +280,8 @@ def _extract_nc4_variable_encoding(
 
 
 def _is_list_of_strings(value):
-    if np.asarray(value).dtype.kind in ["U", "S"] and np.asarray(value).size > 1:
-        return True
-    else:
-        return False
+    arr = np.asarray(value)
+    return arr.dtype.kind in ["U", "S"] and arr.size > 1
 
 
 class NetCDF4DataStore(WritableCFDataStore):
@@ -313,7 +309,7 @@ def __init__(
             if group is None:
                 root, group = find_root_and_group(manager)
             else:
-                if not type(manager) is netCDF4.Dataset:
+                if type(manager) is not netCDF4.Dataset:
                     raise ValueError(
                         "must supply a root netCDF4.Dataset if the group "
                         "argument is provided"
@@ -417,25 +413,22 @@ def open_store_variable(self, name, var):
         return Variable(dimensions, data, attributes, encoding)
 
     def get_variables(self):
-        dsvars = FrozenDict(
+        return FrozenDict(
             (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
         )
-        return dsvars
 
     def get_attrs(self):
-        attrs = FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs())
-        return attrs
+        return FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs())
 
     def get_dimensions(self):
-        dims = FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
-        return dims
+        return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
 
     def get_encoding(self):
-        encoding = {}
-        encoding["unlimited_dims"] = {
-            k for k, v in self.ds.dimensions.items() if v.isunlimited()
+        return {
+            "unlimited_dims": {
+                k for k, v in self.ds.dimensions.items() if v.isunlimited()
+            }
         }
-        return encoding
 
     def set_dimension(self, name, length, is_unlimited=False):
         dim_length = length if not is_unlimited else None
@@ -473,9 +466,9 @@ def prepare_variable(
                 "netCDF4 does not yet support setting a fill value for "
                 "variable-length strings "
                 "(https://github.com/Unidata/netcdf4-python/issues/730). "
-                "Either remove '_FillValue' from encoding on variable %r "
+                f"Either remove '_FillValue' from encoding on variable {name!r} "
                 "or set {'dtype': 'S1'} in encoding to use the fixed width "
-                "NC_CHAR type." % name
+                "NC_CHAR type."
             )
 
         encoding = _extract_nc4_variable_encoding(

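The rewritten _is_list_of_strings is behavior-preserving, and evaluating np.asarray once instead of twice is slightly cheaper. A self-contained check, copying the diff's logic:

    import numpy as np

    def _is_list_of_strings(value):
        arr = np.asarray(value)
        return arr.dtype.kind in ["U", "S"] and arr.size > 1

    print(_is_list_of_strings(["a", "b"]))  # True: more than one string element
    print(_is_list_of_strings("abc"))       # False: 0-d array, size 1
    print(_is_list_of_strings([1, 2]))      # False: numeric dtype
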
xarray/backends/netcdf3.py

Lines changed: 0 additions & 2 deletions
@@ -125,8 +125,6 @@ def is_valid_nc3_name(s):
     """
     if not isinstance(s, str):
         return False
-    if not isinstance(s, str):
-        s = s.decode("utf-8")
     num_bytes = len(s.encode("utf-8"))
     return (
         (unicodedata.normalize("NFC", s) == s)

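The two lines deleted from is_valid_nc3_name were dead code: the first isinstance check already returns False for any non-str input, so an identical second check could never succeed and the decode call was unreachable. A minimal illustration (f is a toy function, not xarray code):

    def f(s):
        if not isinstance(s, str):
            return False
        if not isinstance(s, str):  # always False here: s is known to be str
            s = s.decode("utf-8")   # unreachable
        return True

    assert f(b"bytes") is False    # caught by the first check
    assert f("text") is True       # the second check never fires
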
xarray/backends/plugins.py

Lines changed: 1 addition & 4 deletions
@@ -87,10 +87,7 @@ def build_engines(pkg_entrypoints):
     backend_entrypoints.update(external_backend_entrypoints)
     backend_entrypoints = sort_backends(backend_entrypoints)
     set_missing_parameters(backend_entrypoints)
-    engines = {}
-    for name, backend in backend_entrypoints.items():
-        engines[name] = backend()
-    return engines
+    return {name: backend() for name, backend in backend_entrypoints.items()}
 
 
 @functools.lru_cache(maxsize=1)

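The build_engines change swaps a manual accumulation loop for a dict comprehension that instantiates each backend. A toy equivalence check (Backend is a stand-in class for illustration, not a real entrypoint):

    class Backend:
        pass

    backend_entrypoints = {"netcdf4": Backend, "scipy": Backend}

    engines_loop = {}
    for name, backend in backend_entrypoints.items():
        engines_loop[name] = backend()

    engines_comp = {name: backend() for name, backend in backend_entrypoints.items()}
    assert engines_loop.keys() == engines_comp.keys()
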
xarray/backends/pydap_.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def _getitem(self, key):
         result = robust_getitem(array, key, catch=ValueError)
         # in some cases, pydap doesn't squeeze axes automatically like numpy
         axis = tuple(n for n, k in enumerate(key) if isinstance(k, integer_types))
-        if result.ndim + len(axis) != array.ndim and len(axis) > 0:
+        if result.ndim + len(axis) != array.ndim and axis:
             result = np.squeeze(result, axis)
 
         return result

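The pydap change leans on tuple truthiness: an empty tuple is falsy, so the bare `axis` reads the same as `len(axis) > 0`:

    for axis in [(), (0, 2)]:
        assert bool(axis) == (len(axis) > 0)
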
xarray/backends/rasterio_.py

Lines changed: 1 addition & 1 deletion
@@ -389,7 +389,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, lock=None):
             # the filename is probably an s3 bucket rather than a regular file
             mtime = None
         token = tokenize(filename, mtime, chunks)
-        name_prefix = "open_rasterio-%s" % token
+        name_prefix = f"open_rasterio-{token}"
         result = result.chunk(chunks, name_prefix=name_prefix, token=token)
 
         # Make the file closeable
