-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migration of datatree/ops.py -> datatree_ops.py #8976
Changes from 10 commits
b22a56e
d072845
7d59d3a
5954034
3a494ae
5859b5c
3575257
f80a466
51e8f90
b5ccd64
696cd3e
3f65927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import re | ||
import textwrap | ||
|
||
from xarray.core.dataset import Dataset | ||
|
||
from xarray.core.datatree_mapping import map_over_subtree | ||
|
||
""" | ||
|
@@ -12,11 +12,10 @@ | |
""" | ||
|
||
|
||
_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill( | ||
_MAPPED_DOCSTRING_ADDENDUM = ( | ||
"This method was copied from xarray.Dataset, but has been altered to " | ||
"call the method on the Datasets stored in every node of the subtree. " | ||
"See the `map_over_subtree` function for more details.", | ||
width=117, | ||
"See the `map_over_subtree` function for more details." | ||
) | ||
|
||
# TODO equals, broadcast_equals etc. | ||
|
@@ -173,7 +172,7 @@ def _wrap_then_attach_to_cls( | |
target_cls_dict, source_cls, methods_to_set, wrap_func=None | ||
): | ||
""" | ||
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree) | ||
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree). | ||
|
||
Result is like having written this in the classes' definition: | ||
``` | ||
|
@@ -208,16 +207,62 @@ def method_name(self, *args, **kwargs): | |
if wrap_func is map_over_subtree: | ||
# Add a paragraph to the method's docstring explaining how it's been mapped | ||
orig_method_docstring = orig_method.__doc__ | ||
# if orig_method_docstring is not None: | ||
# if "\n" in orig_method_docstring: | ||
# new_method_docstring = orig_method_docstring.replace( | ||
# "\n", _MAPPED_DOCSTRING_ADDENDUM, 1 | ||
# ) | ||
# else: | ||
# new_method_docstring = ( | ||
# orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}" | ||
# ) | ||
setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring) | ||
|
||
if orig_method_docstring is not None: | ||
new_method_docstring = insert_doc_addendum( | ||
orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM | ||
) | ||
setattr(target_cls_dict[method_name], "__doc__", new_method_docstring) | ||
|
||
|
||
def insert_doc_addendum(docstring, addendum): | ||
"""Insert addendum after first paragraph or at the end of the docstring. | ||
|
||
There are a number of Dataset's functions that are wrapped. These come from | ||
Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin. | ||
|
||
The majority of the docstrings fall into a parseable pattern. Those that | ||
don't, just have the addendum appeneded after. None values are returned. | ||
|
||
""" | ||
if docstring is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is, and I couldn't decide if I should take it out above or leave it here, so it's belts and braces. |
||
return None | ||
|
||
pattern = re.compile( | ||
r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)", | ||
re.DOTALL, | ||
) | ||
capture = re.match(pattern, docstring) | ||
if capture is None: | ||
### single line docstring. | ||
return ( | ||
docstring | ||
+ "\n\n" | ||
+ textwrap.fill( | ||
addendum, | ||
subsequent_indent=" ", | ||
width=79, | ||
) | ||
) | ||
|
||
if len(capture.groups()) == 6: | ||
return ( | ||
capture["start"] | ||
+ capture["paragraph_break"] | ||
+ capture["whitespace"] | ||
+ ".. note::\n" | ||
+ textwrap.fill( | ||
addendum, | ||
initial_indent=capture["whitespace"] + " ", | ||
subsequent_indent=capture["whitespace"] + " ", | ||
width=79, | ||
) | ||
+ capture["paragraph_break"] | ||
+ capture["whitespace"] | ||
+ capture["rest"] | ||
) | ||
else: | ||
return docstring | ||
|
||
|
||
class MappedDatasetMethodsMixin: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,12 @@ | ||
from copy import copy, deepcopy | ||
from textwrap import dedent | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
import xarray as xr | ||
from xarray.core.datatree import DataTree | ||
from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum | ||
from xarray.core.treenode import NotFoundInTreeError | ||
from xarray.testing import assert_equal, assert_identical | ||
from xarray.tests import create_test_data, source_ndarray | ||
|
@@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree): | |
expected = create_test_datatree(modify=lambda ds: np.sin(ds)) | ||
result_tree = np.sin(dt) | ||
assert_equal(result_tree, expected) | ||
|
||
|
||
class TestDocInsertion: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice tests. |
||
"""Tests map_over_subtree docstring injection.""" | ||
|
||
def test_standard_doc(self): | ||
|
||
dataset_doc = dedent( | ||
"""\ | ||
Manually trigger loading and/or computation of this dataset's data | ||
from disk or a remote source into memory and return this dataset. | ||
Unlike compute, the original dataset is modified and returned. | ||
|
||
Normally, it should not be necessary to call this method in user code, | ||
because all xarray functions should either work on deferred data or | ||
load data automatically. However, this method can be necessary when | ||
working with many file objects on disk. | ||
|
||
Parameters | ||
---------- | ||
**kwargs : dict | ||
Additional keyword arguments passed on to ``dask.compute``. | ||
|
||
See Also | ||
-------- | ||
dask.compute""" | ||
) | ||
|
||
expected_doc = dedent( | ||
"""\ | ||
Manually trigger loading and/or computation of this dataset's data | ||
from disk or a remote source into memory and return this dataset. | ||
Unlike compute, the original dataset is modified and returned. | ||
|
||
.. note:: | ||
This method was copied from xarray.Dataset, but has been altered to | ||
call the method on the Datasets stored in every node of the | ||
subtree. See the `map_over_subtree` function for more details. | ||
|
||
Normally, it should not be necessary to call this method in user code, | ||
because all xarray functions should either work on deferred data or | ||
load data automatically. However, this method can be necessary when | ||
working with many file objects on disk. | ||
|
||
Parameters | ||
---------- | ||
**kwargs : dict | ||
Additional keyword arguments passed on to ``dask.compute``. | ||
|
||
See Also | ||
-------- | ||
dask.compute""" | ||
) | ||
|
||
wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM) | ||
|
||
assert expected_doc == wrapped_doc | ||
|
||
def test_one_liner(self): | ||
mixin_doc = "Same as abs(a)." | ||
|
||
expected_doc = dedent( | ||
"""\ | ||
Same as abs(a). | ||
|
||
This method was copied from xarray.Dataset, but has been altered to call the | ||
method on the Datasets stored in every node of the subtree. See the | ||
`map_over_subtree` function for more details.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, when this message is just appended on the end of a single paragraph/line docstring, it isn't a note? I don't have strong opinions, but just highlighting the difference and asking if it should still be a note. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I already don't know if I did this on purpose or not. All of the single
I don't see in the RTD docs if those are not built, or if I just didn't insert |
||
) | ||
|
||
actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM) | ||
assert expected_doc == actual_doc | ||
|
||
def test_none(self): | ||
actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM) | ||
assert actual_doc is None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Is it overkill to suggest type hints for this function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably not
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
696cd3e