pydata · TomNicholas · May 2, 2024 · Apr 24, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -73,6 +73,8 @@ Internal Changes
   ``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`)
   By `Owen Littlejohns <https://github.com/owenlittlejohns>`_ and
   `Tom Nicholas <https://github.com/TomNicholas>`_.
+- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`)
+  By `Matt Savoie <https://github.com/flamingbear>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
 - ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg
   rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods
   consistent with their use of ``dim``. Using the existing kwarg will raise a

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -5269,7 +5269,7 @@ def differentiate(
         edge_order: Literal[1, 2] = 1,
         datetime_unit: DatetimeUnitOptions = None,
     ) -> Self:
-        """ Differentiate the array with the second order accurate central
+        """Differentiate the array with the second order accurate central
         differences.
 
         .. note::

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -8339,7 +8339,7 @@ def differentiate(
         edge_order: Literal[1, 2] = 1,
         datetime_unit: DatetimeUnitOptions | None = None,
     ) -> Self:
-        """ Differentiate with the second order accurate central
+        """Differentiate with the second order accurate central
         differences.
 
         .. note::

diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
@@ -23,6 +23,11 @@
     check_isomorphic,
     map_over_subtree,
 )
+from xarray.core.datatree_ops import (
+    DataTreeArithmeticMixin,
+    MappedDatasetMethodsMixin,
+    MappedDataWithCoords,
+)
 from xarray.core.datatree_render import RenderDataTree
 from xarray.core.formatting import datatree_repr
 from xarray.core.formatting_html import (
@@ -42,11 +47,6 @@
 )
 from xarray.core.variable import Variable
 from xarray.datatree_.datatree.common import TreeAttrAccessMixin
-from xarray.datatree_.datatree.ops import (
-    DataTreeArithmeticMixin,
-    MappedDatasetMethodsMixin,
-    MappedDataWithCoords,
-)
 
 try:
     from xarray.core.variable import calculate_dimensions

diff --git a/xarray/core/datatree_mapping.py b/xarray/core/datatree_mapping.py
@@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable:
         Function will not be applied to any nodes without datasets.
     *args : tuple, optional
         Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
-        via .ds .
+        via `.ds`.
     **kwargs : Any
         Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
-        via .ds .
+        via `.ds`.
 
     Returns
     -------

diff --git a/xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py b/xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py
@@ -1,7 +1,7 @@
+import re
 import textwrap
 
 from xarray.core.dataset import Dataset
-
 from xarray.core.datatree_mapping import map_over_subtree
 
 """
@@ -12,11 +12,10 @@
 """
 
 
-_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
+_MAPPED_DOCSTRING_ADDENDUM = (
     "This method was copied from xarray.Dataset, but has been altered to "
     "call the method on the Datasets stored in every node of the subtree. "
-    "See the `map_over_subtree` function for more details.",
-    width=117,
+    "See the `map_over_subtree` function for more details."
 )
 
 # TODO equals, broadcast_equals etc.
@@ -173,7 +172,7 @@ def _wrap_then_attach_to_cls(
     target_cls_dict, source_cls, methods_to_set, wrap_func=None
 ):
     """
-    Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
+    Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree).
 
     Result is like having written this in the classes' definition:
     ```
@@ -208,16 +207,62 @@ def method_name(self, *args, **kwargs):
         if wrap_func is map_over_subtree:
             # Add a paragraph to the method's docstring explaining how it's been mapped
             orig_method_docstring = orig_method.__doc__
-            # if orig_method_docstring is not None:
-            #     if "\n" in orig_method_docstring:
-            #         new_method_docstring = orig_method_docstring.replace(
-            #             "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
-            #         )
-            #     else:
-            #         new_method_docstring = (
-            #             orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
-            #         )
-            setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)
+
+            if orig_method_docstring is not None:
+                new_method_docstring = insert_doc_addendum(
+                    orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM
+                )
+                setattr(target_cls_dict[method_name], "__doc__", new_method_docstring)
+
+
+def insert_doc_addendum(docstring, addendum):
+    """Insert addendum after first paragraph or at the end of the docstring.
+
+    There are a number of Dataset's functions that are wrapped. These come from
+    Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
+
+    The majority of the docstrings fall into a parseable pattern. Those that
+    don't, just have the addendum appeneded after. None values are returned.
+
+    """
+    if docstring is None:
+        return None
+
+    pattern = re.compile(
+        r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)",
+        re.DOTALL,
+    )
+    capture = re.match(pattern, docstring)
+    if capture is None:
+        ### single line docstring.
+        return (
+            docstring
+            + "\n\n"
+            + textwrap.fill(
+                addendum,
+                subsequent_indent="    ",
+                width=79,
+            )
+        )
+
+    if len(capture.groups()) == 6:
+        return (
+            capture["start"]
+            + capture["paragraph_break"]
+            + capture["whitespace"]
+            + ".. note::\n"
+            + textwrap.fill(
+                addendum,
+                initial_indent=capture["whitespace"] + "    ",
+                subsequent_indent=capture["whitespace"] + "    ",
+                width=79,
+            )
+            + capture["paragraph_break"]
+            + capture["whitespace"]
+            + capture["rest"]
+        )
+    else:
+        return docstring
 
 
 class MappedDatasetMethodsMixin:

diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py
@@ -1,10 +1,12 @@
 from copy import copy, deepcopy
+from textwrap import dedent
 
 import numpy as np
 import pytest
 
 import xarray as xr
 from xarray.core.datatree import DataTree
+from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum
 from xarray.core.treenode import NotFoundInTreeError
 from xarray.testing import assert_equal, assert_identical
 from xarray.tests import create_test_data, source_ndarray
@@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree):
         expected = create_test_datatree(modify=lambda ds: np.sin(ds))
         result_tree = np.sin(dt)
         assert_equal(result_tree, expected)
+
+
+class TestDocInsertion:
+    """Tests map_over_subtree docstring injection."""
+
+    def test_standard_doc(self):
+
+        dataset_doc = dedent(
+            """\
+            Manually trigger loading and/or computation of this dataset's data
+                    from disk or a remote source into memory and return this dataset.
+                    Unlike compute, the original dataset is modified and returned.
+
+                    Normally, it should not be necessary to call this method in user code,
+                    because all xarray functions should either work on deferred data or
+                    load data automatically. However, this method can be necessary when
+                    working with many file objects on disk.
+
+                    Parameters
+                    ----------
+                    **kwargs : dict
+                        Additional keyword arguments passed on to ``dask.compute``.
+
+                    See Also
+                    --------
+                    dask.compute"""
+        )
+
+        expected_doc = dedent(
+            """\
+            Manually trigger loading and/or computation of this dataset's data
+                    from disk or a remote source into memory and return this dataset.
+                    Unlike compute, the original dataset is modified and returned.
+
+                    .. note::
+                        This method was copied from xarray.Dataset, but has been altered to
+                        call the method on the Datasets stored in every node of the
+                        subtree. See the `map_over_subtree` function for more details.
+
+                    Normally, it should not be necessary to call this method in user code,
+                    because all xarray functions should either work on deferred data or
+                    load data automatically. However, this method can be necessary when
+                    working with many file objects on disk.
+
+                    Parameters
+                    ----------
+                    **kwargs : dict
+                        Additional keyword arguments passed on to ``dask.compute``.
+
+                    See Also
+                    --------
+                    dask.compute"""
+        )
+
+        wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM)
+
+        assert expected_doc == wrapped_doc
+
+    def test_one_liner(self):
+        mixin_doc = "Same as abs(a)."
+
+        expected_doc = dedent(
+            """\
+            Same as abs(a).
+
+            This method was copied from xarray.Dataset, but has been altered to call the
+                method on the Datasets stored in every node of the subtree. See the
+                `map_over_subtree` function for more details."""
+        )
+
+        actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM)
+        assert expected_doc == actual_doc
+
+    def test_none(self):
+        actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM)
+        assert actual_doc is None