@@ -1742,6 +1742,33 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas
1742
1742
names : hashable or iterable of hashable
1743
1743
Name(s) of variables in this dataset to convert into coordinates.
1744
1744
1745
+ Examples
1746
+ --------
1747
+ >>> dataset = xr.Dataset(
1748
+ ... {
1749
+ ... "pressure": ("time", [1.013, 1.2, 3.5]),
1750
+ ... "time": pd.date_range("2023-01-01", periods=3),
1751
+ ... }
1752
+ ... )
1753
+ >>> dataset
1754
+ <xarray.Dataset>
1755
+ Dimensions: (time: 3)
1756
+ Coordinates:
1757
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03
1758
+ Data variables:
1759
+ pressure (time) float64 1.013 1.2 3.5
1760
+
1761
+ >>> dataset.set_coords("pressure")
1762
+ <xarray.Dataset>
1763
+ Dimensions: (time: 3)
1764
+ Coordinates:
1765
+ pressure (time) float64 1.013 1.2 3.5
1766
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03
1767
+ Data variables:
1768
+ *empty*
1769
+
1770
+ On calling ``set_coords``, the specified data variables are converted to coordinates, as shown in the final dataset.
1771
+
1745
1772
Returns
1746
1773
-------
1747
1774
Dataset
@@ -1780,9 +1807,66 @@ def reset_coords(
1780
1807
If True, remove coordinates instead of converting them into
1781
1808
variables.
1782
1809
1810
+ Examples
1811
+ --------
1812
+ >>> dataset = xr.Dataset(
1813
+ ... {
1814
+ ... "temperature": (
1815
+ ... ["time", "lat", "lon"],
1816
+ ... [[[25, 26], [27, 28]], [[29, 30], [31, 32]]],
1817
+ ... ),
1818
+ ... "precipitation": (
1819
+ ... ["time", "lat", "lon"],
1820
+ ... [[[0.5, 0.8], [0.2, 0.4]], [[0.3, 0.6], [0.7, 0.9]]],
1821
+ ... ),
1822
+ ... },
1823
+ ... coords={
1824
+ ... "time": pd.date_range(start="2023-01-01", periods=2),
1825
+ ... "lat": [40, 41],
1826
+ ... "lon": [-80, -79],
1827
+ ... "altitude": 1000,
1828
+ ... },
1829
+ ... )
1830
+
1831
+ # Dataset before resetting coordinates
1832
+
1833
+ >>> dataset
1834
+ <xarray.Dataset>
1835
+ Dimensions: (time: 2, lat: 2, lon: 2)
1836
+ Coordinates:
1837
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02
1838
+ * lat (lat) int64 40 41
1839
+ * lon (lon) int64 -80 -79
1840
+ altitude int64 1000
1841
+ Data variables:
1842
+ temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32
1843
+ precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9
1844
+
1845
+ # Reset the 'altitude' coordinate
1846
+
1847
+ >>> dataset_reset = dataset.reset_coords("altitude")
1848
+
1849
+ # Dataset after resetting coordinates
1850
+
1851
+ >>> dataset_reset
1852
+ <xarray.Dataset>
1853
+ Dimensions: (time: 2, lat: 2, lon: 2)
1854
+ Coordinates:
1855
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02
1856
+ * lat (lat) int64 40 41
1857
+ * lon (lon) int64 -80 -79
1858
+ Data variables:
1859
+ temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32
1860
+ precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9
1861
+ altitude int64 1000
1862
+
1783
1863
Returns
1784
1864
-------
1785
1865
Dataset
1866
+
1867
+ See Also
1868
+ --------
1869
+ Dataset.set_coords
1786
1870
"""
1787
1871
if names is None :
1788
1872
names = self ._coord_names - set (self ._indexes )
@@ -2742,6 +2826,50 @@ def head(
2742
2826
The keyword arguments form of ``indexers``.
2743
2827
One of indexers or indexers_kwargs must be provided.
2744
2828
2829
+ Examples
2830
+ --------
2831
+ >>> dates = pd.date_range(start="2023-01-01", periods=5)
2832
+ >>> pageviews = [1200, 1500, 900, 1800, 2000]
2833
+ >>> visitors = [800, 1000, 600, 1200, 1500]
2834
+ >>> dataset = xr.Dataset(
2835
+ ... {
2836
+ ... "pageviews": (("date"), pageviews),
2837
+ ... "visitors": (("date"), visitors),
2838
+ ... },
2839
+ ... coords={"date": dates},
2840
+ ... )
2841
+ >>> busiest_days = dataset.sortby("pageviews", ascending=False)
2842
+ >>> busiest_days.head()
2843
+ <xarray.Dataset>
2844
+ Dimensions: (date: 5)
2845
+ Coordinates:
2846
+ * date (date) datetime64[ns] 2023-01-05 2023-01-04 ... 2023-01-03
2847
+ Data variables:
2848
+ pageviews (date) int64 2000 1800 1500 1200 900
2849
+ visitors (date) int64 1500 1200 1000 800 600
2850
+
2851
+ # Retrieve the 3 busiest days in terms of pageviews
2852
+
2853
+ >>> busiest_days.head(3)
2854
+ <xarray.Dataset>
2855
+ Dimensions: (date: 3)
2856
+ Coordinates:
2857
+ * date (date) datetime64[ns] 2023-01-05 2023-01-04 2023-01-02
2858
+ Data variables:
2859
+ pageviews (date) int64 2000 1800 1500
2860
+ visitors (date) int64 1500 1200 1000
2861
+
2862
+ # Using a dictionary to specify the number of elements for specific dimensions
2863
+
2864
+ >>> busiest_days.head({"date": 3})
2865
+ <xarray.Dataset>
2866
+ Dimensions: (date: 3)
2867
+ Coordinates:
2868
+ * date (date) datetime64[ns] 2023-01-05 2023-01-04 2023-01-02
2869
+ Data variables:
2870
+ pageviews (date) int64 2000 1800 1500
2871
+ visitors (date) int64 1500 1200 1000
2872
+
2745
2873
See Also
2746
2874
--------
2747
2875
Dataset.tail
@@ -2788,6 +2916,48 @@ def tail(
2788
2916
The keyword arguments form of ``indexers``.
2789
2917
One of indexers or indexers_kwargs must be provided.
2790
2918
2919
+ Examples
2920
+ --------
2921
+ >>> activity_names = ["Walking", "Running", "Cycling", "Swimming", "Yoga"]
2922
+ >>> durations = [30, 45, 60, 45, 60] # in minutes
2923
+ >>> energies = [150, 300, 250, 400, 100] # in calories
2924
+ >>> dataset = xr.Dataset(
2925
+ ... {
2926
+ ... "duration": (["activity"], durations),
2927
+ ... "energy_expenditure": (["activity"], energies),
2928
+ ... },
2929
+ ... coords={"activity": activity_names},
2930
+ ... )
2931
+ >>> sorted_dataset = dataset.sortby("energy_expenditure", ascending=False)
2932
+ >>> sorted_dataset
2933
+ <xarray.Dataset>
2934
+ Dimensions: (activity: 5)
2935
+ Coordinates:
2936
+ * activity (activity) <U8 'Swimming' 'Running' ... 'Walking' 'Yoga'
2937
+ Data variables:
2938
+ duration (activity) int64 45 45 60 30 60
2939
+ energy_expenditure (activity) int64 400 300 250 150 100
2940
+
2941
+ # Activities with the lowest energy expenditure using tail()
2942
+
2943
+ >>> sorted_dataset.tail(3)
2944
+ <xarray.Dataset>
2945
+ Dimensions: (activity: 3)
2946
+ Coordinates:
2947
+ * activity (activity) <U8 'Cycling' 'Walking' 'Yoga'
2948
+ Data variables:
2949
+ duration (activity) int64 60 30 60
2950
+ energy_expenditure (activity) int64 250 150 100
2951
+
2952
+ >>> sorted_dataset.tail({"activity": 3})
2953
+ <xarray.Dataset>
2954
+ Dimensions: (activity: 3)
2955
+ Coordinates:
2956
+ * activity (activity) <U8 'Cycling' 'Walking' 'Yoga'
2957
+ Data variables:
2958
+ duration (activity) int64 60 30 60
2959
+ energy_expenditure (activity) int64 250 150 100
2960
+
2791
2961
See Also
2792
2962
--------
2793
2963
Dataset.head
@@ -5617,6 +5787,70 @@ def dropna(
5617
5787
Which variables to check for missing values. By default, all
5618
5788
variables in the dataset are checked.
5619
5789
5790
+ Examples
5791
+ --------
5792
+ >>> dataset = xr.Dataset(
5793
+ ... {
5794
+ ... "temperature": (
5795
+ ... ["time", "location"],
5796
+ ... [[23.4, 24.1], [np.nan, 22.1], [21.8, 24.2], [20.5, 25.3]],
5797
+ ... )
5798
+ ... },
5799
+ ... coords={"time": [1, 2, 3, 4], "location": ["A", "B"]},
5800
+ ... )
5801
+ >>> dataset
5802
+ <xarray.Dataset>
5803
+ Dimensions: (time: 4, location: 2)
5804
+ Coordinates:
5805
+ * time (time) int64 1 2 3 4
5806
+ * location (location) <U1 'A' 'B'
5807
+ Data variables:
5808
+ temperature (time, location) float64 23.4 24.1 nan 22.1 21.8 24.2 20.5 25.3
5809
+
5810
+ # Drop NaN values from the dataset
5811
+
5812
+ >>> dataset.dropna(dim="time")
5813
+ <xarray.Dataset>
5814
+ Dimensions: (time: 3, location: 2)
5815
+ Coordinates:
5816
+ * time (time) int64 1 3 4
5817
+ * location (location) <U1 'A' 'B'
5818
+ Data variables:
5819
+ temperature (time, location) float64 23.4 24.1 21.8 24.2 20.5 25.3
5820
+
5821
+ # Drop labels with any NaN values
5822
+
5823
+ >>> dataset.dropna(dim="time", how="any")
5824
+ <xarray.Dataset>
5825
+ Dimensions: (time: 3, location: 2)
5826
+ Coordinates:
5827
+ * time (time) int64 1 3 4
5828
+ * location (location) <U1 'A' 'B'
5829
+ Data variables:
5830
+ temperature (time, location) float64 23.4 24.1 21.8 24.2 20.5 25.3
5831
+
5832
+ # Drop labels where all values are NaN
5833
+
5834
+ >>> dataset.dropna(dim="time", how="all")
5835
+ <xarray.Dataset>
5836
+ Dimensions: (time: 4, location: 2)
5837
+ Coordinates:
5838
+ * time (time) int64 1 2 3 4
5839
+ * location (location) <U1 'A' 'B'
5840
+ Data variables:
5841
+ temperature (time, location) float64 23.4 24.1 nan 22.1 21.8 24.2 20.5 25.3
5842
+
5843
+ # Drop labels with fewer than 2 non-NaN values
5844
+
5845
+ >>> dataset.dropna(dim="time", thresh=2)
5846
+ <xarray.Dataset>
5847
+ Dimensions: (time: 3, location: 2)
5848
+ Coordinates:
5849
+ * time (time) int64 1 3 4
5850
+ * location (location) <U1 'A' 'B'
5851
+ Data variables:
5852
+ temperature (time, location) float64 23.4 24.1 21.8 24.2 20.5 25.3
5853
+
5620
5854
Returns
5621
5855
-------
5622
5856
Dataset
@@ -5877,18 +6111,56 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset
5877
6111
Parameters
5878
6112
----------
5879
6113
dim : Hashable
5880
- Specifies the dimension along which to propagate values when
5881
- filling.
6114
+ Specifies the dimension along which to propagate values when filling.
5882
6115
limit : int or None, optional
5883
6116
The maximum number of consecutive NaN values to forward fill. In
5884
6117
other words, if there is a gap with more than this number of
5885
6118
consecutive NaNs, it will only be partially filled. Must be greater
5886
6119
than 0 or None for no limit. Must be None or greater than or equal
5887
6120
to axis length if filling along chunked axes (dimensions).
5888
6121
6122
+ Examples
6123
+ --------
6124
+ >>> time = pd.date_range("2023-01-01", periods=10, freq="D")
6125
+ >>> data = np.array(
6126
+ ... [1, np.nan, np.nan, np.nan, 5, np.nan, np.nan, 8, np.nan, 10]
6127
+ ... )
6128
+ >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time})
6129
+ >>> dataset
6130
+ <xarray.Dataset>
6131
+ Dimensions: (time: 10)
6132
+ Coordinates:
6133
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6134
+ Data variables:
6135
+ data (time) float64 1.0 nan nan nan 5.0 nan nan 8.0 nan 10.0
6136
+
6137
+ # Perform forward fill (ffill) on the dataset
6138
+
6139
+ >>> dataset.ffill(dim="time")
6140
+ <xarray.Dataset>
6141
+ Dimensions: (time: 10)
6142
+ Coordinates:
6143
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6144
+ Data variables:
6145
+ data (time) float64 1.0 1.0 1.0 1.0 5.0 5.0 5.0 8.0 8.0 10.0
6146
+
6147
+ # Limit the forward filling to a maximum of 2 consecutive NaN values
6148
+
6149
+ >>> dataset.ffill(dim="time", limit=2)
6150
+ <xarray.Dataset>
6151
+ Dimensions: (time: 10)
6152
+ Coordinates:
6153
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6154
+ Data variables:
6155
+ data (time) float64 1.0 1.0 1.0 nan 5.0 5.0 5.0 8.0 8.0 10.0
6156
+
5889
6157
Returns
5890
6158
-------
5891
6159
Dataset
6160
+
6161
+ See Also
6162
+ --------
6163
+ Dataset.bfill
5892
6164
"""
5893
6165
from xarray .core .missing import _apply_over_vars_with_dim , ffill
5894
6166
@@ -5912,9 +6184,48 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset
5912
6184
than 0 or None for no limit. Must be None or greater than or equal
5913
6185
to axis length if filling along chunked axes (dimensions).
5914
6186
6187
+ Examples
6188
+ --------
6189
+ >>> time = pd.date_range("2023-01-01", periods=10, freq="D")
6190
+ >>> data = np.array(
6191
+ ... [1, np.nan, np.nan, np.nan, 5, np.nan, np.nan, 8, np.nan, 10]
6192
+ ... )
6193
+ >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time})
6194
+ >>> dataset
6195
+ <xarray.Dataset>
6196
+ Dimensions: (time: 10)
6197
+ Coordinates:
6198
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6199
+ Data variables:
6200
+ data (time) float64 1.0 nan nan nan 5.0 nan nan 8.0 nan 10.0
6201
+
6202
+ # Perform backward fill (bfill) on the dataset, propagating values backward to fill NaN values
6203
+
6204
+ >>> dataset.bfill(dim="time")
6205
+ <xarray.Dataset>
6206
+ Dimensions: (time: 10)
6207
+ Coordinates:
6208
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6209
+ Data variables:
6210
+ data (time) float64 1.0 5.0 5.0 5.0 5.0 8.0 8.0 8.0 10.0 10.0
6211
+
6212
+ # Limit the backward filling to a maximum of 2 consecutive NaN values
6213
+
6214
+ >>> dataset.bfill(dim="time", limit=2)
6215
+ <xarray.Dataset>
6216
+ Dimensions: (time: 10)
6217
+ Coordinates:
6218
+ * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10
6219
+ Data variables:
6220
+ data (time) float64 1.0 nan 5.0 5.0 5.0 8.0 8.0 8.0 10.0 10.0
6221
+
5915
6222
Returns
5916
6223
-------
5917
6224
Dataset
6225
+
6226
+ See Also
6227
+ --------
6228
+ Dataset.ffill
5918
6229
"""
5919
6230
from xarray .core .missing import _apply_over_vars_with_dim , bfill
5920
6231
0 commit comments