From c75ac8b7ab33be95f74d9e6f10b8173c68828751 Mon Sep 17 00:00:00 2001 From: Mary Gathoni Date: Wed, 19 Apr 2023 05:26:20 +0300 Subject: [PATCH] Docstrings examples for string methods (#7669) * Use more descriptive link texts * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix link target name * Fix typos * Add example for DataArray.str.capitalize * Add example for DataArray.str.lower * Add docstring example for DataArray.str.swapcase * Add docstring example for DataArray.str.title * Add docstring example for DataArray.str.upper * Add docstring example for DataArray.str.casefold * Add docstring example for DataArray.str.isalnum * Add docstring example for DataArray.str.isalpha * Add docstring example for DataArray.str.isdecimal * Add docstring example for DataArray.str.isdigit * Add docstring example for DataArray.str.islower * Add docstring example for DataArray.str.isnumeric * Add docstring example for DataArray.str.isspace * Add docstring example of DataArray.str.istitle * Add docstring example for DataArray.str.isupper * Add a docstring example for DataArray.str.count * Add docstring example of DataArray.str.startswith * Add docstring example of DataArray.str.endswith * Use varied examples * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Escape \n and \t Solves the inconsistent whitespace error * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes failing doctests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Replace "Example" with "Examples" To follow the Numpy docstrings format. * Delete unnecessary import * Pass a regex to the count function * Pass an array of strings to the count function This example demonstrates how the pattern is broadcasted. 
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add docstrings examples for DataArray.str.pad * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix invalid escape sequence warning * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix regex * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add data array and filled value * Correct spelling mistake * Modify wording to make it easier to understand * Fix indent errors * Add whats-new, fix docs --------- Co-authored-by: remigathoni Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 4 +- xarray/core/accessor_str.py | 354 +++++++++++++++++++++++++++++++++++- 2 files changed, 354 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 32e167c5ea1..cad2767018c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,7 +38,9 @@ Bug fixes Documentation ~~~~~~~~~~~~~ - +- Added examples to docstrings for :py:meth:`xarray.core.accessor_str.StringAccessor` methods. + (:pull:`7669`). + By `Mary Gathoni <https://github.com/remigathoni>`_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 16e22ec1c66..c6c4af87d1c 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -672,6 +672,23 @@ def capitalize(self) -> T_DataArray: Returns ------- capitalized : same type as values + + Examples + -------- + >>> da = xr.DataArray( + ... ["temperature", "PRESSURE", "PreCipiTation", "daily rainfall"], dims="x" + ... 
) + >>> da + + array(['temperature', 'PRESSURE', 'PreCipiTation', 'daily rainfall'], + dtype='>> capitalized = da.str.capitalize() + >>> capitalized + + array(['Temperature', 'Pressure', 'Precipitation', 'Daily rainfall'], + dtype=' T_DataArray: Returns ------- - lowerd : same type as values + lowered : same type as values + + Examples + -------- + >>> da = xr.DataArray(["Temperature", "PRESSURE"], dims="x") + >>> da + + array(['Temperature', 'PRESSURE'], dtype='>> lowered = da.str.lower() + >>> lowered + + array(['temperature', 'pressure'], dtype=' T_DataArray: Returns ------- swapcased : same type as values + + Examples + -------- + >>> import xarray as xr + >>> da = xr.DataArray(["temperature", "PRESSURE", "HuMiDiTy"], dims="x") + >>> da + + array(['temperature', 'PRESSURE', 'HuMiDiTy'], dtype='>> swapcased = da.str.swapcase() + >>> swapcased + + array(['TEMPERATURE', 'pressure', 'hUmIdItY'], dtype=' T_DataArray: Returns ------- titled : same type as values + + Examples + -------- + >>> da = xr.DataArray(["temperature", "PRESSURE", "HuMiDiTy"], dims="x") + >>> da + + array(['temperature', 'PRESSURE', 'HuMiDiTy'], dtype='>> titled = da.str.title() + >>> titled + + array(['Temperature', 'Pressure', 'Humidity'], dtype=' T_DataArray: Returns ------- uppered : same type as values + + Examples + -------- + >>> da = xr.DataArray(["temperature", "HuMiDiTy"], dims="x") + >>> da + + array(['temperature', 'HuMiDiTy'], dtype='>> uppered = da.str.upper() + >>> uppered + + array(['TEMPERATURE', 'HUMIDITY'], dtype=' T_DataArray: Casefolding is similar to converting to lowercase, but removes all case distinctions. This is important in some languages that have more complicated - cases and case conversions. + cases and case conversions. For example, + the 'ß' character in German is case-folded to 'ss', whereas it is lowercased + to 'ß'. 
Returns ------- casefolded : same type as values + + Examples + -------- + >>> da = xr.DataArray(["TEMPERATURE", "HuMiDiTy"], dims="x") + >>> da + + array(['TEMPERATURE', 'HuMiDiTy'], dtype='>> casefolded = da.str.casefold() + >>> casefolded + + array(['temperature', 'humidity'], dtype='>> da = xr.DataArray(["ß", "İ"], dims="x") + >>> da + + array(['ß', 'İ'], dtype='>> casefolded = da.str.casefold() + >>> casefolded + + array(['ss', 'i̇'], dtype=' T_DataArray: ------- isalnum : array of bool Array of boolean values with the same shape as the original array. + + Examples + -------- + >>> da = xr.DataArray(["H2O", "NaCl-"], dims="x") + >>> da + + array(['H2O', 'NaCl-'], dtype='>> isalnum = da.str.isalnum() + >>> isalnum + + array([ True, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isalnum(), dtype=bool) @@ -771,6 +880,19 @@ def isalpha(self) -> T_DataArray: ------- isalpha : array of bool Array of boolean values with the same shape as the original array. + + Examples + -------- + >>> da = xr.DataArray(["Mn", "H2O", "NaCl-"], dims="x") + >>> da + + array(['Mn', 'H2O', 'NaCl-'], dtype='>> isalpha = da.str.isalpha() + >>> isalpha + + array([ True, False, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isalpha(), dtype=bool) @@ -782,6 +904,19 @@ def isdecimal(self) -> T_DataArray: ------- isdecimal : array of bool Array of boolean values with the same shape as the original array. + + Examples + -------- + >>> da = xr.DataArray(["2.3", "123", "0"], dims="x") + >>> da + + array(['2.3', '123', '0'], dtype='>> isdecimal = da.str.isdecimal() + >>> isdecimal + + array([False, True, True]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isdecimal(), dtype=bool) @@ -793,6 +928,19 @@ def isdigit(self) -> T_DataArray: ------- isdigit : array of bool Array of boolean values with the same shape as the original array. 
+ + Examples + -------- + >>> da = xr.DataArray(["123", "1.2", "0", "CO2", "NaCl"], dims="x") + >>> da + + array(['123', '1.2', '0', 'CO2', 'NaCl'], dtype='>> isdigit = da.str.isdigit() + >>> isdigit + + array([ True, False, True, False, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isdigit(), dtype=bool) @@ -803,7 +951,21 @@ def islower(self) -> T_DataArray: Returns ------- islower : array of bool - Array of boolean values with the same shape as the original array. + Array of boolean values with the same shape as the original array indicating whether all characters of each + element of the string array are lowercase (True) or not (False). + + Examples + -------- + >>> da = xr.DataArray(["temperature", "HUMIDITY", "pREciPiTaTioN"], dims="x") + >>> da + + array(['temperature', 'HUMIDITY', 'pREciPiTaTioN'], dtype='>> islower = da.str.islower() + >>> islower + + array([ True, False, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.islower(), dtype=bool) @@ -815,6 +977,19 @@ def isnumeric(self) -> T_DataArray: ------- isnumeric : array of bool Array of boolean values with the same shape as the original array. + + Examples + -------- + >>> da = xr.DataArray(["123", "2.3", "H2O", "NaCl-", "Mn"], dims="x") + >>> da + + array(['123', '2.3', 'H2O', 'NaCl-', 'Mn'], dtype='>> isnumeric = da.str.isnumeric() + >>> isnumeric + + array([ True, False, False, False, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isnumeric(), dtype=bool) @@ -826,6 +1001,19 @@ def isspace(self) -> T_DataArray: ------- isspace : array of bool Array of boolean values with the same shape as the original array. 
+ + Examples + -------- + >>> da = xr.DataArray(["", " ", "\\t", "\\n"], dims="x") + >>> da + + array(['', ' ', '\\t', '\\n'], dtype='>> isspace = da.str.isspace() + >>> isspace + + array([False, True, True, True]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isspace(), dtype=bool) @@ -837,6 +1025,27 @@ def istitle(self) -> T_DataArray: ------- istitle : array of bool Array of boolean values with the same shape as the original array. + + Examples + -------- + >>> da = xr.DataArray( + ... [ + ... "The Evolution Of Species", + ... "The Theory of relativity", + ... "the quantum mechanics of atoms", + ... ], + ... dims="title", + ... ) + >>> da + + array(['The Evolution Of Species', 'The Theory of relativity', + 'the quantum mechanics of atoms'], dtype='>> istitle = da.str.istitle() + >>> istitle + + array([ True, False, False]) + Dimensions without coordinates: title """ return self._apply(func=lambda x: x.istitle(), dtype=bool) @@ -848,6 +1057,19 @@ def isupper(self) -> T_DataArray: ------- isupper : array of bool Array of boolean values with the same shape as the original array. 
+ + Examples + -------- + >>> da = xr.DataArray(["TEMPERATURE", "humidity", "PreCIpiTAtioN"], dims="x") + >>> da + + array(['TEMPERATURE', 'humidity', 'PreCIpiTAtioN'], dtype='>> isupper = da.str.isupper() + >>> isupper + + array([ True, False, False]) + Dimensions without coordinates: x """ return self._apply(func=lambda x: x.isupper(), dtype=bool) @@ -883,6 +1105,46 @@ def count( Returns ------- counts : array of int + + Examples + -------- + >>> da = xr.DataArray(["jjklmn", "opjjqrs", "t-JJ99vwx"], dims="x") + >>> da + + array(['jjklmn', 'opjjqrs', 't-JJ99vwx'], dtype='>> da.str.count("jj") + + array([1, 1, 0]) + Dimensions without coordinates: x + + Enable case-insensitive matching by setting case to false: + >>> counts = da.str.count("jj", case=False) + >>> counts + + array([1, 1, 1]) + Dimensions without coordinates: x + + Using regex: + >>> pat = "JJ[0-9]{2}[a-z]{3}" + >>> counts = da.str.count(pat) + >>> counts + + array([0, 0, 1]) + Dimensions without coordinates: x + + Using an array of strings (the pattern will be broadcast against the array): + + >>> pat = xr.DataArray(["jj", "JJ"], dims="y") + >>> counts = da.str.count(pat) + >>> counts + + array([[1, 0], + [1, 0], + [0, 1]]) + Dimensions without coordinates: x, y """ pat = self._re_compile(pat=pat, flags=flags, case=case) @@ -907,6 +1169,19 @@ def startswith(self, pat: str | bytes | Any) -> T_DataArray: startswith : array of bool An array of booleans indicating whether the given pattern matches the start of each string element. 
+ + Examples + -------- + >>> da = xr.DataArray(["$100", "£23", "100"], dims="x") + >>> da + + array(['$100', '£23', '100'], dtype='>> startswith = da.str.startswith("$") + >>> startswith + + array([ True, False, False]) + Dimensions without coordinates: x """ pat = self._stringify(pat) func = lambda x, y: x.startswith(y) @@ -930,6 +1205,19 @@ def endswith(self, pat: str | bytes | Any) -> T_DataArray: endswith : array of bool A Series of booleans indicating whether the given pattern matches the end of each string element. + + Examples + -------- + >>> da = xr.DataArray(["10C", "10c", "100F"], dims="x") + >>> da + + array(['10C', '10c', '100F'], dtype='>> endswith = da.str.endswith("C") + >>> endswith + + array([ True, False, False]) + Dimensions without coordinates: x """ pat = self._stringify(pat) func = lambda x, y: x.endswith(y) @@ -963,6 +1251,66 @@ def pad( ------- filled : same type as values Array with a minimum number of char in each element. + + Examples + -------- + Pad strings in the array with a single string on the left side. + + Define the string in the array. 
+ + >>> da = xr.DataArray(["PAR184", "TKO65", "NBO9139", "NZ39"], dims="x") + >>> da + + array(['PAR184', 'TKO65', 'NBO9139', 'NZ39'], dtype='>> filled = da.str.pad(8, side="left", fillchar="0") + >>> filled + + array(['00PAR184', '000TKO65', '0NBO9139', '0000NZ39'], dtype='>> filled = da.str.pad(8, side="right", fillchar="0") + >>> filled + + array(['PAR18400', 'TKO65000', 'NBO91390', 'NZ390000'], dtype='>> filled = da.str.pad(8, side="both", fillchar="0") + >>> filled + + array(['0PAR1840', '0TKO6500', 'NBO91390', '00NZ3900'], dtype='>> width = xr.DataArray([8, 10], dims="y") + >>> filled = da.str.pad(width, side="left", fillchar="0") + >>> filled + + array([['00PAR184', '0000PAR184'], + ['000TKO65', '00000TKO65'], + ['0NBO9139', '000NBO9139'], + ['0000NZ39', '000000NZ39']], dtype='>> fillchar = xr.DataArray(["0", "-"], dims="y") + >>> filled = da.str.pad(8, side="left", fillchar=fillchar) + >>> filled + + array([['00PAR184', '--PAR184'], + ['000TKO65', '---TKO65'], + ['0NBO9139', '-NBO9139'], + ['0000NZ39', '----NZ39']], dtype='