Merge remote-tracking branch 'upstream/master' into mcmali-s3-pub-test

alimcmaster1 · alimcmaster1 · commit 07394f45095e · 2020-06-20T19:45:32.000+01:00
diff --git a/.travis.yml b/.travis.yml
@@ -69,9 +69,9 @@ matrix:
       env:
         - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
     - dist: bionic
-      python: 3.9-dev
       env:
-        - JOB="3.9-dev" PATTERN="(not slow and not network)"
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
+
 
 before_install:
   - echo "before_install"
diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
@@ -13,6 +13,14 @@ when we use Cython and Numba on a test function operating row-wise on the
 ``DataFrame``. Using :func:`pandas.eval` we will speed up a sum by an order of
 ~2.
 
+.. note::
+
+   In addition to following the steps in this tutorial, users interested in enhancing
+   performance are highly encouraged to install the
+   :ref:`recommended dependencies<install.recommended_dependencies>` for pandas.
+   These dependencies are often not installed by default, but will offer speed
+   improvements if present.
+
 .. _enhancingperf.cython:
 
 Cython (writing C extensions for pandas)
diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst
@@ -443,9 +443,8 @@ Faceting, created by ``DataFrame.boxplot`` with the ``by``
 keyword, will affect the output type as well:
 
 ================ ======= ==========================
-``return_type=`` Faceted Output type
----------------- ------- --------------------------
-
+``return_type``  Faceted Output type
+================ ======= ==========================
 ``None``         No      axes
 ``None``         Yes     2-D ndarray of axes
 ``'axes'``       No      axes
@@ -1424,7 +1423,7 @@ Here is an example of one way to easily plot group means with standard deviation
    # Plot
    fig, ax = plt.subplots()
    @savefig errorbar_example.png
-   means.plot.bar(yerr=errors, ax=ax, capsize=4)
+   means.plot.bar(yerr=errors, ax=ax, capsize=4, rot=0)
 
 .. ipython:: python
    :suppress:
@@ -1445,9 +1444,9 @@ Plotting with matplotlib table is now supported in  :meth:`DataFrame.plot` and :
 
 .. ipython:: python
 
-   fig, ax = plt.subplots(1, 1)
+   fig, ax = plt.subplots(1, 1, figsize=(7, 6.5))
    df = pd.DataFrame(np.random.rand(5, 3), columns=['a', 'b', 'c'])
-   ax.get_xaxis().set_visible(False)   # Hide Ticks
+   ax.xaxis.tick_top()  # Display x-axis ticks on top.
 
    @savefig line_plot_table_true.png
    df.plot(table=True, ax=ax)
@@ -1464,8 +1463,9 @@ as seen in the example below.
 
 .. ipython:: python
 
-   fig, ax = plt.subplots(1, 1)
-   ax.get_xaxis().set_visible(False)   # Hide Ticks
+   fig, ax = plt.subplots(1, 1, figsize=(7, 6.75))
+   ax.xaxis.tick_top()  # Display x-axis ticks on top.
+
    @savefig line_plot_table_data.png
    df.plot(table=np.round(df.T, 2), ax=ax)
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -518,7 +518,12 @@ def is_(self, other) -> bool:
 
         Returns
         -------
-        True if both have same underlying data, False otherwise : bool
+        bool
+            True if both have same underlying data, False otherwise.
+
+        See Also
+        --------
+        Index.identical : Works like ``Index.is_`` but also checks metadata.
         """
         # use something other than None to be clearer
         return self._id is getattr(other, "_id", Ellipsis) and self._id is not None
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -1295,3 +1295,15 @@ def test_map_missing():
 
     result = arr.map({0: 10, 1: 11})
     tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("fill_value", [np.nan, 1])
+def test_dropna(fill_value):
+    # GH-28287: dropna must drop the NaN entry whether or not NaN is the fill_value
+    arr = SparseArray([np.nan, 1], fill_value=fill_value)
+    exp = SparseArray([1.0], fill_value=fill_value)
+    tm.assert_sp_array_equal(arr.dropna(), exp)
+
+    df = pd.DataFrame({"a": [0, 1], "b": arr})
+    expected_df = pd.DataFrame({"a": [1], "b": exp}, index=pd.Int64Index([1]))
+    tm.assert_equal(df.dropna(), expected_df)
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -179,13 +179,11 @@ def astype(self, dtype, copy=True):
     def unique(self):
         # Parent method doesn't work since np.array will try to infer
         # a 2-dim object.
-        return type(self)(
-            [dict(x) for x in list({tuple(d.items()) for d in self.data})]
-        )
+        return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}])
 
     @classmethod
     def _concat_same_type(cls, to_concat):
-        data = list(itertools.chain.from_iterable([x.data for x in to_concat]))
+        data = list(itertools.chain.from_iterable(x.data for x in to_concat))
         return cls(data)
 
     def _values_for_factorize(self):
diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py
@@ -169,3 +169,48 @@ def test_diff_sparse(self):
         )
 
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "axis,expected",
+        [
+            (
+                0,
+                pd.DataFrame(
+                    {
+                        "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0],
+                        "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan],
+                        "c": np.repeat(np.nan, 8),
+                        "d": [np.nan, 3, 5, 7, 9, 11, 13, 15],
+                    },
+                    dtype="Int64",
+                ),
+            ),
+            (
+                1,
+                pd.DataFrame(
+                    {
+                        "a": np.repeat(np.nan, 8),
+                        "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0],
+                        "c": np.repeat(np.nan, 8),
+                        "d": np.repeat(np.nan, 8),
+                    },
+                    dtype="Int64",
+                ),
+            ),
+        ],
+    )
+    def test_diff_integer_na(self, axis, expected):
+        # GH#24171: DataFrame.diff() on "Int64" columns that contain missing values
+        df = pd.DataFrame(
+            {
+                "a": np.repeat([0, 1, np.nan, 2], 2),
+                "b": np.tile([0, 1, np.nan, 2], 2),
+                "c": np.repeat(np.nan, 8),
+                "d": np.arange(1, 9) ** 2,
+            },
+            dtype="Int64",
+        )
+
+        # Default periods: each value minus its predecessor along the given axis
+        result = df.diff(axis=axis)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
@@ -115,6 +115,12 @@ def test_read_columns(self):
         columns = ["col1", "col3"]
         self.check_round_trip(df, expected=df[columns], columns=columns)
 
+    @td.skip_if_no("pyarrow", min_version="0.17.1")
+    def test_read_columns_different_order(self):
+        # GH 33878: the requested column order must be preserved on read
+        df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"], "C": [True, False]})
+        self.check_round_trip(df, expected=df[["B", "A"]], columns=["B", "A"])
+
     def test_unsupported_other(self):
 
         # mixed python objects
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
@@ -148,6 +148,7 @@ def mock_read_gbq(sql, **kwargs):
 
 
 @pytest.mark.single
+@pytest.mark.xfail(reason="skipping gbq integration for now, xref #34779")
 class TestToGBQIntegrationWithServiceAccountKeyPath:
     @pytest.fixture()
     def gbq_dataset(self):