remove mixed-string

TomAugspurger · TomAugspurger · commit 5a9c306a0ee8 · 2019-11-22T14:52:24.000-06:00
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -237,7 +237,7 @@ The following methods now also correctly output values for unobserved categories
 :meth:`pandas.array` inference changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-:meth:`pandas.array` now infers pandas' new extension types in several cases:
+:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`):
 
 1. String data (including missing values) now returns a :class:`arrays.StringArray`.
 2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`.
@@ -350,7 +350,7 @@ Other API changes
 - :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
   Supplying anything other than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
 - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
--
+- :meth:`pandas.api.types.infer_dtype` returns ``"string"`` rather than ``"mixed"`` for a mixture of strings and NA values (:issue:`29799`)
 
 
 .. _whatsnew_1000.api.documentation:
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1113,7 +1113,6 @@ def infer_dtype(value: object, skipna: object=None) -> str:
     Results can include:
 
     - string
-    - mixed-string
     - unicode
     - bytes
     - floating
@@ -1320,11 +1319,9 @@ def infer_dtype(value: object, skipna: object=None) -> str:
             return 'boolean'
 
     elif isinstance(val, str):
+        # we deliberately ignore skipna: strings mixed with NA values are still inferred as "string"
         if is_string_array(values, skipna=True):
-            if isnaobj(values).any():
-                return "mixed-string"
-            else:
-                return "string"
+            return "string"
 
     elif isinstance(val, bytes):
         if is_bytes_array(values, skipna=skipna):
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -302,7 +302,7 @@ def array(
             # timedelta, timedelta64
             return TimedeltaArray._from_sequence(data, copy=copy)
 
-        elif inferred_dtype in {"string", "mixed-string"}:
+        elif inferred_dtype == "string":
             return StringArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype in {"integer", "mixed-integer"}:
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -732,7 +732,7 @@ def test_string(self):
     def test_unicode(self):
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=False)
-        assert result == "mixed-string"
+        assert result == "string"
 
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=True)
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -615,12 +615,12 @@ def test_constructor_no_pandas_array(self):
     def test_add_column_with_pandas_array(self):
         # GH 26390
         df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
-        df["c"] = pd.array([1, 2, None, 3])
+        df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object))
         df2 = pd.DataFrame(
             {
                 "a": [1, 2, 3, 4],
                 "b": ["a", "b", "c", "d"],
-                "c": pd.array([1, 2, None, 3]),
+                "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
             }
         )
         assert type(df["c"]._data.blocks[0]) == ObjectBlock
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1268,7 +1268,7 @@ def test_block_shape():
 
 def test_make_block_no_pandas_array():
     # https://github.com/pandas-dev/pandas/pull/24866
-    arr = pd.array([1, 2])
+    arr = pd.arrays.PandasArray(np.array([1, 2]))
 
     # PandasArray, no dtype
     result = make_block(arr, slice(len(arr)))

Original file line number	Diff line number	Diff line change
`@@ -615,12 +615,12 @@ def test_constructor_no_pandas_array(self):`
`615`	`615`	`def test_add_column_with_pandas_array(self):`
`616`	`616`	`# GH 26390`
`617`	`617`	`df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})`
`618`		`- df["c"] = pd.array([1, 2, None, 3])`
	`618`	`+ df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object))`
`619`	`619`	`df2 = pd.DataFrame(`
`620`	`620`	`{`
`621`	`621`	`"a": [1, 2, 3, 4],`
`622`	`622`	`"b": ["a", "b", "c", "d"],`
`623`		`- "c": pd.array([1, 2, None, 3]),`
	`623`	`+ "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),`
`624`	`624`	`}`
`625`	`625`	`)`
`626`	`626`	`assert type(df["c"]._data.blocks[0]) == ObjectBlock`