kostyafarber · kostyafarber · Apr 8, 2023 · Apr 3, 2023
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -279,7 +279,6 @@ Other enhancements
 - :meth:`Series.dropna` and :meth:`DataFrame.dropna` has gained ``ignore_index`` keyword to reset index (:issue:`31725`)
 - Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
 - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
-- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
 - Added support for :meth:`Index.min` and :meth:`Index.max` for pyarrow string dtypes (:issue:`51397`)
 - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
 - Added :meth:`Series.dt.unit` and :meth:`Series.dt.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`51223`)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -42,8 +42,8 @@ Other enhancements
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
 - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
-- Added ``calamine`` as an engine to ``read_excel`` (:issue:`50395`)
 - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`)
+- Added ``calamine`` as an engine to :func:`read_excel` (:issue:`50395`)
 - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -1456,7 +1456,7 @@ class ExcelFile:
             This is not supported, switch to using ``openpyxl`` instead.
     """
 
-    from pandas.io.excel._calaminereader import CalamineExcelReader
+    from pandas.io.excel._calamine import CalamineReader
     from pandas.io.excel._odfreader import ODFReader
     from pandas.io.excel._openpyxl import OpenpyxlReader
     from pandas.io.excel._pyxlsb import PyxlsbReader
@@ -1467,7 +1467,7 @@ class ExcelFile:
         "openpyxl": OpenpyxlReader,
         "odf": ODFReader,
         "pyxlsb": PyxlsbReader,
-        "calamine": CalamineExcelReader,
+        "calamine": CalamineReader,
     }
 
     def __init__(

diff --git a/pandas/io/excel/_calaminereader.py → pandas/io/excel/_calamine.py b/pandas/io/excel/_calaminereader.py → pandas/io/excel/_calamine.py
@@ -29,7 +29,7 @@
 _CellValueT = Union[int, float, str, bool, time, date, datetime]
 
 
-class CalamineExcelReader(BaseExcelReader):
+class CalamineReader(BaseExcelReader):
     @doc(storage_options=_shared_docs["storage_options"])
     def __init__(
         self,

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -451,6 +451,10 @@ def test_reader_special_dtypes(self, request, engine, read_ext):
                     reason="Calamine support parsing datetime only in xlsx"
                 )
             )
+        if engine == "calamine":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine can't parse this datetime format")
+            )
 
         expected = DataFrame.from_dict(
             {
@@ -584,11 +588,16 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
-    def test_dtype_backend(self, read_ext, dtype_backend):
+    def test_dtype_backend(self, request, engine, read_ext, dtype_backend):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
+            )
+
         df = DataFrame(
             {
                 "a": Series([1, 3], dtype="Int64"),
@@ -629,11 +638,16 @@ def test_dtype_backend(self, read_ext, dtype_backend):
             expected = df
         tm.assert_frame_equal(result, expected)
 
-    def test_dtype_backend_and_dtype(self, read_ext):
+    def test_dtype_backend_and_dtype(self, request, engine, read_ext):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
+            )
+
         df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
         with tm.ensure_clean(read_ext) as file_path:
             df.to_excel(file_path, "test", index=False)
@@ -646,11 +660,16 @@ def test_dtype_backend_and_dtype(self, read_ext):
         tm.assert_frame_equal(result, df)
 
     @td.skip_if_no("pyarrow")
-    def test_dtype_backend_string(self, read_ext, string_storage):
+    def test_dtype_backend_string(self, request, engine, read_ext, string_storage):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
+            )
+
         import pyarrow as pa
 
         with pd.option_context("mode.string_storage", string_storage):
@@ -694,8 +713,15 @@ def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
         assert dtype_dict == dtype_dict_copy, "dtype dict changed"
         tm.assert_frame_equal(result, expected)
 
-    def test_reader_spaces(self, read_ext):
+    def test_reader_spaces(self, request, engine, read_ext):
         # see gh-32207
+
+        # https://github.com/tafia/calamine/pull/289
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine doesn't respect spaces in ods")
+            )
+
         basename = "test_spaces"
 
         actual = pd.read_excel(basename + read_ext)
@@ -790,12 +816,6 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
             )
-        if engine == "calamine" and read_ext in {".xls", ".xlsb", ".ods"}:
-            request.node.add_marker(
-                pytest.mark.xfail(
-                    reason="Calamine support parsing datetime only in xlsx"
-                )
-            )
 
         expected = DataFrame(
             [
@@ -806,6 +826,11 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
             columns=["DateColWithBigInt", "StringCol"],
         )
 
+        if engine == "calamine":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Maybe not supported by calamine")
+            )
+
         if engine == "openpyxl":
             request.node.add_marker(
                 pytest.mark.xfail(reason="Maybe not supported by openpyxl")
@@ -1008,6 +1033,12 @@ def test_reader_seconds(self, request, engine, read_ext):
                     reason="Calamine support parsing datetime only in xlsx"
                 )
             )
+        if engine == "calamine":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Calamine doesn't support parsing milliseconds in datetime"
+                )
+            )
 
         # Test reading times with and without milliseconds. GH5945.
         expected = DataFrame.from_dict(
@@ -1174,10 +1205,17 @@ def test_read_excel_multiindex_blank_after_name(
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_read_excel_multiindex_header_only(self, read_ext):
+    def test_read_excel_multiindex_header_only(self, request, engine, read_ext):
         # see gh-11733.
         #
         # Don't try to parse a header name if there isn't one.
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Calamine doesn't support 'number-rows-repeated' in ods"
+                )
+            )
+
         mi_file = "testmultiindex" + read_ext
         result = pd.read_excel(mi_file, sheet_name="index_col_none", header=[0, 1])
 
@@ -1418,8 +1456,15 @@ def test_deprecated_kwargs(self, read_ext):
         with pytest.raises(TypeError, match="but 3 positional arguments"):
             pd.read_excel("test1" + read_ext, "Sheet1", 0)
 
-    def test_no_header_with_list_index_col(self, read_ext):
+    def test_no_header_with_list_index_col(self, request, engine, read_ext):
         # GH 31783
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Calamine doesn't support 'number-rows-repeated' in ods"
+                )
+            )
+
         file_name = "testmultiindex" + read_ext
         data = [("B", "B"), ("key", "val"), (3, 4), (3, 4)]
         idx = MultiIndex.from_tuples(
@@ -1439,8 +1484,15 @@ def test_one_col_noskip_blank_line(self, read_ext):
         result = pd.read_excel(file_name)
         tm.assert_frame_equal(result, expected)
 
-    def test_multiheader_two_blank_lines(self, read_ext):
+    def test_multiheader_two_blank_lines(self, request, engine, read_ext):
         # GH 40442
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Calamine doesn't support 'number-rows-repeated' in ods"
+                )
+            )
+
         file_name = "testmultiindex" + read_ext
         columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
         data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]]
@@ -1703,7 +1755,7 @@ def test_excel_read_binary(self, engine, read_ext):
     def test_excel_read_binary_via_read_excel(self, read_ext, engine):
         # GH 38424
         with open("test1" + read_ext, "rb") as f:
-            result = pd.read_excel(f)
+            result = pd.read_excel(f, engine=engine)
         expected = pd.read_excel("test1" + read_ext, engine=engine)
         tm.assert_frame_equal(result, expected)