[fix] Fix mypy issues

nabenabe0928 · nabenabe0928 · commit 9a7ce79fb334 · 2022-03-02T23:46:14.000+09:00
diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
@@ -21,6 +21,7 @@
 from autoPyTorch.data.utils import (
     DatasetCompressionInputType,
     DatasetDTypeContainerType,
+    ispandas,
     reduce_dataset_size_if_too_large
 )
 from autoPyTorch.utils.common import autoPyTorchEnum
@@ -211,7 +212,7 @@ def _fit(self, X: SupportedFeatTypes) -> BaseEstimator:
         if isinstance(X, np.ndarray):
             X = self.numpy_to_pandas(X)
 
-        if hasattr(X, "iloc") and not issparse(X):
+        if ispandas(X) and not issparse(X):
             X = cast(pd.DataFrame, X)
             X = self._convert_all_nan_columns_to_numeric(X, fit=True)
             self.enc_columns, self.feat_type = self._get_columns_to_encode(X)
@@ -328,7 +329,7 @@ def transform(self, X: SupportedFeatTypes) -> Union[np.ndarray, spmatrix, pd.Dat
 
         # If a list was provided, it will be converted to pandas
         X = self.list_to_pandas(X) if isinstance(X, list) else self.numpy_to_pandas(X)
-        if hasattr(X, "iloc") and not issparse(X):
+        if ispandas(X) and not issparse(X):
             X = self._convert_all_nan_columns_to_numeric(X)
         if len(self.categorical_columns) > 0:
             X = self._adapt_categorical_columns_to_train_data(X)
@@ -375,7 +376,7 @@ def _compress_dataset(self, X: DatasetCompressionInputType) -> DatasetCompressio
             DatasetCompressionInputType:
                 Compressed dataset.
         """
-        is_dataframe = hasattr(X, 'iloc')
+        is_dataframe = ispandas(X)
         is_reducible_type = isinstance(X, np.ndarray) or issparse(X) or is_dataframe
         if not is_reducible_type or self._dataset_compression is None:
             return X
@@ -431,17 +432,16 @@ def _check_data(self, X: SupportedFeatTypes) -> None:
                 f"but got type {str(type(X))} in the current features. This change might cause problems"
             )
 
-        # Do not support category/string numpy data. Only numbers
-        if hasattr(X, "dtype") and not np.issubdtype(X.dtype.type, np.number):  # type: ignore[union-attr]
+        if ispandas(X):  # For pandas, no support of nan in categorical cols
+            self._check_dataframe(X)
+
+        # For ndarray, no support of category/string
+        if isinstance(X, np.ndarray) and not np.issubdtype(X.dtype.type, np.number):
+            dt = X.dtype.type
             raise ValueError(
-                "AutoPyTorch does not support numpy.ndarray with non-numerical dtype, "
-                f"but got {X.dtype.type}"  # type: ignore[union-attr]
+                f"AutoPyTorch does not support numpy.ndarray with non-numerical dtype, but got {dt}"
             )
 
-        # Then for Pandas, we do not support Nan in categorical columns
-        if hasattr(X, "iloc"):
-            self._check_dataframe(X)
-
     def _get_columns_to_encode(
         self,
         X: pd.DataFrame,
diff --git a/autoPyTorch/data/utils.py b/autoPyTorch/data/utils.py
@@ -37,6 +37,11 @@
 }
 
 
+def ispandas(X: Any) -> bool:
+    """ Duck-typed pandas check: True iff X has an `iloc` attribute (as DataFrame and Series do) """
+    return hasattr(X, "iloc")
+
+
 def get_dataset_compression_mapping(
     memory_limit: int,
     dataset_compression: Union[bool, Mapping[str, Any]]