|
21 | 21 | from autoPyTorch.data.utils import (
|
22 | 22 | DatasetCompressionInputType,
|
23 | 23 | DatasetDTypeContainerType,
|
| 24 | + ispandas, |
24 | 25 | reduce_dataset_size_if_too_large
|
25 | 26 | )
|
26 | 27 | from autoPyTorch.utils.common import autoPyTorchEnum
|
@@ -211,7 +212,7 @@ def _fit(self, X: SupportedFeatTypes) -> BaseEstimator:
|
211 | 212 | if isinstance(X, np.ndarray):
|
212 | 213 | X = self.numpy_to_pandas(X)
|
213 | 214 |
|
214 |
| - if hasattr(X, "iloc") and not issparse(X): |
| 215 | + if ispandas(X) and not issparse(X): |
215 | 216 | X = cast(pd.DataFrame, X)
|
216 | 217 | X = self._convert_all_nan_columns_to_numeric(X, fit=True)
|
217 | 218 | self.enc_columns, self.feat_type = self._get_columns_to_encode(X)
|
@@ -328,7 +329,7 @@ def transform(self, X: SupportedFeatTypes) -> Union[np.ndarray, spmatrix, pd.Dat
|
328 | 329 |
|
329 | 330 | # If a list was provided, it will be converted to pandas
|
330 | 331 | X = self.list_to_pandas(X) if isinstance(X, list) else self.numpy_to_pandas(X)
|
331 |
| - if hasattr(X, "iloc") and not issparse(X): |
| 332 | + if ispandas(X) and not issparse(X): |
332 | 333 | X = self._convert_all_nan_columns_to_numeric(X)
|
333 | 334 | if len(self.categorical_columns) > 0:
|
334 | 335 | X = self._adapt_categorical_columns_to_train_data(X)
|
@@ -375,7 +376,7 @@ def _compress_dataset(self, X: DatasetCompressionInputType) -> DatasetCompressio
|
375 | 376 | DatasetCompressionInputType:
|
376 | 377 | Compressed dataset.
|
377 | 378 | """
|
378 |
| - is_dataframe = hasattr(X, 'iloc') |
| 379 | + is_dataframe = ispandas(X) |
379 | 380 | is_reducible_type = isinstance(X, np.ndarray) or issparse(X) or is_dataframe
|
380 | 381 | if not is_reducible_type or self._dataset_compression is None:
|
381 | 382 | return X
|
@@ -431,17 +432,16 @@ def _check_data(self, X: SupportedFeatTypes) -> None:
|
431 | 432 | f"but got type {str(type(X))} in the current features. This change might cause problems"
|
432 | 433 | )
|
433 | 434 |
|
434 |
| - # Do not support category/string numpy data. Only numbers |
435 |
| - if hasattr(X, "dtype") and not np.issubdtype(X.dtype.type, np.number): # type: ignore[union-attr] |
| 435 | + if ispandas(X): # For pandas, no support of nan in categorical cols |
| 436 | + self._check_dataframe(X) |
| 437 | + |
| 438 | + # For ndarray, no support of category/string |
| 439 | + if isinstance(X, np.ndarray) and not np.issubdtype(X.dtype.type, np.number): |
| 440 | + dt = X.dtype.type |
436 | 441 | raise ValueError(
|
437 |
| - "AutoPyTorch does not support numpy.ndarray with non-numerical dtype, " |
438 |
| - f"but got {X.dtype.type}" # type: ignore[union-attr] |
| 442 | + f"AutoPyTorch does not support numpy.ndarray with non-numerical dtype, but got {dt}" |
439 | 443 | )
|
440 | 444 |
|
441 |
| - # Then for Pandas, we do not support Nan in categorical columns |
442 |
| - if hasattr(X, "iloc"): |
443 |
| - self._check_dataframe(X) |
444 |
| - |
445 | 445 | def _get_columns_to_encode(
|
446 | 446 | self,
|
447 | 447 | X: pd.DataFrame,
|
|
0 commit comments