Skip to content

Commit

Permalink
[style] Modify data/
Browse files Browse the repository at this point in the history
[style] Modify data/base_feature_validator.py

[style] Modify data/base_validator.py and base_target_validator.py

[style] Modify data/base_validator.py and tabular_xxx_validator.py

[style] Modify base_validator.py
  • Loading branch information
nabenabe0928 committed Oct 26, 2021
1 parent 715438d commit f879bd3
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 105 deletions.
55 changes: 27 additions & 28 deletions autoPyTorch/data/base_feature_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import typing
from typing import List, Optional, Set, Tuple, Union

import numpy as np

Expand All @@ -12,8 +12,8 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


SUPPORTED_FEAT_TYPES = typing.Union[
typing.List,
SUPPORTED_FEAT_TYPES = Union[
List,
pd.DataFrame,
np.ndarray,
scipy.sparse.bsr_matrix,
Expand All @@ -29,58 +29,57 @@
class BaseFeatureValidator(BaseEstimator):
"""
A class to pre-process features. In this regards, the format of the data is checked,
and if applicable, features are encoded
and if applicable, features are encoded.
Attributes:
feat_type (List[str]):
List of the column types found by this estimator during fit.
data_type (str):
Class name of the data type provided during fit.
encoder (typing.Optional[BaseEstimator])
encoder (Optional[BaseEstimator])
Host a encoder object if the data requires transformation (for example,
if provided a categorical column in a pandas DataFrame)
enc_columns (typing.List[str])
List of columns that were encoded.
if provided a categorical column in a pandas DataFrame).
"""
def __init__(self,
logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
]] = None,
) -> None:
def __init__(
self,
logger: Optional[Union[PicklableClientLogger, logging.Logger]] = None,
):
# Register types to detect unsupported data format changes
self.feat_type = None # type: typing.Optional[typing.List[str]]
self.data_type = None # type: typing.Optional[type]
self.dtypes = [] # type: typing.List[str]
self.column_order = [] # type: typing.List[str]
self.feat_type: Optional[List[str]] = None
self.data_type: Optional[type] = None
self.dtypes: List[str] = []
self.column_order: List[str] = []

self.encoder = None # type: typing.Optional[BaseEstimator]
self.enc_columns = [] # type: typing.List[str]
self.encoder = None # type: Optional[BaseEstimator]
self.enc_columns = [] # type: List[str]

self.logger: typing.Union[
self.logger: Union[
PicklableClientLogger, logging.Logger
] = logger if logger is not None else logging.getLogger(__name__)

# Required for dataset properties
self.num_features = None # type: typing.Optional[int]
self.categories = [] # type: typing.List[typing.List[int]]
self.categorical_columns: typing.List[int] = []
self.numerical_columns: typing.List[int] = []
self.num_features: Optional[int] = None
self.categories: List[List[int]] = []
self.categorical_columns: List[int] = []
self.numerical_columns: List[int] = []

self._is_fitted = False

def fit(
self,
X_train: SUPPORTED_FEAT_TYPES,
X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features.
The supported data types are List, numpy arrays and pandas DataFrames.
CSR sparse data types are also supported
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
X_test (typing.Optional[SUPPORTED_FEAT_TYPES]):
X_test (Optional[SUPPORTED_FEAT_TYPES]):
A hold out set of data used for checking
"""

Expand Down Expand Up @@ -112,7 +111,7 @@ def _fit(
X: SUPPORTED_FEAT_TYPES,
) -> BaseEstimator:
"""
Arguments:
Args:
X (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
Expand All @@ -127,7 +126,7 @@ def transform(
X: SUPPORTED_FEAT_TYPES,
) -> np.ndarray:
"""
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features, whose categorical features are going to be
transformed
Expand Down
53 changes: 28 additions & 25 deletions autoPyTorch/data/base_target_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import typing
from typing import List, Optional, Union, cast

import numpy as np

Expand All @@ -12,8 +12,8 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


SUPPORTED_TARGET_TYPES = typing.Union[
typing.List,
SUPPORTED_TARGET_TYPES = Union[
List,
pd.Series,
pd.DataFrame,
np.ndarray,
Expand All @@ -35,48 +35,50 @@ class BaseTargetValidator(BaseEstimator):
is_classification (bool):
A bool that indicates if the validator should operate in classification mode.
During classification, the targets are encoded.
encoder (typing.Optional[BaseEstimator]):
encoder (Optional[BaseEstimator]):
Host a encoder object if the data requires transformation (for example,
if provided a categorical column in a pandas DataFrame)
enc_columns (typing.List[str])
enc_columns (List[str])
List of columns that where encoded
"""
def __init__(self,
is_classification: bool = False,
logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
]] = None,
) -> None:
logger: Optional[Union[PicklableClientLogger,
logging.Logger
]
] = None,
):
self.is_classification = is_classification

self.data_type = None # type: typing.Optional[type]
self.data_type: Optional[type] = None

self.encoder = None # type: typing.Optional[BaseEstimator]
self.encoder: Optional[BaseEstimator] = None

self.out_dimensionality = None # type: typing.Optional[int]
self.type_of_target = None # type: typing.Optional[str]
self.out_dimensionality: Optional[int] = None
self.type_of_target: Optional[str] = None

self.logger: typing.Union[
self.logger: Union[
PicklableClientLogger, logging.Logger
] = logger if logger is not None else logging.getLogger(__name__)

# Store the dtype for remapping to correct type
self.dtype = None # type: typing.Optional[type]
self.dtype: Optional[type] = None

self._is_fitted = False

def fit(
self,
y_train: SUPPORTED_TARGET_TYPES,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the targets
The supported data types are List, numpy arrays and pandas DataFrames.
Arguments:
Args:
y_train (SUPPORTED_TARGET_TYPES)
A set of targets set aside for training
y_test (typing.Union[SUPPORTED_TARGET_TYPES])
y_test (Union[SUPPORTED_TARGET_TYPES])
A hold out set of data used of the targets. It is also used to fit the
categories of the encoder.
"""
Expand All @@ -95,7 +97,8 @@ def fit(
np.shape(y_test)
))
if isinstance(y_train, pd.DataFrame):
y_test = typing.cast(pd.DataFrame, y_test)
y_train = cast(pd.DataFrame, y_train)
y_test = cast(pd.DataFrame, y_test)
if y_train.columns.tolist() != y_test.columns.tolist():
raise ValueError(
"Train and test targets must both have the same columns, yet "
Expand Down Expand Up @@ -126,24 +129,24 @@ def fit(
def _fit(
self,
y_train: SUPPORTED_TARGET_TYPES,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Arguments:
Args:
y_train (SUPPORTED_TARGET_TYPES)
The labels of the current task. They are going to be encoded in case
of classification
y_test (typing.Optional[SUPPORTED_TARGET_TYPES])
y_test (Optional[SUPPORTED_TARGET_TYPES])
A holdout set of labels
"""
raise NotImplementedError()

def transform(
self,
y: typing.Union[SUPPORTED_TARGET_TYPES],
y: Union[SUPPORTED_TARGET_TYPES],
) -> np.ndarray:
"""
Arguments:
Args:
y (SUPPORTED_TARGET_TYPES)
A set of targets that are going to be encoded if the current task
is classification
Expand All @@ -160,8 +163,8 @@ def inverse_transform(
"""
Revert any encoding transformation done on a target array
Arguments:
y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]):
Args:
y (Union[np.ndarray, pd.DataFrame, pd.Series]):
Target array to be transformed back to original form before encoding
Returns:
np.ndarray:
Expand Down
20 changes: 10 additions & 10 deletions autoPyTorch/data/base_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- encoding: utf-8 -*-
import logging.handlers
import typing
from typing import Optional, Tuple

import numpy as np

Expand Down Expand Up @@ -34,16 +34,16 @@ class BaseInputValidator(BaseEstimator):
def __init__(
self,
is_classification: bool = False,
logger_port: typing.Optional[int] = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
logger_port: Optional[int] = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
) -> None:
raise NotImplementedError()

def fit(
self,
X_train: SUPPORTED_FEAT_TYPES,
y_train: SUPPORTED_TARGET_TYPES,
X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features, and
Expand All @@ -58,14 +58,14 @@ def fit(
+ Checks for dimensionality as well as missing values are performed.
+ If performing a classification task, the data is going to be encoded
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks). If this data contains categorical columns, an encoder is going to
be instantiated and trained with this data.
y_train (SUPPORTED_TARGET_TYPES):
A set of targets that are going to be encoded if the task is for classification
X_test (typing.Optional[SUPPORTED_FEAT_TYPES]):
X_test (Optional[SUPPORTED_FEAT_TYPES]):
A hold out set of features used for checking
y_test (SUPPORTED_TARGET_TYPES):
A hold out set of targets used for checking. Additionally, if the current task
Expand Down Expand Up @@ -97,15 +97,15 @@ def fit(
def transform(
self,
X: SUPPORTED_FEAT_TYPES,
y: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
) -> typing.Tuple[np.ndarray, typing.Optional[np.ndarray]]:
y: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Transform the given target or features to a numpy array
Arguments:
Args:
X (SUPPORTED_FEAT_TYPES):
A set of features to transform
y (typing.Optional[SUPPORTED_TARGET_TYPES]):
y (Optional[SUPPORTED_TARGET_TYPES]):
A set of targets to transform
Returns:
Expand Down
Loading

0 comments on commit f879bd3

Please sign in to comment.