Skip to content

Commit

Permalink
[style] Remove the `typing.` prefix and adopt Google-style docstrings
Browse files Browse the repository at this point in the history
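This commit makes the following changes:
1. [google doc style] Rename the docstring section header `Arguments` to `Args`.
2. [style] Replace prefixed references `typing.xxx` with directly imported names `xxx` (e.g. `from typing import List, Optional, Union`).
3. [mypy] Replace `torch.tensor` (a factory function) with `torch.Tensor` (the class) in type annotations.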
  • Loading branch information
nabenabe0928 committed Nov 9, 2021
1 parent 4a29852 commit ea3dc91
Show file tree
Hide file tree
Showing 45 changed files with 390 additions and 299 deletions.
2 changes: 1 addition & 1 deletion autoPyTorch/api/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def __init__(
self.resampling_strategy = resampling_strategy
self.resampling_strategy_args = resampling_strategy_args

self.stop_logging_server = None # type: Optional[multiprocessing.synchronize.Event]
self.stop_logging_server: Optional[multiprocessing.synchronize.Event] = None

# Single core, local runs should use fork
# to prevent the __main__ requirements in
Expand Down
54 changes: 29 additions & 25 deletions autoPyTorch/data/base_feature_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import typing
from typing import List, Optional, Union

import numpy as np

Expand All @@ -12,8 +12,8 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


SUPPORTED_FEAT_TYPES = typing.Union[
typing.List,
SUPPORTED_FEAT_TYPES = Union[
List,
pd.DataFrame,
np.ndarray,
scipy.sparse.bsr_matrix,
Expand All @@ -29,54 +29,58 @@
class BaseFeatureValidator(BaseEstimator):
"""
A class to pre-process features. In this regards, the format of the data is checked,
and if applicable, features are encoded
and if applicable, features are encoded.
Attributes:
feat_type (List[str]):
List of the column types found by this estimator during fit.
data_type (str):
Class name of the data type provided during fit.
column_transformer (Optional[BaseEstimator])
Host a encoder object if the data requires transformation (for example,
if provided a categorical column in a pandas DataFrame)
transformed_columns (List[str])
List of columns that were encoded.
"""
def __init__(self,
logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
]] = None,
) -> None:
def __init__(
self,
logger: Optional[Union[PicklableClientLogger, logging.Logger]] = None,
):
# Register types to detect unsupported data format changes
self.feat_type = None # type: typing.Optional[typing.List[str]]
self.data_type = None # type: typing.Optional[type]
self.dtypes = [] # type: typing.List[str]
self.column_order = [] # type: typing.List[str]
self.feat_type: Optional[List[str]] = None
self.data_type: Optional[type] = None
self.dtypes: List[str] = []
self.column_order: List[str] = []

self.column_transformer = None # type: typing.Optional[BaseEstimator]
self.transformed_columns = [] # type: typing.List[str]
self.column_transformer: Optional[BaseEstimator] = None
self.transformed_columns: List[str] = []

self.logger: typing.Union[
self.logger: Union[
PicklableClientLogger, logging.Logger
] = logger if logger is not None else logging.getLogger(__name__)

# Required for dataset properties
self.num_features = None # type: typing.Optional[int]
self.categories = [] # type: typing.List[typing.List[int]]

self.categorical_columns: typing.List[int] = []
self.numerical_columns: typing.List[int] = []
self.num_features: Optional[int] = None
self.categories: List[List[int]] = []
self.categorical_columns: List[int] = []
self.numerical_columns: List[int] = []

self._is_fitted = False

def fit(
self,
X_train: SUPPORTED_FEAT_TYPES,
X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features.
The supported data types are List, numpy arrays and pandas DataFrames.
CSR sparse data types are also supported
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
X_test (typing.Optional[SUPPORTED_FEAT_TYPES]):
X_test (Optional[SUPPORTED_FEAT_TYPES]):
A hold out set of data used for checking
"""

Expand Down Expand Up @@ -108,7 +112,7 @@ def _fit(
X: SUPPORTED_FEAT_TYPES,
) -> BaseEstimator:
"""
Arguments:
Args:
X (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
Expand All @@ -123,7 +127,7 @@ def transform(
X: SUPPORTED_FEAT_TYPES,
) -> np.ndarray:
"""
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features, whose categorical features are going to be
transformed
Expand Down
55 changes: 30 additions & 25 deletions autoPyTorch/data/base_target_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import typing
from typing import List, Optional, Union, cast

import numpy as np

Expand All @@ -12,8 +12,8 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


SUPPORTED_TARGET_TYPES = typing.Union[
typing.List,
SUPPORTED_TARGET_TYPES = Union[
List,
pd.Series,
pd.DataFrame,
np.ndarray,
Expand All @@ -31,52 +31,55 @@ class BaseTargetValidator(BaseEstimator):
"""
A class to pre-process targets. It validates the data provided during fit (to make sure
it matches AutoPyTorch expectation) as well as encoding the targets in case of classification
Attributes:
is_classification (bool):
A bool that indicates if the validator should operate in classification mode.
During classification, the targets are encoded.
encoder (typing.Optional[BaseEstimator]):
encoder (Optional[BaseEstimator]):
Host a encoder object if the data requires transformation (for example,
if provided a categorical column in a pandas DataFrame)
enc_columns (typing.List[str])
enc_columns (List[str])
List of columns that where encoded
"""
def __init__(self,
is_classification: bool = False,
logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
]] = None,
) -> None:
logger: Optional[Union[PicklableClientLogger,
logging.Logger
]
] = None,
):
self.is_classification = is_classification

self.data_type = None # type: typing.Optional[type]
self.data_type: Optional[type] = None

self.encoder = None # type: typing.Optional[BaseEstimator]
self.encoder: Optional[BaseEstimator] = None

self.out_dimensionality = None # type: typing.Optional[int]
self.type_of_target = None # type: typing.Optional[str]
self.out_dimensionality: Optional[int] = None
self.type_of_target: Optional[str] = None

self.logger: typing.Union[
self.logger: Union[
PicklableClientLogger, logging.Logger
] = logger if logger is not None else logging.getLogger(__name__)

# Store the dtype for remapping to correct type
self.dtype = None # type: typing.Optional[type]
self.dtype: Optional[type] = None

self._is_fitted = False

def fit(
self,
y_train: SUPPORTED_TARGET_TYPES,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the targets
The supported data types are List, numpy arrays and pandas DataFrames.
Arguments:
Args:
y_train (SUPPORTED_TARGET_TYPES)
A set of targets set aside for training
y_test (typing.Union[SUPPORTED_TARGET_TYPES])
y_test (Union[SUPPORTED_TARGET_TYPES])
A hold out set of data used of the targets. It is also used to fit the
categories of the encoder.
"""
Expand All @@ -95,7 +98,7 @@ def fit(
np.shape(y_test)
))
if isinstance(y_train, pd.DataFrame):
y_test = typing.cast(pd.DataFrame, y_test)
y_test = cast(pd.DataFrame, y_test)
if y_train.columns.tolist() != y_test.columns.tolist():
raise ValueError(
"Train and test targets must both have the same columns, yet "
Expand Down Expand Up @@ -126,24 +129,24 @@ def fit(
def _fit(
self,
y_train: SUPPORTED_TARGET_TYPES,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Arguments:
Args:
y_train (SUPPORTED_TARGET_TYPES)
The labels of the current task. They are going to be encoded in case
of classification
y_test (typing.Optional[SUPPORTED_TARGET_TYPES])
y_test (Optional[SUPPORTED_TARGET_TYPES])
A holdout set of labels
"""
raise NotImplementedError()

def transform(
self,
y: typing.Union[SUPPORTED_TARGET_TYPES],
y: Union[SUPPORTED_TARGET_TYPES],
) -> np.ndarray:
"""
Arguments:
Args:
y (SUPPORTED_TARGET_TYPES)
A set of targets that are going to be encoded if the current task
is classification
Expand All @@ -160,9 +163,10 @@ def inverse_transform(
"""
Revert any encoding transformation done on a target array
Arguments:
y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]):
Args:
y (Union[np.ndarray, pd.DataFrame, pd.Series]):
Target array to be transformed back to original form before encoding
Returns:
np.ndarray:
The transformed array
Expand All @@ -176,6 +180,7 @@ def classes_(self) -> np.ndarray:
which consist of a ndarray of shape (n_classes,)
where n_classes are the number of classes seen while fitting
a encoder to the targets.
Returns:
classes_: np.ndarray
The unique classes seen during encoding of a classifier
Expand Down
20 changes: 10 additions & 10 deletions autoPyTorch/data/base_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- encoding: utf-8 -*-
import logging.handlers
import typing
from typing import Optional, Tuple

import numpy as np

Expand Down Expand Up @@ -34,16 +34,16 @@ class BaseInputValidator(BaseEstimator):
def __init__(
self,
is_classification: bool = False,
logger_port: typing.Optional[int] = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
logger_port: Optional[int] = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
) -> None:
raise NotImplementedError()

def fit(
self,
X_train: SUPPORTED_FEAT_TYPES,
y_train: SUPPORTED_TARGET_TYPES,
X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features, and
Expand All @@ -58,14 +58,14 @@ def fit(
+ Checks for dimensionality as well as missing values are performed.
+ If performing a classification task, the data is going to be encoded
Arguments:
Args:
X_train (SUPPORTED_FEAT_TYPES):
A set of features that are going to be validated (type and dimensionality
checks). If this data contains categorical columns, an encoder is going to
be instantiated and trained with this data.
y_train (SUPPORTED_TARGET_TYPES):
A set of targets that are going to be encoded if the task is for classification
X_test (typing.Optional[SUPPORTED_FEAT_TYPES]):
X_test (Optional[SUPPORTED_FEAT_TYPES]):
A hold out set of features used for checking
y_test (SUPPORTED_TARGET_TYPES):
A hold out set of targets used for checking. Additionally, if the current task
Expand Down Expand Up @@ -97,15 +97,15 @@ def fit(
def transform(
self,
X: SUPPORTED_FEAT_TYPES,
y: typing.Optional[SUPPORTED_TARGET_TYPES] = None,
) -> typing.Tuple[np.ndarray, typing.Optional[np.ndarray]]:
y: Optional[SUPPORTED_TARGET_TYPES] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Transform the given target or features to a numpy array
Arguments:
Args:
X (SUPPORTED_FEAT_TYPES):
A set of features to transform
y (typing.Optional[SUPPORTED_TARGET_TYPES]):
y (Optional[SUPPORTED_TARGET_TYPES]):
A set of targets to transform
Returns:
Expand Down
Loading

0 comments on commit ea3dc91

Please sign in to comment.