Skip to content

Commit

Permalink
Adapt the BooleanTransformer to the new BaseTransformer
Browse files: browse the repository at this point in the history
  • Loading branch information
csala committed Sep 23, 2021
1 parent 5f994f9 commit caa22d5
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 57 deletions.
34 changes: 19 additions & 15 deletions rdt/transformers/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ class BooleanTransformer(BaseTransformer):
If ``False``, do not create the new column.
Defaults to ``None``.
"""

INPUT_TYPE = 'boolean'
DETERMINISTIC_TRANSFORM = True
DETERMINISTIC_REVERSE = True
COMPOSITION_IS_IDENTITY = True

null_transformer = None

Expand All @@ -38,19 +38,19 @@ def __init__(self, nan=-1, null_column=None):
self.null_column = null_column

def get_output_types(self):
"""Return the output types supported by the transformer.
"""Return the output types returned by this transformer.
Returns:
dict:
Mapping from the transformed column names to supported data types.
Mapping from the transformed column names to the produced data types.
"""
if self.null_transformer._null_column: # whether an extra column is generated
return {
f'{self._columns[0]}': 'float',
f'{self._columns[0]}.is_null': 'bool',
}
output_types = {
'value': 'float',
}
if self.null_transformer and self.null_transformer.creates_null_column():
output_types['is_null'] = 'float'

return {self._columns[0]: 'float'}
return self._add_prefix(output_types)

def _fit(self, data):
"""Fit the transformer to the data.
Expand All @@ -63,7 +63,7 @@ def _fit(self, data):
data = pd.Series(data)

self.null_transformer = NullTransformer(self.nan, self.null_column, copy=True)
self.null_transformer._fit(data)
self.null_transformer.fit(data)

def _transform(self, data):
"""Transform boolean to float.
Expand All @@ -76,27 +76,31 @@ def _transform(self, data):
Data to transform.
Returns
numpy.ndarray
pandas.DataFrame or pandas.Series
"""
if isinstance(data, np.ndarray):
data = pd.Series(data)

data = pd.to_numeric(data, errors='coerce')

return self.null_transformer._transform(data).astype(float)
return self.null_transformer.transform(data).astype(float)

def _reverse_transform(self, data):
"""Transform float values back to the original boolean values.
Args:
data (numpy.ndarray):
data (pandas.DataFrame or pandas.Series):
Data to revert.
Returns:
pandas.Series
pandas.Series:
Reverted data.
"""
if not isinstance(data, np.ndarray):
data = data.values

if self.nan is not None:
data = self.null_transformer._reverse_transform(data)
data = self.null_transformer.reverse_transform(data)

if isinstance(data, np.ndarray):
if data.ndim == 2:
Expand Down
18 changes: 9 additions & 9 deletions tests/integration/transformers/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ def test_boolean_some_nans(self):
- The reversed transformed data
"""
# Setup
data = pd.Series([True, False, None, False])
data = pd.DataFrame([True, False, None, False], columns=['bool'])
transformer = BooleanTransformer()

# Run
transformer.fit(data)
transformer.fit(data, data.columns.to_list())
transformed = transformer.transform(data)
reverse = transformer.reverse_transform(transformed)

# Assert
pd.testing.assert_series_equal(reverse, data)
pd.testing.assert_frame_equal(reverse, data)

def test_boolean_all_nans(self):
"""Test BooleanTransformer on input with all nan values.
Expand All @@ -43,16 +43,16 @@ def test_boolean_all_nans(self):
- The reversed transformed data
"""
# Setup
data = pd.Series([None, None, None, None])
data = pd.DataFrame([None, None, None, None], columns=['bool'])
transformer = BooleanTransformer()

# Run
transformer.fit(data)
transformer.fit(data, data.columns.to_list())
transformed = transformer.transform(data)
reverse = transformer.reverse_transform(transformed)

# Assert
pd.testing.assert_series_equal(reverse, data)
pd.testing.assert_frame_equal(reverse, data)

def test_boolean_input_unchanged(self):
"""Test BooleanTransformer on input with some nan values.
Expand All @@ -69,15 +69,15 @@ def test_boolean_input_unchanged(self):
- The intermediate transformed data is unchanged.
"""
# Setup
data = pd.Series([True, False, None, False])
data = pd.DataFrame([True, False, None, False], columns=['bool'])
transformer = BooleanTransformer()

# Run
transformer.fit(data)
transformer.fit(data, data.columns.to_list())
transformed = transformer.transform(data)
unchanged_transformed = transformed.copy()
reverse = transformer.reverse_transform(transformed)

# Assert
pd.testing.assert_series_equal(reverse, data)
pd.testing.assert_frame_equal(reverse, data)
np.testing.assert_array_equal(unchanged_transformed, transformed)
66 changes: 33 additions & 33 deletions tests/unit/transformers/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ def test___init__(self):
self.assertEqual(transformer.nan, -1, "Unexpected nan")
self.assertIsNone(transformer.null_column, "null_column is None by default")

def test_fit_nan_ignore(self):
"""Test fit nan equal to ignore"""
def test__fit_nan_ignore(self):
"""Test _fit nan equal to ignore"""
# Setup
data = pd.Series([False, True, True, False, True])

# Run
transformer = BooleanTransformer(nan=None)
transformer.fit(data)
transformer._fit(data)

# Asserts
expect_fill_value = None
Expand All @@ -36,14 +36,14 @@ def test_fit_nan_ignore(self):
"Unexpected fill value"
)

def test_fit_nan_not_ignore(self):
"""Test fit nan not equal to ignore"""
def test__fit_nan_not_ignore(self):
"""Test _fit nan not equal to ignore"""
# Setup
data = pd.Series([False, True, True, False, True])

# Run
transformer = BooleanTransformer(nan=0)
transformer.fit(data)
transformer._fit(data)

# Asserts
expect_fill_value = 0
Expand All @@ -54,14 +54,14 @@ def test_fit_nan_not_ignore(self):
"Unexpected fill value"
)

def test_fit_array(self):
"""Test fit with numpy.array"""
def test__fit_array(self):
"""Test _fit with numpy.array"""
# Setup
data = np.array([False, True, True, False, True])

# Run
transformer = BooleanTransformer(nan=0)
transformer.fit(data)
transformer._fit(data)

# Asserts
expect_fill_value = 0
Expand All @@ -72,15 +72,15 @@ def test_fit_array(self):
"Unexpected fill value"
)

def test_transform_series(self):
def test__transform_series(self):
"""Test transform pandas.Series"""
# Setup
data = pd.Series([False, True, None, True, False])

# Run
transformer = Mock()

BooleanTransformer.transform(transformer, data)
BooleanTransformer._transform(transformer, data)

# Asserts
expect_call_count = 1
Expand All @@ -96,15 +96,15 @@ def test_transform_series(self):
expect_call_args
)

def test_transform_array(self):
def test__transform_array(self):
"""Test transform numpy.array"""
# Setup
data = np.array([False, True, None, True, False])

# Run
transformer = Mock()

BooleanTransformer.transform(transformer, data)
BooleanTransformer._transform(transformer, data)

# Asserts
expect_call_count = 1
Expand All @@ -120,16 +120,16 @@ def test_transform_array(self):
expect_call_args
)

def test_reverse_transform_nan_ignore(self):
"""Test reverse_transform with nan equal to ignore"""
def test__reverse_transform_nan_ignore(self):
"""Test _reverse_transform with nan equal to ignore"""
# Setup
data = np.array([0.0, 1.0, 0.0, 1.0, 0.0])

# Run
transformer = Mock()
transformer.nan = None

result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expect = np.array([False, True, False, True, False])
Expand All @@ -142,8 +142,8 @@ def test_reverse_transform_nan_ignore(self):
"NullTransformer.reverse_transform should not be called when nan is ignore"
)

def test_reverse_transform_nan_not_ignore(self):
"""Test reverse_transform with nan not equal to ignore"""
def test__reverse_transform_nan_not_ignore(self):
"""Test _reverse_transform with nan not equal to ignore"""
# Setup
data = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
transformed_data = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
Expand All @@ -153,7 +153,7 @@ def test_reverse_transform_nan_not_ignore(self):
transformer.nan = 0
transformer.null_transformer.reverse_transform.return_value = transformed_data

result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expect = np.array([False, True, False, True, False])
Expand All @@ -166,44 +166,44 @@ def test_reverse_transform_nan_not_ignore(self):
"NullTransformer.reverse_transform should not be called when nan is ignore"
)

def test_reverse_transform_not_null_values(self):
"""Test reverse_transform not null values correctly"""
def test__reverse_transform_not_null_values(self):
"""Test _reverse_transform not null values correctly"""
# Setup
data = np.array([1., 0., 1.])

# Run
transformer = Mock()
transformer.nan = None

result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expected = np.array([True, False, True])

assert isinstance(result, pd.Series)
np.testing.assert_equal(result.values, expected)

def test_reverse_transform_2d_ndarray(self):
"""Test reverse_transform not null values correctly"""
def test__reverse_transform_2d_ndarray(self):
"""Test _reverse_transform not null values correctly"""
# Setup
data = np.array([[1.], [0.], [1.]])

# Run
transformer = Mock()
transformer.nan = None

result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expected = np.array([True, False, True])

assert isinstance(result, pd.Series)
np.testing.assert_equal(result.values, expected)

def test_reverse_transform_float_values(self):
"""Test the ``reverse_transform`` method with decimals.
def test__reverse_transform_float_values(self):
"""Test the ``_reverse_transform`` method with decimals.
Expect that the ``reverse_transform`` method handles decimal inputs
Expect that the ``_reverse_transform`` method handles decimal inputs
correctly by rounding them.
Input:
Expand All @@ -217,18 +217,18 @@ def test_reverse_transform_float_values(self):
transformer.nan = None

# Run
result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expected = np.array([True, False, True])

assert isinstance(result, pd.Series)
np.testing.assert_equal(result.values, expected)

def test_reverse_transform_float_values_out_of_range(self):
"""Test the ``reverse_transform`` method with decimals that are out of range.
def test__reverse_transform_float_values_out_of_range(self):
"""Test the ``_reverse_transform`` method with decimals that are out of range.
Expect that the ``reverse_transform`` method handles decimal inputs
Expect that the ``_reverse_transform`` method handles decimal inputs
correctly by rounding them. If the rounded decimal inputs are < 0 or > 1,
expect them to be clipped.
Expand All @@ -243,7 +243,7 @@ def test_reverse_transform_float_values_out_of_range(self):
transformer.nan = None

# Run
result = BooleanTransformer.reverse_transform(transformer, data)
result = BooleanTransformer._reverse_transform(transformer, data)

# Asserts
expected = np.array([True, False, True])
Expand Down

0 comments on commit caa22d5

Please sign in to comment.