Skip to content

Commit

Permalink
add one indent
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Aug 13, 2024
1 parent fc75a82 commit ccef23b
Showing 1 changed file with 118 additions and 118 deletions.
236 changes: 118 additions & 118 deletions tests/integration/test_hyper_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2103,121 +2103,121 @@ def test_detect_unsigned_integer_dtypes(self):
assert config['transformers'][column_name].__class__.__name__ == 'FloatFormatter'


def test_numerical_dtype_handling():
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Each column is named after its own dtype, so after a ``transform`` /
    ``reverse_transform`` round trip the dtype of every column can be compared
    directly against the column name.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Check every column explicitly: comparing ``dtypes.unique()`` to a string
    # raises an ambiguous-truth-value ``ValueError`` as soon as more than one
    # dtype is present, which hides the real failure.
    assert all(dtype == 'float' for dtype in transformed_data.dtypes)
    for column in original_data.columns:
        # The column name doubles as the expected dtype name.
        assert reverse_transformed_data[column].dtype == column


def test_numerical_handling_with_nans():
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The transformer is fitted on data covering every numerical dtype and then
    asked to reverse transform an all-float frame containing NaN values. The
    pandas nullable dtypes are expected to be restored, while the NumPy dtype
    columns are expected to come back as ``float``.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    # Same columns (in the same order) as ``original_data``, but every column
    # holds plain floats with a trailing NaN.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    expected_output_dtypes = {
        'Int8': 'Int8',
        'Int16': 'Int16',
        'Int32': 'Int32',
        'Int64': 'Int64',
        'UInt8': 'UInt8',
        'UInt16': 'UInt16',
        'UInt32': 'UInt32',
        'UInt64': 'UInt64',
        'Float32': 'Float32',
        'Float64': 'Float64',
        'uint8': 'float',
        'uint16': 'float',
        'uint32': 'float',
        'uint64': 'float',
        'float': 'float',
        'int8': 'float',
        'int16': 'float',
        'int32': 'float',
        'int64': 'float',
    }
    # Sanity check on the input: check every column explicitly, because
    # comparing ``dtypes.unique()`` to a string raises an ambiguous-truth-value
    # ``ValueError`` as soon as more than one dtype is present.
    assert all(dtype == 'float' for dtype in data_with_nans.dtypes)
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype
def test_numerical_dtype_handling(self):
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Each column is named after its own dtype, so after a ``transform`` /
    ``reverse_transform`` round trip the dtype of every column can be compared
    directly against the column name.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Check every column explicitly: comparing ``dtypes.unique()`` to a string
    # raises an ambiguous-truth-value ``ValueError`` as soon as more than one
    # dtype is present, which hides the real failure.
    assert all(dtype == 'float' for dtype in transformed_data.dtypes)
    for column in original_data.columns:
        # The column name doubles as the expected dtype name.
        assert reverse_transformed_data[column].dtype == column


def test_numerical_handling_with_nans(self):
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The transformer is fitted on data covering every numerical dtype and then
    asked to reverse transform an all-float frame containing NaN values. The
    pandas nullable dtypes are expected to be restored, while the NumPy dtype
    columns are expected to come back as ``float``.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    # Same columns (in the same order) as ``original_data``, but every column
    # holds plain floats with a trailing NaN.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    expected_output_dtypes = {
        'Int8': 'Int8',
        'Int16': 'Int16',
        'Int32': 'Int32',
        'Int64': 'Int64',
        'UInt8': 'UInt8',
        'UInt16': 'UInt16',
        'UInt32': 'UInt32',
        'UInt64': 'UInt64',
        'Float32': 'Float32',
        'Float64': 'Float64',
        'uint8': 'float',
        'uint16': 'float',
        'uint32': 'float',
        'uint64': 'float',
        'float': 'float',
        'int8': 'float',
        'int16': 'float',
        'int32': 'float',
        'int64': 'float',
    }
    # Sanity check on the input: check every column explicitly, because
    # comparing ``dtypes.unique()`` to a string raises an ambiguous-truth-value
    # ``ValueError`` as soon as more than one dtype is present.
    assert all(dtype == 'float' for dtype in data_with_nans.dtypes)
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype

0 comments on commit ccef23b

Please sign in to comment.