@@ -2103,121 +2103,121 @@ def test_detect_unsigned_integer_dtypes(self):
2103
2103
assert config ['transformers' ][column_name ].__class__ .__name__ == 'FloatFormatter'
2104
2104
2105
2105
2106
def test_numerical_dtype_handling():
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Every column is named after the dtype it stores. The pandas nullable dtypes
    carry a ``pd.NA`` entry and the NumPy dtypes are fully populated. The frame
    goes through ``detect_initial_config``, ``fit``, ``transform`` and
    ``reverse_transform``.

    Expected:
        - all transformed columns have dtype ``float``.
        - each reverse-transformed column has the dtype named by its column.
    """
    # Setup
    nullable_int_dtypes = (
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    )
    nullable_float_dtypes = ('Float32', 'Float64')
    numpy_uint_dtypes = ('uint8', 'uint16', 'uint32', 'uint64')
    numpy_int_dtypes = ('int8', 'int16', 'int32', 'int64')

    # Insertion order fixes the column order of the frame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_uint_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    columns['float'] = np.array([1.1, 2.2, 3.3, 4.4], dtype='float')
    for dtype in numpy_int_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    original_data = pd.DataFrame(columns)

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Check the dtypes one by one: asserting on ``dtypes.unique() == 'float'``
    # raises ValueError (ambiguous truth value) as soon as there is more than
    # one distinct dtype, instead of failing as a regular assertion.
    transformed_dtypes = list(transformed_data.dtypes)
    assert transformed_dtypes
    assert all(dtype == 'float' for dtype in transformed_dtypes)
    for column in original_data.columns:
        # The column name doubles as the expected dtype name.
        assert reverse_transformed_data[column].dtype == column
2143
-
2144
-
2145
def test_numerical_handling_with_nans():
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The HyperTransformer is fitted on a frame whose columns are named after
    the dtype they store. ``reverse_transform`` is then called on a float frame
    with the same columns, each holding non-integer values and a ``np.nan``.

    Expected:
        - columns fitted on pandas nullable dtypes come back with that dtype.
        - columns fitted on NumPy dtypes come back as ``float``.
    """
    # Setup
    nullable_int_dtypes = (
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    )
    nullable_float_dtypes = ('Float32', 'Float64')
    numpy_dtypes = (
        'uint8', 'uint16', 'uint32', 'uint64',
        'float',
        'int8', 'int16', 'int32', 'int64',
    )

    # Insertion order fixes the column order of the frame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_dtypes:
        values = [1.1, 2.2, 3.3, 4.4] if dtype == 'float' else [1, 2, 3, 4]
        columns[dtype] = np.array(values, dtype=dtype)
    original_data = pd.DataFrame(columns)

    # Same columns as the fitted data, but plain floats with a missing value.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    expected_output_dtypes = {}
    for dtype in nullable_int_dtypes + nullable_float_dtypes:
        expected_output_dtypes[dtype] = dtype
    for dtype in numpy_dtypes:
        expected_output_dtypes[dtype] = 'float'

    # Sanity check on the input, done dtype by dtype: asserting on
    # ``dtypes.unique() == 'float'`` raises ValueError (ambiguous truth value)
    # as soon as there is more than one distinct dtype.
    input_dtypes = list(data_with_nans.dtypes)
    assert input_dtypes
    assert all(dtype == 'float' for dtype in input_dtypes)
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype
2106
def test_numerical_dtype_handling(self):
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Every column is named after the dtype it stores. The pandas nullable dtypes
    carry a ``pd.NA`` entry and the NumPy dtypes are fully populated. The frame
    goes through ``detect_initial_config``, ``fit``, ``transform`` and
    ``reverse_transform``.

    Expected:
        - all transformed columns have dtype ``float``.
        - each reverse-transformed column has the dtype named by its column.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Check each column on its own: asserting on ``dtypes.unique() == 'float'``
    # raises ValueError (ambiguous truth value) as soon as there is more than
    # one distinct dtype, and never says which column is wrong.
    transformed_dtypes = transformed_data.dtypes
    assert len(transformed_dtypes) > 0
    unexpected_dtypes = {
        name: str(dtype)
        for name, dtype in transformed_dtypes.items()
        if dtype != 'float'
    }
    assert not unexpected_dtypes, unexpected_dtypes
    for column in original_data.columns:
        # The column name doubles as the expected dtype name.
        assert reverse_transformed_data[column].dtype == column
2143
+
2144
+
2145
def test_numerical_handling_with_nans(self):
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The HyperTransformer is fitted on a frame whose columns are named after
    the dtype they store. ``reverse_transform`` is then called on a float frame
    with the same columns, each holding non-integer values and a ``np.nan``.

    Expected:
        - columns fitted on pandas nullable dtypes come back with that dtype.
        - columns fitted on NumPy dtypes come back as ``float``.
    """
    # Setup
    original_data = pd.DataFrame({
        'Int8': pd.Series([1, 2, 3, pd.NA], dtype='Int8'),
        'Int16': pd.Series([1, 2, 3, pd.NA], dtype='Int16'),
        'Int32': pd.Series([1, 2, 3, pd.NA], dtype='Int32'),
        'Int64': pd.Series([1, 2, 3, pd.NA], dtype='Int64'),
        'UInt8': pd.Series([1, 2, 3, pd.NA], dtype='UInt8'),
        'UInt16': pd.Series([1, 2, 3, pd.NA], dtype='UInt16'),
        'UInt32': pd.Series([1, 2, 3, pd.NA], dtype='UInt32'),
        'UInt64': pd.Series([1, 2, 3, pd.NA], dtype='UInt64'),
        'Float32': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float32'),
        'Float64': pd.Series([1.1, 2.2, 3.3, pd.NA], dtype='Float64'),
        'uint8': np.array([1, 2, 3, 4], dtype='uint8'),
        'uint16': np.array([1, 2, 3, 4], dtype='uint16'),
        'uint32': np.array([1, 2, 3, 4], dtype='uint32'),
        'uint64': np.array([1, 2, 3, 4], dtype='uint64'),
        'float': np.array([1.1, 2.2, 3.3, 4.4], dtype='float'),
        'int8': np.array([1, 2, 3, 4], dtype='int8'),
        'int16': np.array([1, 2, 3, 4], dtype='int16'),
        'int32': np.array([1, 2, 3, 4], dtype='int32'),
        'int64': np.array([1, 2, 3, 4], dtype='int64'),
    })

    # Same columns as the fitted data, but plain floats with a missing value.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    nullable_dtypes = [
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
        'Float32', 'Float64',
    ]
    numpy_dtypes = [
        'uint8', 'uint16', 'uint32', 'uint64',
        'float',
        'int8', 'int16', 'int32', 'int64',
    ]
    expected_output_dtypes = {name: name for name in nullable_dtypes}
    expected_output_dtypes.update({name: 'float' for name in numpy_dtypes})

    # Sanity check on the input, done column by column: asserting on
    # ``dtypes.unique() == 'float'`` raises ValueError (ambiguous truth value)
    # as soon as there is more than one distinct dtype.
    input_dtypes = data_with_nans.dtypes
    assert len(input_dtypes) > 0
    unexpected_dtypes = {
        name: str(dtype)
        for name, dtype in input_dtypes.items()
        if dtype != 'float'
    }
    assert not unexpected_dtypes, unexpected_dtypes
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype
0 commit comments