@@ -2103,121 +2103,121 @@ def test_detect_unsigned_integer_dtypes(self):
21032103 assert config ['transformers' ][column_name ].__class__ .__name__ == 'FloatFormatter'
21042104
21052105
def test_numerical_dtype_handling():
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Each column is named after the dtype it holds, which lets the final check
    compare every reverse-transformed column's dtype against its own name.
    """
    # Setup
    nullable_int_dtypes = [
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    ]
    nullable_float_dtypes = ['Float32', 'Float64']
    numpy_uint_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
    numpy_int_dtypes = ['int8', 'int16', 'int32', 'int64']

    # Insertion order below fixes the column order of the DataFrame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_uint_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    columns['float'] = np.array([1.1, 2.2, 3.3, 4.4], dtype='float')
    for dtype in numpy_int_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    original_data = pd.DataFrame(columns)

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Checking column by column avoids the ambiguous-truth-value ValueError that
    # ``dtypes.unique() == 'float'`` raises when more than one dtype is present,
    # and the failure message names the offending columns.
    non_float_columns = [
        name for name, dtype in transformed_data.dtypes.items() if dtype != 'float'
    ]
    assert non_float_columns == []
    for column in original_data.columns:
        assert reverse_transformed_data[column].dtype == column
2143-
2144-
def test_numerical_handling_with_nans():
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The transformer is fitted on data whose columns are named after their
    dtypes, then asked to reverse transform float data containing ``NaN`` in
    every column. The pandas nullable dtypes are expected to be restored,
    while the NumPy dtypes are expected to come back as ``float``.
    """
    # Setup
    nullable_int_dtypes = [
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    ]
    nullable_float_dtypes = ['Float32', 'Float64']
    numpy_uint_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
    numpy_int_dtypes = ['int8', 'int16', 'int32', 'int64']

    # Insertion order below fixes the column order of the DataFrame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_uint_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    columns['float'] = np.array([1.1, 2.2, 3.3, 4.4], dtype='float')
    for dtype in numpy_int_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    original_data = pd.DataFrame(columns)

    # Float-only input with a missing value in every column.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    expected_output_dtypes = {}
    for dtype in nullable_int_dtypes + nullable_float_dtypes:
        expected_output_dtypes[dtype] = dtype
    for dtype in numpy_uint_dtypes + ['float'] + numpy_int_dtypes:
        expected_output_dtypes[dtype] = 'float'

    # Checking column by column avoids the ambiguous-truth-value ValueError that
    # ``dtypes.unique() == 'float'`` raises when more than one dtype is present.
    non_float_columns = [
        name for name, dtype in data_with_nans.dtypes.items() if dtype != 'float'
    ]
    assert non_float_columns == []
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype
def test_numerical_dtype_handling(self):
    """Test that the HyperTransformer correctly handles all numerical dtypes.

    Each column is named after the dtype it holds, which lets the final check
    compare every reverse-transformed column's dtype against its own name.
    """
    # Setup
    nullable_int_dtypes = [
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    ]
    nullable_float_dtypes = ['Float32', 'Float64']
    numpy_uint_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
    numpy_int_dtypes = ['int8', 'int16', 'int32', 'int64']

    # Insertion order below fixes the column order of the DataFrame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_uint_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    columns['float'] = np.array([1.1, 2.2, 3.3, 4.4], dtype='float')
    for dtype in numpy_int_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    original_data = pd.DataFrame(columns)

    ht = HyperTransformer()

    # Run
    ht.detect_initial_config(original_data)
    ht.fit(original_data)
    transformed_data = ht.transform(original_data)
    reverse_transformed_data = ht.reverse_transform(transformed_data)

    # Assert
    # Checking column by column avoids the ambiguous-truth-value ValueError that
    # ``dtypes.unique() == 'float'`` raises when more than one dtype is present,
    # and the failure message names the offending columns.
    non_float_columns = [
        name for name, dtype in transformed_data.dtypes.items() if dtype != 'float'
    ]
    assert non_float_columns == []
    for column in original_data.columns:
        assert reverse_transformed_data[column].dtype == column
2143+
2144+
def test_numerical_handling_with_nans(self):
    """Test all numerical dtypes handling when there is NaN in the transformed data.

    The transformer is fitted on data whose columns are named after their
    dtypes, then asked to reverse transform float data containing ``NaN`` in
    every column. The pandas nullable dtypes are expected to be restored,
    while the NumPy dtypes are expected to come back as ``float``.
    """
    # Setup
    nullable_int_dtypes = [
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
    ]
    nullable_float_dtypes = ['Float32', 'Float64']
    numpy_uint_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
    numpy_int_dtypes = ['int8', 'int16', 'int32', 'int64']

    # Insertion order below fixes the column order of the DataFrame.
    columns = {}
    for dtype in nullable_int_dtypes:
        columns[dtype] = pd.Series([1, 2, 3, pd.NA], dtype=dtype)
    for dtype in nullable_float_dtypes:
        columns[dtype] = pd.Series([1.1, 2.2, 3.3, pd.NA], dtype=dtype)
    for dtype in numpy_uint_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    columns['float'] = np.array([1.1, 2.2, 3.3, 4.4], dtype='float')
    for dtype in numpy_int_dtypes:
        columns[dtype] = np.array([1, 2, 3, 4], dtype=dtype)
    original_data = pd.DataFrame(columns)

    # Float-only input with a missing value in every column.
    data_with_nans = pd.DataFrame({
        column: [1.1, 2.2, 3.3, np.nan] for column in original_data.columns
    })

    ht = HyperTransformer()
    ht.detect_initial_config(original_data)
    ht.fit(original_data)

    # Run
    reverse_transformed_data = ht.reverse_transform(data_with_nans)

    # Assert
    expected_output_dtypes = {}
    for dtype in nullable_int_dtypes + nullable_float_dtypes:
        expected_output_dtypes[dtype] = dtype
    for dtype in numpy_uint_dtypes + ['float'] + numpy_int_dtypes:
        expected_output_dtypes[dtype] = 'float'

    # Checking column by column avoids the ambiguous-truth-value ValueError that
    # ``dtypes.unique() == 'float'`` raises when more than one dtype is present.
    non_float_columns = [
        name for name, dtype in data_with_nans.dtypes.items() if dtype != 'float'
    ]
    assert non_float_columns == []
    for column_name, expected_dtype in expected_output_dtypes.items():
        assert reverse_transformed_data[column_name].dtype == expected_dtype
0 commit comments