Skip to content

Commit e6ee5c7

Browse files
committed
add more test for imputer function
1 parent 095df79 commit e6ee5c7

File tree

1 file changed

+160
-80
lines changed

1 file changed

+160
-80
lines changed

tests/test_eda_utils_py.py

Lines changed: 160 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66

77

8-
9-
108
def test_imputer():
119
data = pd.DataFrame(
1210
{"col1": [None, 4, 4, 7], "col2": [2, None, None, 2], "col3": [3, None, 6, 6]}
@@ -64,6 +62,9 @@ def test_imputer():
6462
with raises(Exception):
6563
eda_utils_py.imputer(data, strategy="median", fill_value=3)
6664

65+
with raises(Exception):
66+
eda_utils_py.imputer(data, strategy="others")
67+
6768
assert pd.DataFrame.equals(
6869
eda_utils_py.imputer(data), imp_mean
6970
), "The returned dataframe using mean inputer is not correct"
@@ -101,7 +102,7 @@ def test_cor_map():
101102

102103
# Tests whether or not there are NaNs produced in the correlation values
103104
assert (
104-
plot.data["cor"].isnull().sum() == 0
105+
plot.data["cor"].isnull().sum() == 0
105106
), "There are NaN produced as correlation values"
106107

107108
# Tests whether plot output scheme is one of the three given color schemes
@@ -117,20 +118,20 @@ def test_cor_map():
117118

118119
# Tests whether heatmap and correlation values have the same referenced var column
119120
assert (
120-
plot_dict["layer"][0]["encoding"]["x"]["field"]
121-
== plot_dict["layer"][1]["encoding"]["x"]["field"]
121+
plot_dict["layer"][0]["encoding"]["x"]["field"]
122+
== plot_dict["layer"][1]["encoding"]["x"]["field"]
122123
), "The heatmap and the correlation values are not referring to the same corresponding underlying variable x"
123124
assert (
124-
plot_dict["layer"][0]["encoding"]["y"]["field"]
125-
== plot_dict["layer"][1]["encoding"]["y"]["field"]
125+
plot_dict["layer"][0]["encoding"]["y"]["field"]
126+
== plot_dict["layer"][1]["encoding"]["y"]["field"]
126127
), "The heatmap and the correlation values are not referring to the same corresponding underlying variable y"
127128

128129
# Tests whether the axes are using the correct calculated var columns as reference
129130
assert (
130-
plot_dict["layer"][0]["encoding"]["x"]["field"] == "var1"
131+
plot_dict["layer"][0]["encoding"]["x"]["field"] == "var1"
131132
), "x should be referring to var1"
132133
assert (
133-
plot_dict["layer"][0]["encoding"]["y"]["field"] == "var2"
134+
plot_dict["layer"][0]["encoding"]["y"]["field"] == "var2"
134135
), "y should be referring to var2"
135136

136137
# Testing the Exception Errors
@@ -173,56 +174,68 @@ def test_cor_map():
173174

174175
def test_scaler():
175176
mock_df_1 = pd.DataFrame(
176-
{"col1": [1, 0, 0, 3, 4],
177-
"col2": [4, 1, 1, 0, 1],
178-
"col3": [2, 0, 0, 2, 1]}
177+
{"col1": [1, 0, 0, 3, 4], "col2": [4, 1, 1, 0, 1], "col3": [2, 0, 0, 2, 1]}
179178
)
180179

181-
mock_df_2 = pd.DataFrame(
182-
{"col1": [1, 2, 1],
183-
"col2": [0, 1, 2]}
184-
)
180+
mock_df_2 = pd.DataFrame({"col1": [1, 2, 1], "col2": [0, 1, 2]})
185181

186182
mock_df_1_standard = pd.DataFrame(
187-
{"col1": [-0.3302891295379082, -0.8807710121010884, -0.8807710121010884, 0.7706746355884523,
188-
1.3211565181516325],
189-
"col2": [1.714389230829046, -0.26375218935831474, -0.26375218935831474, -0.9231326627541017,
190-
-0.26375218935831474],
191-
"col3": [1.0, -1.0, -1.0, 1.0, 0.0]}
183+
{
184+
"col1": [
185+
-0.3302891295379082,
186+
-0.8807710121010884,
187+
-0.8807710121010884,
188+
0.7706746355884523,
189+
1.3211565181516325,
190+
],
191+
"col2": [
192+
1.714389230829046,
193+
-0.26375218935831474,
194+
-0.26375218935831474,
195+
-0.9231326627541017,
196+
-0.26375218935831474,
197+
],
198+
"col3": [1.0, -1.0, -1.0, 1.0, 0.0],
199+
}
192200
)
193201

194202
mock_df_1_minmax = pd.DataFrame(
195-
{"col1": [0.25, 0.00, 0.00, 0.75, 1.00],
196-
"col2": [1.00, 0.25, 0.25, 0.00, 0.25],
197-
"col3": [1.0, 0.0, 0.0, 1.0, 0.5]}
203+
{
204+
"col1": [0.25, 0.00, 0.00, 0.75, 1.00],
205+
"col2": [1.00, 0.25, 0.25, 0.00, 0.25],
206+
"col3": [1.0, 0.0, 0.0, 1.0, 0.5],
207+
}
198208
)
199209

200210
mock_df_2_standard = pd.DataFrame(
201-
{"col1": [-0.5773502691896256, 1.1547005383792517, -0.5773502691896256],
202-
"col2": [-1.0, 0.0, 1.0]}
211+
{
212+
"col1": [-0.5773502691896256, 1.1547005383792517, -0.5773502691896256],
213+
"col2": [-1.0, 0.0, 1.0],
214+
}
203215
)
204216

205-
mock_df_2_minmax = pd.DataFrame(
206-
{"col1": [0.0, 1.0, 0.0],
207-
"col2": [0.0, 0.5, 1.0]}
208-
)
217+
mock_df_2_minmax = pd.DataFrame({"col1": [0.0, 1.0, 0.0], "col2": [0.0, 0.5, 1.0]})
209218

210-
standard_scaled_mock_df_1 = eda_utils_py.scale(mock_df_1, ['col1', 'col2', 'col3'])
211-
standard_scaled_mock_df_2 = eda_utils_py.scale(mock_df_2, ['col1', 'col2'])
212-
minmax_scaled_mock_df_1 = eda_utils_py.scale(mock_df_1, ['col1', 'col2', 'col3'], scaler="minmax")
213-
minmax_scaled_mock_df_2 = eda_utils_py.scale(mock_df_2, ['col1', 'col2'], scaler="minmax")
219+
standard_scaled_mock_df_1 = eda_utils_py.scale(mock_df_1, ["col1", "col2", "col3"])
220+
standard_scaled_mock_df_2 = eda_utils_py.scale(mock_df_2, ["col1", "col2"])
221+
minmax_scaled_mock_df_1 = eda_utils_py.scale(
222+
mock_df_1, ["col1", "col2", "col3"], scaler="minmax"
223+
)
224+
minmax_scaled_mock_df_2 = eda_utils_py.scale(
225+
mock_df_2, ["col1", "col2"], scaler="minmax"
226+
)
214227

215228
# Tests whether data that is not of type pd.DataFrame raises a TypeError
216229
with raises(TypeError):
217230
eda_utils_py.scale([14, None, 3, 27])
218231

219232
# Tests whether a scaler argument of incorrect type raises a TypeError
220233
with raises(TypeError):
221-
eda_utils_py.scale(mock_df_1, ['col1', 'col2'], scaler=1)
234+
eda_utils_py.scale(mock_df_1, ["col1", "col2"], scaler=1)
222235

223236
# Tests whether a columns argument of incorrect type raises a TypeError
224237
with raises(TypeError):
225-
eda_utils_py.scale(mock_df_1, {'col1': 1, 'col2': 3})
238+
eda_utils_py.scale(mock_df_1, {"col1": 1, "col2": 3})
226239

227240
assert pd.DataFrame.equals(
228241
standard_scaled_mock_df_1, mock_df_1_standard
@@ -240,47 +253,109 @@ def test_scaler():
240253

241254

242255
def test_outlier_identifier():
243-
test_df = pd.DataFrame({
244-
'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 50, 5.4, 5.0, 5.2, 5.3, 5.1],
245-
'SepalWidthCm': [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
246-
'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
247-
'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-germanica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
248-
'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa']
249-
})
250-
251-
test_column = ['SepalLengthCm', 'SepalWidthCm', 'PetalWidthCm']
252-
253-
median_output = pd.DataFrame({
254-
'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 5.1, 5.4, 5.0, 5.2, 5.3, 5.1],
255-
'SepalWidthCm': [1.4, 1.4, 1.5, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
256-
'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.4],
257-
'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-germanica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
258-
'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa']
259-
})
260-
261-
trim_output = pd.DataFrame({
262-
'SepalLengthCm': [5.1, 4.9, 5.5, 5.1, 5.4, 5.0, 5.2, 5.3],
263-
'SepalWidthCm': [1.4, 1.4, 2.0, 0.7, 1.2, 1.4, 1.8, 1.5],
264-
'PetalWidthCm': [0.2, 0.2, 0.3, 0.4, 0.5, 0.6, 0.4, 0.2],
265-
'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
266-
'Iris-setosa', 'Iris-setosa']
267-
})
268-
269-
mean_output = pd.DataFrame({
270-
'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
271-
'SepalWidthCm': [1.4, 1.4, 3.19, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
272-
'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.77],
273-
'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-germanica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
274-
'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa']
275-
})
276-
277-
column_output = pd.DataFrame({
278-
'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
279-
'SepalWidthCm': [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
280-
'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
281-
'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-germanica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
282-
'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa']
283-
})
256+
test_df = pd.DataFrame(
257+
{
258+
"SepalLengthCm": [5.1, 4.9, 4.7, 5.5, 5.1, 50, 5.4, 5.0, 5.2, 5.3, 5.1],
259+
"SepalWidthCm": [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
260+
"PetalWidthCm": [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
261+
"Species": [
262+
"Iris-setosa",
263+
"Iris-virginica",
264+
"Iris-germanica",
265+
"Iris-setosa",
266+
"Iris-setosa",
267+
"Iris-setosa",
268+
"Iris-setosa",
269+
"Iris-setosa",
270+
"Iris-setosa",
271+
"Iris-setosa",
272+
"Iris-setosa",
273+
],
274+
}
275+
)
276+
277+
test_column = ["SepalLengthCm", "SepalWidthCm", "PetalWidthCm"]
278+
279+
median_output = pd.DataFrame(
280+
{
281+
"SepalLengthCm": [5.1, 4.9, 4.7, 5.5, 5.1, 5.1, 5.4, 5.0, 5.2, 5.3, 5.1],
282+
"SepalWidthCm": [1.4, 1.4, 1.5, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
283+
"PetalWidthCm": [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.4],
284+
"Species": [
285+
"Iris-setosa",
286+
"Iris-virginica",
287+
"Iris-germanica",
288+
"Iris-setosa",
289+
"Iris-setosa",
290+
"Iris-setosa",
291+
"Iris-setosa",
292+
"Iris-setosa",
293+
"Iris-setosa",
294+
"Iris-setosa",
295+
"Iris-setosa",
296+
],
297+
}
298+
)
299+
300+
trim_output = pd.DataFrame(
301+
{
302+
"SepalLengthCm": [5.1, 4.9, 5.5, 5.1, 5.4, 5.0, 5.2, 5.3],
303+
"SepalWidthCm": [1.4, 1.4, 2.0, 0.7, 1.2, 1.4, 1.8, 1.5],
304+
"PetalWidthCm": [0.2, 0.2, 0.3, 0.4, 0.5, 0.6, 0.4, 0.2],
305+
"Species": [
306+
"Iris-setosa",
307+
"Iris-virginica",
308+
"Iris-setosa",
309+
"Iris-setosa",
310+
"Iris-setosa",
311+
"Iris-setosa",
312+
"Iris-setosa",
313+
"Iris-setosa",
314+
],
315+
}
316+
)
317+
318+
mean_output = pd.DataFrame(
319+
{
320+
"SepalLengthCm": [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
321+
"SepalWidthCm": [1.4, 1.4, 3.19, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
322+
"PetalWidthCm": [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.77],
323+
"Species": [
324+
"Iris-setosa",
325+
"Iris-virginica",
326+
"Iris-germanica",
327+
"Iris-setosa",
328+
"Iris-setosa",
329+
"Iris-setosa",
330+
"Iris-setosa",
331+
"Iris-setosa",
332+
"Iris-setosa",
333+
"Iris-setosa",
334+
"Iris-setosa",
335+
],
336+
}
337+
)
338+
339+
column_output = pd.DataFrame(
340+
{
341+
"SepalLengthCm": [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
342+
"SepalWidthCm": [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
343+
"PetalWidthCm": [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
344+
"Species": [
345+
"Iris-setosa",
346+
"Iris-virginica",
347+
"Iris-germanica",
348+
"Iris-setosa",
349+
"Iris-setosa",
350+
"Iris-setosa",
351+
"Iris-setosa",
352+
"Iris-setosa",
353+
"Iris-setosa",
354+
"Iris-setosa",
355+
"Iris-setosa",
356+
],
357+
}
358+
)
284359

285360
# Test if the input is not a DataFrame
286361
with raises(TypeError):
@@ -306,11 +381,16 @@ def test_outlier_identifier():
306381
eda_utils_py.outlier_identifier(test_df, test_column), trim_output
307382
), "Default test not pass"
308383
assert pd.DataFrame.equals(
309-
eda_utils_py.outlier_identifier(test_df, test_column, method="median"), median_output
384+
eda_utils_py.outlier_identifier(test_df, test_column, method="median"),
385+
median_output,
310386
), "The median method is not correct"
311387
assert pd.DataFrame.equals(
312-
eda_utils_py.outlier_identifier(test_df, test_column, method="mean"), mean_output
388+
eda_utils_py.outlier_identifier(test_df, test_column, method="mean"),
389+
mean_output,
313390
), "The mean method is not correct"
314391
assert pd.DataFrame.equals(
315-
eda_utils_py.outlier_identifier(test_df, columns=["SepalLengthCm"], method="mean"), column_output
392+
eda_utils_py.outlier_identifier(
393+
test_df, columns=["SepalLengthCm"], method="mean"
394+
),
395+
column_output,
316396
), "The selected column method is not correct"

0 commit comments

Comments
 (0)