@@ -55,7 +55,7 @@ def imputer(df, strategy="mean", fill_value=None):
5555
5656 # Tests whether input fill_value is of type numbers or None
5757 if not isinstance (fill_value , type (None )) and not isinstance (
58- fill_value , numbers .Number
58+ fill_value , numbers .Number
5959 ):
6060 raise TypeError ("fill_value must be of type None or numeric type" )
6161
@@ -159,13 +159,17 @@ def cor_map(dataframe, num_col, col_scheme="purpleorange"):
159159
160160 plot = (
161161 alt .Chart (corr_matrix )
162- .mark_rect ()
163- .encode (
162+ .mark_rect ()
163+ .encode (
164164 x = alt .X ("var1" , title = None ),
165165 y = alt .Y ("var2" , title = None ),
166- color = alt .Color ("cor" , title = 'Correlation' , scale = alt .Scale (scheme = col_scheme , domain = (- 1 ,1 ))),
166+ color = alt .Color (
167+ "cor" ,
168+ title = "Correlation" ,
169+ scale = alt .Scale (scheme = col_scheme , domain = (- 1 , 1 )),
170+ ),
167171 )
168- .properties (title = "Correlation Matrix" , width = 400 , height = 400 )
172+ .properties (title = "Correlation Matrix" , width = 400 , height = 400 )
169173 )
170174
171175 text = plot .mark_text (size = 15 ).encode (
@@ -195,7 +199,7 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
195199 - if "trim" : we completely remove data points that are outliers.
196200 - if "median" : we replace outliers with median values
197201 - if "mean" : we replace outliers with mean values
198-
202+
199203
200204 Returns
201205 -------
@@ -206,7 +210,7 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
206210 --------
207211 >> import pandas as pd
208212 >> from eda_utils_py import cor_map
209-
213+
210214 >> data = pd.DataFrame({
211215 >> 'SepalLengthCm':[5.1, 4.9, 4.7],
212216 >> 'SepalWidthCm':[1.4, 1.4, 99],
@@ -224,24 +228,30 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
224228 if columns is None :
225229 for col in dataframe .columns :
226230 if not is_numeric_dtype (dataframe [col ]):
227- raise Exception ("The given dataframe contains column that is not numeric column." )
231+ raise Exception (
232+ "The given dataframe contains column that is not numeric column."
233+ )
228234
229235 if columns is not None :
230236 if not isinstance (columns , list ):
231237 raise TypeError ("The argument @columns must be of type list" )
232238
233239 for col in columns :
234240 if col not in list (dataframe .columns ):
235- raise Exception ("The given column list contains column that is not exist in the given dataframe." )
241+ raise Exception (
242+ "The given column list contains column that is not exist in the given dataframe."
243+ )
236244 if not is_numeric_dtype (dataframe [col ]):
237- raise Exception ("The given column list contains column that is not numeric column." )
245+ raise Exception (
246+ "The given column list contains column that is not numeric column."
247+ )
238248
239249 if method not in ("trim" , "median" , "mean" ):
240250 raise Exception ("The method must be -trim- or -median- or -mean-" )
241251
242252 df = dataframe .copy ()
243253 target_columns = []
244- if ( columns is None ) :
254+ if columns is None :
245255 target_columns = list (df .columns .values .tolist ())
246256 else :
247257 target_columns = columns
@@ -257,14 +267,14 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
257267 current_item = current_column [i ]
258268 z = (current_item - mean ) / std
259269 if z >= threshold :
260- if ( i not in outlier_index ) :
270+ if i not in outlier_index :
261271 outlier_index .append (i )
262- if ( method == "mean" ) :
272+ if method == "mean" :
263273 df .at [i , column ] = round (mean , 2 )
264- if ( method == "median" ) :
274+ if method == "median" :
265275 df .at [i , column ] = np .median (current_column )
266276
267- if ( method == "trim" ) :
277+ if method == "trim" :
268278 df = df .drop (outlier_index )
269279
270280 df .index = range (len (df ))
@@ -307,12 +317,12 @@ def scale(dataframe, columns, scaler="standard"):
307317
308318 >> scale(data, numerical_columns, scaler="minmax")
309319
310- SepalLengthCm SepalWidthCm PetalWidthCm
311- 0 0.25 1.00 1.0
312- 1 0.00 0.25 0.0
313- 2 0.00 0.25 0.0
314- 3 0.75 0.00 1.0
315- 4 1.00 0.25 0.5
320+ >> SepalLengthCm SepalWidthCm PetalWidthCm
321+ >> 0 0.25 1.00 1.0
322+ >> 1 0.00 0.25 0.0
323+ >> 2 0.00 0.25 0.0
324+ >> 3 0.75 0.00 1.0
325+ >> 4 1.00 0.25 0.5
316326 """
317327
318328 # Check if input data is of pd.DataFrame type
@@ -379,24 +389,24 @@ def _standardize(dataframe):
379389
380390def _minmax (dataframe ):
381391 """Transform features by rescaling each feature to the range between 0 and 1.
382- The transformation is given by:
392+ The transformation is given by:
383393
384- scaled_value = (feature_value - min) / (mix - min)
394+ scaled_value = (feature_value - min) / (max - min)
385395
386- where min, max = feature_range.
396+ where min, max = feature_range.
387397
388- This transformation is often used as an alternative to zero mean,
389- unit variance scaling.
398+ This transformation is often used as an alternative to zero mean,
399+ unit variance scaling.
390400
391- Parameters
392- ----------
393- dataframe : pandas.DataFrame
394- The data frame to be used for EDA.
395- Returns
396- -------
397- res : pandas.core.frame.DataFrame
398- Scaled dataset
399- """
401+ Parameters
402+ ----------
403+ dataframe : pandas.DataFrame
404+ The data frame to be used for EDA.
405+ Returns
406+ -------
407+ res : pandas.core.frame.DataFrame
408+ Scaled dataset
409+ """
400410
401411 res = dataframe .copy ()
402412 for feature_name in dataframe .columns :
0 commit comments