diff --git a/DeepResampling_students.py b/DeepResampling_students.py
new file mode 100644
index 0000000..3bcdb35
--- /dev/null
+++ b/DeepResampling_students.py
@@ -0,0 +1,634 @@
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import genai_evaluation as ge
+import warnings
+warnings.simplefilter("ignore")
+
+
+#--- [1] read data and only keep the features and observations we want
+
+def category_to_integer(category):
+    if category == 'Enrolled':
+        integer = 1
+    elif category == 'Dropout':
+        integer = 0
+    else:
+        integer = 2
+    return(integer)
+
+#- [1.1] read data
+
+url = "https://raw.githubusercontent.com/VincentGranville/Main/main/students.csv"
+# data = pd.read_csv('students_C2_full_nogan.csv')
+data = pd.read_csv(url)
+print(data)
+
+# all features used here
+
+features = [
+    'Application_mode',                       #  0, categorical
+    'Course',                                 #  1, categorical
+    'Previous_qualification_grade',           #  2, ordinal
+    'Mother_qualification',                   #  3, categorical
+    'Father_qualification',                   #  4, categorical
+    'Mother_occupation',                      #  5, categorical
+    'Father_occupation',                      #  6, categorical
+    'Admission_grade',                        #  7, ordinal
+    'Tuition_fees_up_to_date',                #  8, binary
+    'Age_at_enrollment',                      #  9, ordinal [outliers]
+    'Curricular_units_1st_sem_evaluations',   # 10, ordinal [0 = missing]
+    'Curricular_units_1st_sem_approved',      # 11, ordinal [0 = missing]
+    'Curricular_units_1st_sem_grade',         # 12, ordinal [0 = missing]
+    'Curricular_units_2nd_sem_enrolled',      # 13, ordinal [0 = missing]
+    'Curricular_units_2nd_sem_evaluations',   # 14, ordinal [0 = missing]
+    'Curricular_units_2nd_sem_approved',      # 15, ordinal [0 = missing]
+    'Curricular_units_2nd_sem_grade',         # 16, ordinal [0 = missing]
+    'Unemployment_rate',                      # 17, ordinal
+    'Inflation_rate',                         # 18, ordinal, can be < 0
+    'GDP',                                    # 19, ordinal, can be < 0
+    'Target'                                  # 20, categorical [outcome]
+    ]
+
+data['Target'] = data['Target'].map(category_to_integer)
+
+# remove rows with missing values
+data = data[data['Curricular_units_1st_sem_evaluations'] != 0]
+
+print(data.head())
+print(data.shape)
+print(data.columns)
+
+#- [1.2] set seed for replicability
+
+# np.random.seed also drives the pandas .sample() calls below, since no random_state is passed
+seed = 106  ## 105
+np.random.seed(seed)
+
+#- [1.3] select features
+
+data = data[features]
+data = data.sample(frac = 1)   # shuffle rows to break artificial sorting
+
+#- [1.4] split real dataset into training and validation sets
+
+data_training = data.sample(frac = 0.5)
+data_validation = data.drop(data_training.index)
+data_training.to_csv('training_vg2.csv')
+data_validation.to_csv('validation_vg2.csv')
+data_train = pd.DataFrame.to_numpy(data_training)
+
+nobs = len(data_training)
+n_features = len(features)
+
+
+#--- [2] create initial synthetic data
+
+def create_initial_synth(nobs_synth):
+
+    eps = 0.000000001
+    n_features = len(features)
+    data_synth = np.empty(shape=(nobs_synth, n_features))
+
+    for i in range(nobs_synth):
+        pc = np.random.uniform(0, 1 + eps, n_features)
+        pc = np.minimum(pc, 1.0)   # np.quantile requires quantile levels in [0, 1]
+        for k in range(n_features):
+            label = features[k]
+            data_synth[i, k] = np.quantile(data_training[label], pc[k], axis=0)
+    return(data_synth)
+
+
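+# Optional sanity check (not part of the original pipeline): create_initial_synth()
+# draws each feature independently via inverse-CDF (quantile) sampling, so the
+# initial synthetic data should already match the training marginals, while
+# cross-feature dependencies are left to the resampling step. The helper below is
+# a minimal sketch with an assumed name (_check_initial_marginals); it can be
+# called manually after the initializations in section [7].
+
+def _check_initial_marginals(n_quantiles=5):
+    # compare training vs. initial synthetic quantiles, feature by feature
+    qs = np.linspace(0, 1, n_quantiles)
+    for k, label in enumerate(features):
+        q_train = np.quantile(data_training[label], qs)
+        q_synth = np.quantile(data_synth[:, k], qs)
+        print(label, np.round(q_train, 2), np.round(q_synth, 2))
+    return()
+
+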
+#--- [3] loss functions Part 1
+
+def compute_univariate_stats():
+
+    # 'dt' for training data, 'ds' for synth. data
+
+    # for first term in loss function
+    dt_mean = np.mean(data_train, axis=0)
+    dt_stdev = np.std(data_train, axis=0)
+    ds_mean = np.mean(data_synth, axis=0)
+    ds_stdev = np.std(data_synth, axis=0)
+
+    # for g(arr)
+    dt_mean1 = np.mean(g(data_train), axis=0)
+    dt_stdev1 = np.std(g(data_train), axis=0)
+    ds_mean1 = np.mean(g(data_synth), axis=0)
+    ds_stdev1 = np.std(g(data_synth), axis=0)
+
+    # for h(arr)
+    dt_mean2 = np.mean(h(data_train), axis=0)
+    dt_stdev2 = np.std(h(data_train), axis=0)
+    ds_mean2 = np.mean(h(data_synth), axis=0)
+    ds_stdev2 = np.std(h(data_synth), axis=0)
+
+    values = [dt_mean, dt_stdev, ds_mean, ds_stdev,
+              dt_mean1, dt_stdev1, ds_mean1, ds_stdev1,
+              dt_mean2, dt_stdev2, ds_mean2, ds_stdev2]
+    return(values)
+
+def initialize_cross_products_tables():
+
+    # the core structure for fast computation when swapping 2 values
+    # 'dt' for training data, 'ds' for synth. data
+    # 'prod' is for 1st term in loss, 'prod12' for 2nd term
+
+    dt_prod = np.empty(shape=(n_features, n_features))
+    ds_prod = np.empty(shape=(n_features, n_features))
+    dt_prod12 = np.empty(shape=(n_features, n_features))
+    ds_prod12 = np.empty(shape=(n_features, n_features))
+
+    for k in range(n_features):
+        for l in range(n_features):
+            dt_prod[l, k] = np.dot(data_train[:, l], data_train[:, k])
+            ds_prod[l, k] = np.dot(data_synth[:, l], data_synth[:, k])
+            dt_prod12[l, k] = np.dot(g(data_train[:, l]), h(data_train[:, k]))
+            ds_prod12[l, k] = np.dot(g(data_synth[:, l]), h(data_synth[:, k]))
+    products = [dt_prod, ds_prod, dt_prod12, ds_prod12]
+    return(products)
+
+
+#--- [4] loss function Part 2: managing the loss function
+
+# Weights hyperparameter:
+#
+#     1st value is for 1st term in loss function, 2nd value for 2nd term
+#     each value should be between 0 and 1, and they should sum to 1
+#     works best when loss contributions from each term are about the same
+
+#- [4.1] loss function contribution from features (k, l) jointly
+
+# before calling the functions in sections [4.1], [4.2] and [4.3], first initialize
+# by calling compute_univariate_stats() and initialize_cross_products_tables();
+# this initialization needs to be done only once, at the beginning
+
+def get_distance(k, l, weights):
+
+    dt_prodn = dt_prod[k, l] / nobs
+    ds_prodn = ds_prod[k, l] / nobs_synth
+    dt_r = (dt_prodn - dt_mean[k]*dt_mean[l]) / (dt_stdev[k]*dt_stdev[l])
+    ds_r = (ds_prodn - ds_mean[k]*ds_mean[l]) / (ds_stdev[k]*ds_stdev[l])
+
+    dt_prodn12 = dt_prod12[k, l] / nobs
+    ds_prodn12 = ds_prod12[k, l] / nobs_synth
+    dt_r12 = (dt_prodn12 - dt_mean1[k]*dt_mean2[l]) / (dt_stdev1[k]*dt_stdev2[l])
+    ds_r12 = (ds_prodn12 - ds_mean1[k]*ds_mean2[l]) / (ds_stdev1[k]*ds_stdev2[l])
+
+    # dist = weights[0]*abs(dt_r - ds_r) + weights[1]*abs(dt_r12 - ds_r12)
+    dist = max(weights[0]*abs(dt_r - ds_r), weights[1]*abs(dt_r12 - ds_r12))
+    return(dist, dt_r, ds_r, dt_r12, ds_r12)
+
+def total_distance(weights, flagParam):
+
+    total = 0
+    max_dist = 0
+    super_max = 0
+    lmax = n_features
+
+    for k in range(n_features):
+        if symmetric:
+            lmax = k
+        for l in range(lmax):
+            if l != k and flagParam[k] > 0 and flagParam[l] > 0:
+                values = get_distance(k, l, weights)
+                dist2 = max(abs(values[1] - values[2]), abs(values[3] - values[4]))
+                total += values[0]
+                if values[0] > max_dist:
+                    max_dist = values[0]
+                if dist2 > super_max:
+                    super_max = dist2
+    return(total, max_dist, super_max)
+
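+# Optional sanity check (not part of the original pipeline): the first term of
+# get_distance() rebuilds a Pearson-style correlation from the cached cross-
+# products, using r = (E[XY] - E[X]E[Y]) / (std(X) std(Y)) with population
+# standard deviations. The sketch below (assumed helper name _check_corr_identity)
+# verifies that identity against np.corrcoef on two training columns; call it
+# manually after the initializations in section [7].
+
+def _check_corr_identity(k=0, l=1):
+    # correlation from cached cross-products vs. np.corrcoef, on training data
+    r_cached = (dt_prod[k, l]/nobs - dt_mean[k]*dt_mean[l]) / (dt_stdev[k]*dt_stdev[l])
+    r_numpy = np.corrcoef(data_train[:, k], data_train[:, l])[0, 1]
+    print("cached: %8.5f   numpy: %8.5f" % (r_cached, r_numpy))
+    return()
+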
+#- [4.2] updated loss function when swapping rows idx1 and idx2 in feature k
+#        contribution from feature l jointly with k
+
+def get_new_distance(k, l, idx1, idx2, weights):
+
+    tmp1_k = data_synth[idx1, k]
+    tmp2_k = data_synth[idx2, k]
+    tmp1_l = data_synth[idx1, l]
+    tmp2_l = data_synth[idx2, l]
+
+    #-- first term of loss function
+
+    remove1 = tmp1_k * tmp1_l
+    remove2 = tmp2_k * tmp2_l
+    add1 = tmp1_k * tmp2_l
+    add2 = tmp2_k * tmp1_l
+    new_ds_prod = ds_prod[l, k] - remove1 - remove2 + add1 + add2
+
+    dt_prodn = dt_prod[k, l] / nobs
+    ds_prodn = new_ds_prod / nobs_synth
+    dt_r = (dt_prodn - dt_mean[k]*dt_mean[l]) / (dt_stdev[k]*dt_stdev[l])
+    ds_r = (ds_prodn - ds_mean[k]*ds_mean[l]) / (ds_stdev[k]*ds_stdev[l])
+
+    #-- second term of loss function
+
+    remove1 = g(tmp1_k) * h(tmp1_l)
+    remove2 = g(tmp2_k) * h(tmp2_l)
+    add1 = g(tmp1_k) * h(tmp2_l)
+    add2 = g(tmp2_k) * h(tmp1_l)
+    new_ds_prod12 = ds_prod12[k, l] - remove1 - remove2 + add1 + add2
+
+    dt_prodn12 = dt_prod12[k, l] / nobs
+    ds_prodn12 = new_ds_prod12 / nobs_synth
+    dt_r12 = (dt_prodn12 - dt_mean1[k]*dt_mean2[l]) / (dt_stdev1[k]*dt_stdev2[l])
+    ds_r12 = (ds_prodn12 - ds_mean1[k]*ds_mean2[l]) / (ds_stdev1[k]*ds_stdev2[l])
+
+    #--
+
+    # new_dist = weights[0]*abs(dt_r - ds_r) + weights[1]*abs(dt_r12 - ds_r12)
+    new_dist = max(weights[0]*abs(dt_r - ds_r), weights[1]*abs(dt_r12 - ds_r12))
+    return(new_dist, dt_r, ds_r, dt_r12, ds_r12)
+
+
+#- [4.3] update prod tables after swapping rows idx1 and idx2 in feature k
+#        update impacting feature l jointly with k
+
+def update_product(k, l, idx1, idx2):
+
+    tmp1_k = data_synth[idx1, k]
+    tmp2_k = data_synth[idx2, k]
+    tmp1_l = data_synth[idx1, l]
+    tmp2_l = data_synth[idx2, l]
+
+    #-- first term of loss function
+
+    remove1 = tmp1_k * tmp1_l
+    remove2 = tmp2_k * tmp2_l
+    add1 = tmp1_k * tmp2_l
+    add2 = tmp2_k * tmp1_l
+    ds_prod[k, l] = ds_prod[k, l] - remove1 - remove2 + add1 + add2
+    ds_prod[l, k] = ds_prod[k, l]
+
+    #-- second term of loss function
+
+    remove1 = g(tmp1_k) * h(tmp1_l)
+    remove2 = g(tmp2_k) * h(tmp2_l)
+    add1 = g(tmp1_k) * h(tmp2_l)
+    add2 = g(tmp2_k) * h(tmp1_l)
+    ds_prod12[k, l] = ds_prod12[k, l] - remove1 - remove2 + add1 + add2
+
+    remove1 = h(tmp1_k) * g(tmp1_l)
+    remove2 = h(tmp2_k) * g(tmp2_l)
+    add1 = h(tmp1_k) * g(tmp2_l)
+    add2 = h(tmp2_k) * g(tmp1_l)
+    ds_prod12[l, k] = ds_prod12[l, k] - remove1 - remove2 + add1 + add2
+
+    return()
+
+
+#--- [5] feature sampling
+
+def sample_feature(mode, hyperParameter):
+
+    # Randomly pick one column (a feature), to later swap 2 values from 2 random rows
+    # In 'Standard' mode, feature 0 is assumed to already be in the right order, thus ignored
+
+    if mode == 'Equalized':
+        u = np.random.uniform(0, 1)
+        cutoff = hyperParameter[0]
+        feature = 0
+        while cutoff < u and feature < n_features - 1:   # guard against index overrun
+            feature += 1
+            cutoff += hyperParameter[feature]
+    else:
+        feature = np.random.randint(1, n_features)   # ignore feature 0
+    return(feature)
+
+
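+# Note (not in the original code): in 'Equalized' mode, sample_feature() draws a
+# feature index with probability proportional to hyperParameter, via a cumulative
+# cutoff, i.e. inverse-CDF sampling over a discrete distribution. Assuming the
+# weights sum to 1, the sketch below (assumed helper name _sample_feature_choice)
+# is a near-equivalent one-liner that can be used to cross-check the frequencies.
+
+def _sample_feature_choice(hyperParameter):
+    # equivalent draw: probability of returning feature k is hyperParameter[k]
+    return(np.random.choice(len(hyperParameter), p=hyperParameter))
+
+# example: _sample_feature_choice(hyperParam), once hyperParam is defined in section [8]
+
+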
+#--- [6] functions: deep synthetization, plot history, print stats
+
+#- [6.1] main function
+
+def deep_resampling(hyperParameter, run, loss_type, n_batches,
+                    n_iter, nobs_synth, weights, flagParam, mode):
+
+    # main function
+
+    batch = 0
+    batch_size = nobs_synth // n_batches
+    niter_per_batch = n_iter // n_batches
+    lower_row = 0
+    upper_row = batch_size
+    nswaps = 0
+    cgain = 0   # cumulative gain
+
+    arr_swaps = []
+    arr_history_quality = []
+    arr_history_max_dist = []
+    arr_time = []
+    print()
+
+    for iter in range(n_iter):
+
+        k = sample_feature(mode, hyperParameter)
+        batch = iter // niter_per_batch
+        lower_row = batch * batch_size
+        upper_row = lower_row + batch_size
+        idx1 = np.random.randint(lower_row, upper_row) % nobs_synth
+        tmp1 = data_synth[idx1, k]
+        tmp2 = tmp1
+        counter = 0
+        while tmp2 == tmp1 and counter < 20:
+            idx2 = np.random.randint(lower_row, upper_row) % nobs_synth
+            tmp2 = data_synth[idx2, k]
+            counter += 1
+
+        delta = 0
+        delta2 = 0
+        for l in range(n_features):
+            if l != k and flagParam[l] > 0:
+                values = get_distance(k, l, weights)
+                delta += values[0]
+                if values[0] > delta2:
+                    delta2 = values[0]
+                if not symmetric:   # if functions g, h are different
+                    values = get_distance(l, k, weights)
+                    delta += values[0]
+                    if values[0] > delta2:
+                        delta2 = values[0]
+
+        new_delta = 0
+        new_delta2 = 0
+        for l in range(n_features):
+            if l != k and flagParam[l] > 0:
+                values = get_new_distance(k, l, idx1, idx2, weights)
+                new_delta += values[0]
+                if values[0] > new_delta2:
+                    new_delta2 = values[0]
+                if not symmetric:   # if functions g, h are different
+                    values = get_new_distance(l, k, idx1, idx2, weights)
+                    new_delta += values[0]
+                    if values[0] > new_delta2:
+                        new_delta2 = values[0]
+
+        if loss_type == 'sum_loss':
+            gain = delta - new_delta
+        elif loss_type == 'max_loss':
+            gain = delta2 - new_delta2
+        else:
+            gain = 0   # unknown loss_type: never swap
+        if gain > 0:
+            cgain += gain
+            for l in range(n_features):
+                if l != k:
+                    update_product(k, l, idx1, idx2)
+                    # update_product(l, k, idx1, idx2)
+            data_synth[idx1, k] = tmp2
+            data_synth[idx2, k] = tmp1
+            nswaps += 1
+
+        if iter % 500 == 0:
+            quality, max_dist, super_max = total_distance(weights, flagParam)
+            arr_swaps.append(nswaps)
+            arr_history_quality.append(quality)
+            arr_history_max_dist.append(max_dist)
+            arr_time.append(iter)
+            if iter % 5000 == 0:
+                print("Iter: %6d Distance: %8.4f SupDist: %8.4f Gain: %8.4f Swaps: %6d"
+                      %(iter, quality, super_max, cgain, nswaps))
+
+    return(nswaps, arr_swaps, arr_history_quality, arr_history_max_dist, arr_time)
+
+#- [6.2] save synthetic data, show some stats
+
+def evaluate_and_save(filename, weights, run, flagParam):
+
+    print("\nMetrics after deep resampling\n")
+    quality, max_dist, super_max = total_distance(weights, flagParam)
+    print("Distance: %8.4f" %(quality))
+    print("Max Dist: %8.4f" %(max_dist))
+
+    data_synthetic = pd.DataFrame(data_synth, columns = features)
+    data_synthetic.to_csv(filename)
+    print("\nSynthetic data, first 10 rows\n", data_synthetic.head(10))
+
+    print("\nBivariate feature correlation:")
+    print("....dt_xx for training set, ds_xx for synthetic data")
+    print("....xx_r for correl[k, l], xx_r12 for correl[g(k), h(l)]\n")
+    print("%2s %2s %8s %8s %8s %8s %8s"
+          % ('k', 'l', 'dist', 'dt_r', 'ds_r', 'dt_r12', 'ds_r12'))
+    print("--------------------------------------------------")
+    for k in range(n_features):
+        for l in range(n_features):
+            condition = (flagParam[k] > 0 and flagParam[l] > 0)
+            if k != l and condition:
+                values = get_distance(k, l, weights)
+                dist = values[0]
+                dt_r = values[1]      # training, 1st term of loss function
+                ds_r = values[2]      # synth., 1st term of loss function
+                dt_r12 = values[3]    # training, 2nd term of loss function
+                ds_r12 = values[4]    # synth., 2nd term of loss function
+                print("%2d %2d %8.4f %8.4f %8.4f %8.4f %8.4f"
+                      % (k, l, dist, dt_r, ds_r, dt_r12, ds_r12))
+    return()
+
+#- [6.3] plot history of loss function, and cumulative number of swaps
+
+def plot_history(history):
+
+    arr_swaps = history[1]
+    arr_history_quality = history[2]
+    arr_history_max_dist = history[3]
+    arr_time = history[4]
+
+    mpl.rcParams['axes.linewidth'] = 0.3
+    plt.rc('xtick', labelsize=7)
+    plt.rc('ytick', labelsize=7)
+    plt.xticks(fontsize=7)
+    plt.yticks(fontsize=7)
+    plt.subplot(1, 2, 1)
+    plt.plot(arr_time, arr_swaps, linewidth = 0.3)
+    plt.legend(['cumulative swaps'], fontsize="7",
+               loc="upper center", ncol=1)
+    plt.subplot(1, 2, 2)
+    plt.plot(arr_time, arr_history_quality, linewidth = 0.3)
+    # plt.plot(arr_time, arr_history_max_dist, linewidth = 0.3)
+    plt.legend(['distance'], fontsize="7",
+               loc="upper center", ncol=1)
+    plt.show()
+    return()
+
+
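+# Optional sanity check (not part of the original pipeline): get_new_distance()
+# and update_product() rely on the fact that swapping the values of feature k in
+# rows idx1 and idx2 changes the cross-product sum(x_k * x_l) by
+# (a*d + b*c) - (a*c + b*d), where a, b are the two swapped values of feature k
+# and c, d the corresponding values of feature l. The sketch below (assumed name
+# _check_swap_update) verifies this incremental update against a brute-force
+# recomputation on a small random matrix; it is self-contained and can be called
+# at any point.
+
+def _check_swap_update(seed_check=0):
+    rng = np.random.default_rng(seed_check)
+    x = rng.normal(size=(50, 2))          # toy data: 2 features, 50 rows
+    idx1, idx2 = 3, 17
+    a, b = x[idx1, 0], x[idx2, 0]         # values of feature k = 0
+    c, d = x[idx1, 1], x[idx2, 1]         # values of feature l = 1
+    old_prod = np.dot(x[:, 0], x[:, 1])
+    incremental = old_prod - a*c - b*d + a*d + b*c
+    x[idx1, 0], x[idx2, 0] = b, a         # apply the swap, then recompute
+    brute_force = np.dot(x[:, 0], x[:, 1])
+    print("incremental: %10.6f   brute force: %10.6f" % (incremental, brute_force))
+    return()
+
+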
+#--- [7] initializations
+
+#- create initial synthetization
+
+nobs_synth = 2000
+data_synth = create_initial_synth(nobs_synth)
+
+#- specify 2nd part of loss function (argument is a number or an array)
+
+# do not use g(arr) = h(arr) = arr: this is pre-built already as 1st term in loss fct
+# these two functions g, h are for the second term in the loss function
+
+def g(arr):
+    return(1/(0.01 + np.absolute(arr)))
+def h(arr):
+    return(1/(0.01 + np.absolute(arr)))
+
+symmetric = True   # set to True if functions g and h are identical
+# 'symmetric = True' is about twice as fast as 'symmetric = False'
+
+#- initializations: product tables and univariate stats
+
+products = initialize_cross_products_tables()
+dt_prod = products[0]
+ds_prod = products[1]
+dt_prod12 = products[2]
+ds_prod12 = products[3]
+
+values = compute_univariate_stats()
+dt_mean = values[0]
+dt_stdev = values[1]
+ds_mean = values[2]
+ds_stdev = values[3]
+dt_mean1 = values[4]
+dt_stdev1 = values[5]
+ds_mean1 = values[6]
+ds_stdev1 = values[7]
+dt_mean2 = values[8]
+dt_stdev2 = values[9]
+ds_mean2 = values[10]
+ds_stdev2 = values[11]
+
+
+#--- [8] deep resampling
+
+mode = 'Equalized'   # options: 'Standard', 'Equalized'
+
+#- first run
+
+run = 1
+n_iter = 500001
+n_batches = 8
+loss_type = 'sum_loss'   # options: 'max_loss' or 'sum_loss'
+weights = [0.90, 0.10]
+hyperParam = np.zeros(len(features))
+hyperParam[8] = 1
+hyperParam[10] = 1
+hyperParam[11] = 1
+hyperParam[12] = 1
+hyperParam[13] = 1
+hyperParam[14] = 1
+hyperParam[15] = 1
+hyperParam[16] = 1
+hyperParam = hyperParam / np.sum(hyperParam)
+flagParam = np.ones(len(features))
+history = deep_resampling(hyperParam, run, loss_type, n_batches, n_iter,
+                          nobs_synth, weights, flagParam, mode)
+evaluate_and_save('synth_vg2.csv', weights, run, flagParam)
+plot_history(history)
+
+#- second run
+
+run = 2
+n_iter = 500001
+n_batches = 8
+loss_type = 'sum_loss'   # options: 'max_loss' or 'sum_loss'
+weights = [0.90, 0.10]
+hyperParam = np.ones(len(features))
+hyperParam[8] = 0
+hyperParam[10] = 0
+hyperParam[11] = 0
+hyperParam[12] = 0
+hyperParam[13] = 0
+hyperParam[14] = 0
+hyperParam[15] = 0
+hyperParam[16] = 0
+hyperParam = hyperParam / np.sum(hyperParam)
+flagParam = np.ones(len(features))
+history = deep_resampling(hyperParam, run, loss_type, n_batches, n_iter,
+                          nobs_synth, weights, flagParam, mode)
+evaluate_and_save('synth_vg2.csv', weights, run, flagParam)
+plot_history(history)
+
+
+#--- [9] evaluate the synthetization using joint ECDF & Kolmogorov-Smirnov distance
+
+# df_synthetic holds the synthetic data; df_validation and df_training hold real data
+# compute the multivariate ecdf on the validation set, sorted by value (from 0 to 1)
+
+print("\nMultivariate ECDF computations...\n")
+n_nodes = 1000   # number of random locations in feature space where the ecdf is computed
+seed = 50
+np.random.seed(seed)
+
+df_validation = pd.DataFrame(data_validation, columns = features)
+df_synthetic = pd.DataFrame(data_synth, columns = features)
+df_training = pd.DataFrame(data_train, columns = features)
+query_lst, ecdf_val, ecdf_synth = ge.multivariate_ecdf(df_validation, df_synthetic, n_nodes, verbose = True)
+query_lst, ecdf_val, ecdf_train = ge.multivariate_ecdf(df_validation, df_training, n_nodes, verbose = True)
+
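+# Note (not in the original script): ecdf_val, ecdf_synth and ecdf_train are the
+# multivariate ECDF values evaluated at the same n_nodes query locations. Assuming
+# ge.ks_statistic returns the largest absolute gap between two such ECDF vectors
+# (the usual Kolmogorov-Smirnov distance), a rough manual equivalent would be
+# np.max(np.abs(np.array(ecdf_val) - np.array(ecdf_synth))). The base distance
+# (training vs. validation) printed below serves as the benchmark to beat.
+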
+ks = ge.ks_statistic(ecdf_val, ecdf_synth)
+ks_base = ge.ks_statistic(ecdf_val, ecdf_train)
+print("Test ECDF Kolmogorov-Smirnov dist. (synth. vs valid.): %6.4f" %(ks))
+print("Base ECDF Kolmogorov-Smirnov dist. (train. vs valid.): %6.4f" %(ks_base))
+
+
+#--- [10] visualizations (based on Matplotlib version 3.7.1)
+
+def vg_scatter(df, feature1, feature2, counter):
+
+    # customized scatterplot, subplot position based on counter
+
+    label = feature1 + " vs " + feature2
+    x = df[feature1].to_numpy()
+    y = df[feature2].to_numpy()
+    plt.subplot(3, 2, counter)
+    plt.scatter(x, y, s = 0.1, c = "blue")
+    plt.xlabel(label, fontsize = 7)
+    plt.xticks([])
+    plt.yticks([])
+    # plt.ylim(0, 70000)
+    # plt.xlim(18, 64)
+    return()
+
+def vg_histo(df, feature, counter):
+
+    # customized histogram, subplot position based on counter
+
+    y = df[feature].to_numpy()
+    plt.subplot(2, 3, counter)
+    y_min = np.min(y)
+    y_max = np.max(y)
+    binBoundaries = np.linspace(y_min, y_max, 30)
+    plt.hist(y, bins=binBoundaries, color='white', align='mid', edgecolor='red',
+             linewidth = 0.3)
+    plt.xlabel(feature, fontsize = 7)
+    plt.xticks([])
+    plt.yticks([])
+    return()
+
+mpl.rcParams['axes.linewidth'] = 0.3
+
+#- [10.1] scatterplots
+
+dfs = pd.read_csv('synth_vg2.csv')
+dfv = pd.read_csv('validation_vg2.csv')
+vg_scatter(dfs, features[0], features[1], 1)
+vg_scatter(dfv, features[0], features[1], 2)
+vg_scatter(dfs, features[0], features[2], 3)
+vg_scatter(dfv, features[0], features[2], 4)
+vg_scatter(dfs, features[1], features[2], 5)
+vg_scatter(dfv, features[1], features[2], 6)
+plt.show()
+
+#- [10.2] histograms
+
+dfs = pd.read_csv('synth_vg2.csv')
+dfv = pd.read_csv('validation_vg2.csv')
+vg_histo(dfs, features[0], 1)
+vg_histo(dfs, features[1], 2)
+vg_histo(dfs, features[2], 3)
+vg_histo(dfv, features[0], 4)
+vg_histo(dfv, features[1], 5)
+vg_histo(dfv, features[2], 6)
+plt.show()
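+
+
+#- [10.3] optional numeric comparison (not in the original script)
+
+# The scatterplots and histograms above are qualitative. As a minimal, hedged
+# complement (assumed helper name vg_compare_stats), the sketch below prints the
+# mean and standard deviation of a few features for the synthetic and validation
+# sets read back from disk; uncomment the call to run it.
+
+def vg_compare_stats(df1, df2, feature_list):
+    # df1 = synthetic, df2 = validation; print mean / std side by side
+    for feature in feature_list:
+        print("%-40s mean: %8.2f vs %8.2f   std: %8.2f vs %8.2f"
+              % (feature, df1[feature].mean(), df2[feature].mean(),
+                 df1[feature].std(), df2[feature].std()))
+    return()
+
+# vg_compare_stats(dfs, dfv, features[0:3])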