Skip to content

Commit d9a34d3

Browse files
committed
Added RMSE and Residual metrics + Code optimization
1 parent 1fa8900 commit d9a34d3

File tree

5 files changed

+98
-46
lines changed

5 files changed

+98
-46
lines changed

autoencoders/autoencoder.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,14 @@ def train(self, test_set, epochs):
6464

6565
start = time.perf_counter() # Starts timer
6666
# train our autoencoder
67-
learn.fit_one_cycle(epochs, 0.014, cbs=[ShowGraphCallback()])
67+
learn.fit_one_cycle(epochs, lr_min, cbs=[ShowGraphCallback()])
6868
end = time.perf_counter() # Ends timer
6969
delta_t = end - start
7070
print('Training took', delta_t, 'seconds')
7171

7272
plt.figure()
7373
recorder.plot_loss()
7474
plt.show()
75-
print(learn.validate())
7675

7776
data = torch.tensor(test_set.values, dtype=torch.float)
7877

create_plots.py

Lines changed: 62 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,71 @@
66
#import arviz as az
77

88

9-
def plot_initial_data(input_data, normalized=False):
9+
def plot_initial_data(input_data, num_variables, normalized=False):
1010
input_data = input_data.sort_values(by=['ak5PFJets_pt_'])
1111

1212
if normalized:
1313
save_dir = "D:\Desktop\GSoC-ATLAS\preprocessed_data_plots"
1414
else:
1515
save_dir = "D:\Desktop\GSoC-ATLAS\initial_data_plots"
16+
17+
prefix = 'ak5PFJets_'
18+
19+
if num_variables == 24:
20+
save_dir = "D:\Desktop\GSoC-ATLAS\preprocessed_data_plots\d24"
21+
22+
variable_list = ['pt_', 'eta_', 'phi_', 'mass_', 'mJetArea',
23+
'mChargedHadronEnergy', 'mNeutralHadronEnergy',
24+
'mPhotonEnergy',
25+
'mElectronEnergy', 'mMuonEnergy', 'mHFHadronEnergy',
26+
'mHFEMEnergy', 'mChargedHadronMultiplicity',
27+
'mNeutralHadronMultiplicity',
28+
'mPhotonMultiplicity', 'mElectronMultiplicity',
29+
'mMuonMultiplicity',
30+
'mHFHadronMultiplicity', 'mHFEMMultiplicity', 'mChargedEmEnergy',
31+
'mChargedMuEnergy', 'mNeutralEmEnergy', 'mChargedMultiplicity',
32+
'mNeutralMultiplicity']
33+
34+
branches = [prefix + 'pt_', prefix + 'eta_', prefix + 'phi_', prefix + 'mass_',
35+
prefix + 'mJetArea', prefix + 'mChargedHadronEnergy', prefix + 'mNeutralHadronEnergy',
36+
prefix + 'mPhotonEnergy',
37+
prefix + 'mElectronEnergy', prefix + 'mMuonEnergy', prefix + 'mHFHadronEnergy',
38+
prefix + 'mHFEMEnergy', prefix + 'mChargedHadronMultiplicity',
39+
prefix + 'mNeutralHadronMultiplicity',
40+
prefix + 'mPhotonMultiplicity', prefix + 'mElectronMultiplicity',
41+
prefix + 'mMuonMultiplicity',
42+
prefix + 'mHFHadronMultiplicity', prefix + 'mHFEMMultiplicity', prefix + 'mChargedEmEnergy',
43+
prefix + 'mChargedMuEnergy', prefix + 'mNeutralEmEnergy', prefix + 'mChargedMultiplicity',
44+
prefix + 'mNeutralMultiplicity']
45+
else:
46+
save_dir = "D:\Desktop\GSoC-ATLAS\preprocessed_data_plots\d19"
47+
48+
variable_list = ['pt_', 'eta_', 'phi_', 'mass_', 'mJetArea',
49+
'mChargedHadronEnergy', 'mNeutralHadronEnergy',
50+
'mPhotonEnergy', 'mHFHadronEnergy',
51+
'mHFEMEnergy', 'mChargedHadronMultiplicity',
52+
'mNeutralHadronMultiplicity',
53+
'mPhotonMultiplicity', 'mElectronMultiplicity',
54+
'mHFHadronMultiplicity', 'mHFEMMultiplicity', 'mNeutralEmEnergy', 'mChargedMultiplicity',
55+
'mNeutralMultiplicity']
56+
57+
branches = [prefix + 'pt_', prefix + 'eta_', prefix + 'phi_', prefix + 'mass_',
58+
prefix + 'mJetArea', prefix + 'mChargedHadronEnergy', prefix + 'mNeutralHadronEnergy',
59+
prefix + 'mPhotonEnergy', prefix + 'mHFHadronEnergy',
60+
prefix + 'mHFEMEnergy', prefix + 'mChargedHadronMultiplicity',
61+
prefix + 'mNeutralHadronMultiplicity',
62+
prefix + 'mPhotonMultiplicity', prefix + 'mElectronMultiplicity',
63+
prefix + 'mHFHadronMultiplicity', prefix + 'mHFEMMultiplicity',
64+
prefix + 'mNeutralEmEnergy', prefix + 'mChargedMultiplicity',
65+
prefix + 'mNeutralMultiplicity']
66+
1667
if not os.path.exists(save_dir):
1768
os.makedirs(save_dir)
1869

19-
variable_list = ['pt_', 'eta_', 'phi_', 'mass_', 'mJetArea',
20-
'mChargedHadronEnergy', 'mNeutralHadronEnergy',
21-
'mPhotonEnergy',
22-
'mElectronEnergy', 'mMuonEnergy', 'mHFHadronEnergy',
23-
'mHFEMEnergy', 'mChargedHadronMultiplicity',
24-
'mNeutralHadronMultiplicity',
25-
'mPhotonMultiplicity', 'mElectronMultiplicity',
26-
'mMuonMultiplicity',
27-
'mHFHadronMultiplicity', 'mHFEMMultiplicity', 'mChargedEmEnergy',
28-
'mChargedMuEnergy', 'mNeutralEmEnergy', 'mChargedMultiplicity',
29-
'mNeutralMultiplicity']
30-
31-
prefix = 'ak5PFJets_'
3270
n_bins = 100
3371
save = True # Option to save figure
3472

35-
branches = [prefix + 'pt_', prefix + 'eta_', prefix + 'phi_', prefix + 'mass_',
36-
prefix + 'mJetArea', prefix + 'mChargedHadronEnergy', prefix + 'mNeutralHadronEnergy',
37-
prefix + 'mPhotonEnergy',
38-
prefix + 'mElectronEnergy', prefix + 'mMuonEnergy', prefix + 'mHFHadronEnergy',
39-
prefix + 'mHFEMEnergy', prefix + 'mChargedHadronMultiplicity',
40-
prefix + 'mNeutralHadronMultiplicity',
41-
prefix + 'mPhotonMultiplicity', prefix + 'mElectronMultiplicity',
42-
prefix + 'mMuonMultiplicity',
43-
prefix + 'mHFHadronMultiplicity', prefix + 'mHFEMMultiplicity', prefix + 'mChargedEmEnergy',
44-
prefix + 'mChargedMuEnergy', prefix + 'mNeutralEmEnergy', prefix + 'mChargedMultiplicity',
45-
prefix + 'mNeutralMultiplicity']
46-
47-
for kk in range(0, 24):
73+
for kk in range(0, num_variables):
4874
if branches[kk] == prefix + 'pt_' or branches[kk] == prefix + 'mass_':
4975
n_hist_data, bin_edges, _ = plt.hist(input_data[branches[kk]], color='orange', label='Input', alpha=1,
5076
bins=n_bins, log=True)
@@ -73,12 +99,12 @@ def plot_initial_data(input_data, normalized=False):
7399

74100

75101
def plot_test_pred_data(test_data, predicted_data, num_variables, vae=False):
76-
if vae:
77-
save_dir = "D:\Desktop\GSoC-ATLAS\VAE_plots"
78-
79102

80103
if num_variables == 24:
81-
save_dir = "D:\Desktop\GSoC-ATLAS\AE_plots\d24"
104+
if vae:
105+
save_dir = "D:\Desktop\GSoC-ATLAS\VAE_plots\d24"
106+
else:
107+
save_dir = "D:\Desktop\GSoC-ATLAS\AE_plots\d24"
82108

83109
variable_list = ['pt_', 'eta_', 'phi_', 'mass_', 'mJetArea',
84110
'mChargedHadronEnergy', 'mNeutralHadronEnergy',
@@ -92,7 +118,11 @@ def plot_test_pred_data(test_data, predicted_data, num_variables, vae=False):
92118
'mChargedMuEnergy', 'mNeutralEmEnergy', 'mChargedMultiplicity',
93119
'mNeutralMultiplicity']
94120
else:
95-
save_dir = "D:\Desktop\GSoC-ATLAS\AE_plots\d19"
121+
if vae:
122+
save_dir = "D:\Desktop\GSoC-ATLAS\VAE_plots\d19"
123+
else:
124+
save_dir = "D:\Desktop\GSoC-ATLAS\AE_plots\d19"
125+
96126
variable_list = ['pt_', 'eta_', 'phi_', 'mass_', 'mJetArea',
97127
'mChargedHadronEnergy', 'mNeutralHadronEnergy',
98128
'mPhotonEnergy', 'mHFHadronEnergy',
@@ -106,7 +136,6 @@ def plot_test_pred_data(test_data, predicted_data, num_variables, vae=False):
106136
os.makedirs(save_dir)
107137

108138
colors = ['pink', 'green']
109-
prefix = 'ak5PFJets_'
110139
n_bins = 100
111140
save = True # Option to save figure
112141

@@ -127,8 +156,6 @@ def plot_test_pred_data(test_data, predicted_data, num_variables, vae=False):
127156
plt.savefig(os.path.join(save_dir, variable_list[kk] + '.png'))
128157

129158

130-
# plot(data_df = pd.read_csv('27D_openCMS_preprocessed_data.csv'))
131-
132159
def plot_4D_data(test_data, predicted_data):
133160

134161
save_dir = "D:\Desktop\GSoC-ATLAS\AE_4D_plots"

data_processing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"""
1010

1111

12-
def preprocess_28D(data_df, num_variables, min_max_all):
12+
def preprocess_28D(data_df, num_variables, custom_norm):
1313
data_df = data_df.drop(['entry', 'subentry'], axis=1)
1414
data_df = data_df.sort_values(by=['ak5PFJets_pt_'])
1515

@@ -29,7 +29,7 @@ def preprocess_28D(data_df, num_variables, min_max_all):
2929
# data_df[data_df.columns] = scaler.fit_transform(data_df)
3030
min_max_scaler = MinMaxScaler()
3131

32-
if min_max_all:
32+
if not custom_norm:
3333
# Normalize all variables in the range (0, 1) using MinMax Scaler from sklearn
3434
data_df[data_df.columns] = min_max_scaler.fit_transform(data_df)
3535
else:

evaluate.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import numpy as np
2+
from sklearn.metrics import mean_squared_error
3+
4+
5+
def evaluate_model(y_true, y_predicted):
    """Print and return reconstruction-quality metrics.

    The metrics are computed with NumPy only, so the function does not depend
    on the ``squared=False`` argument of ``sklearn.metrics.mean_squared_error``
    (deprecated in scikit-learn 1.4 and removed in 1.6).

    Args:
        y_true: Reference values, array-like of shape ``(n_samples,)`` or
            ``(n_samples, n_variables)``. Anything ``np.asarray`` can convert
            to a float array is accepted (lists, ndarrays, DataFrames;
            presumably also CPU tensors -- confirm against the caller).
        y_predicted: Reconstructed values with the same shape as ``y_true``.

    Returns:
        dict with the keys ``"mse"``, ``"rmse"`` and ``"residual"`` (floats).
        Earlier versions returned ``None``; callers that ignore the return
        value are unaffected.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    truth = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_predicted, dtype=float)

    if truth.shape != predicted.shape:
        raise ValueError(
            "y_true and y_predicted must have the same shape, got "
            "{} and {}".format(truth.shape, predicted.shape)
        )
    if truth.size == 0:
        raise ValueError("Cannot evaluate a model on empty data")

    # Treat scalar/1-D input as a single variable (one column) so the
    # per-variable reductions below work for every accepted shape.
    if truth.ndim < 2:
        truth = truth.reshape(-1, 1)
        predicted = predicted.reshape(-1, 1)

    errors = predicted - truth
    per_variable_mse = np.mean(errors ** 2, axis=0)

    # MSE: uniform average of the per-variable mean squared errors
    mse = float(np.mean(per_variable_mse))
    # RMSE: root taken per variable and then averaged uniformly, which is the
    # multi-output convention of scikit-learn's root_mean_squared_error
    rmse = float(np.mean(np.sqrt(per_variable_mse)))
    # Residual: mean absolute difference |input - output| over all entries.
    # It is NOT divided by the input, so zero-valued (e.g. min-max
    # normalised) entries cannot cause a division by zero.
    residuals = float(np.mean(np.abs(errors)))

    print("---------------------------")
    print("MSE: {:.6f}".format(mse))
    print("RMSE: {:.6f}".format(rmse))
    print("Residual: {:.6f}".format(residuals))
    print("---------------------------")

    return {"mse": mse, "rmse": rmse, "residual": residuals}

main.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import autoencoders.autoencoder as ae
33
import autoencoders.variational_autoencoder as vae
44
from create_plots import plot_initial_data, plot_test_pred_data, plot_4D_data, plot_residuals, correlation_plots
5+
from evaluate import evaluate_model
56
import pandas as pd
67
from data_loader import load_cms_data
78

@@ -10,22 +11,29 @@
1011
use_vae = False
1112
# cms_data_df = load_cms_data(filename="open_cms_data.root")
1213
data_df = pd.read_csv('27D_openCMS_data.csv')
13-
14+
custom_norm = False
15+
num_of_variables = 24
1416
# Plot the original data
1517
#plot_initial_data(input_data=data_df)
1618

1719
if openCMS_data:
18-
data_df, train_data, test_data, scaler = preprocess_28D(data_df=data_df, num_variables=24, min_max_all=False)
19-
data_df = data_df.sort_values(by=['ak5PFJets_pt_'])
20+
21+
# Preprocess data
22+
data_df, train_data, test_data, scaler = preprocess_28D(data_df=data_df, num_variables=num_of_variables, custom_norm=custom_norm)
2023
# Plot preprocessed data
21-
plot_initial_data(input_data=data_df, normalized=True)
24+
#plot_initial_data(input_data=data_df, num_variables=num_of_variables, normalized=True)
2225

23-
# Run the Autoencoder and obtain the reconstructed data
24-
standard_ae = ae.Autoencoder(train_data, test_data, num_variables=24)
26+
# Initialize the Autoencoder
27+
standard_ae = ae.Autoencoder(train_data, test_data, num_variables=num_of_variables)
2528

29+
# Train the standard Autoencoder and obtain the reconstructions
2630
test_data, reconstructed_data = standard_ae.train(test_data, epochs=30)
2731

28-
plot_test_pred_data(test_data, reconstructed_data, num_variables=24)
32+
# Evaluate the reconstructions of the network based on various metrics
33+
evaluate_model(y_true=test_data, y_predicted=reconstructed_data)
34+
35+
# Plot the reconstructed data along with the initial data
36+
plot_test_pred_data(test_data, reconstructed_data, num_variables=num_of_variables)
2937

3038
else:
3139
# Dark machines data

0 commit comments

Comments
 (0)