Skip to content

Commit 8f698dc

Browse files
committed
Completed plot pipeline.
1 parent 7bfff2a commit 8f698dc

File tree

6 files changed

+126
-59
lines changed

6 files changed

+126
-59
lines changed

autoencoders/standard_AE.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def reconstruct_variables(sess=None, op=None, data=None):
8686
# run the trained AE for predictions on the test data
8787
reconstructed_data = sess.run(op, feed_dict={x_input: data})
8888
print('Reconstructed data shape: {}'.format(reconstructed_data.shape))
89-
# We are going to plot the reconstructed data below
89+
return reconstructed_data
9090

9191

9292
def train(train_model=True, train_data=None, test_data=None):
@@ -143,8 +143,9 @@ def train(train_model=True, train_data=None, test_data=None):
143143

144144
# print("Saved Model Path: {}".format(saved_model_path))
145145

146-
if recontruct == True:
147-
reconstruct_variables(sess=sess, op=decoder_output, data=test_data)
146+
if recontruct:
147+
reconstructed_data = reconstruct_variables(sess=sess, op=decoder_output, data=test_data)
148+
return reconstructed_data
148149

149150
else:
150151
all_results = os.listdir(results_path)

create_plots.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import matplotlib.pyplot as plt
3+
import pandas as pd
4+
import numpy as np
5+
6+
7+
def plot_initial_data(input_data, save_dir=r"D:\Desktop\GSoC-ATLAS\initial_data_plots", save=True):
    """Draw one histogram per jet variable of the input data.

    Args:
        input_data: Table-like object indexable by column name (it is indexed
            as ``input_data['ak5PFJets_' + variable]``); presumably a pandas
            DataFrame -- confirm against the caller.
        save_dir: Directory the PNG files are written to. Created if missing.
            Defaults to the location that was previously hard-coded.
        save: When true, every figure is written to
            ``<save_dir>/<variable>.png``.
    """
    if save:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(save_dir, exist_ok=True)

    variable_list = ['pt_', 'eta_', 'phi_', 'mass_',
                     'fX', 'fY', 'fZ', 'mJetArea',
                     'mPileupEnergy', 'mChargedHadronEnergy', 'mNeutralHadronEnergy',
                     'mPhotonEnergy',
                     'mElectronEnergy', 'mMuonEnergy', 'mHFHadronEnergy',
                     'mHFEMEnergy', 'mChargedHadronMultiplicity',
                     'mNeutralHadronMultiplicity',
                     'mPhotonMultiplicity', 'mElectronMultiplicity',
                     'mMuonMultiplicity',
                     'mHFHadronMultiplicity', 'mHFEMMultiplicity', 'mChargedEmEnergy',
                     'mChargedMuEnergy', 'mNeutralEmEnergy', 'mChargedMultiplicity',
                     'mNeutralMultiplicity']

    prefix = 'ak5PFJets_'
    n_bins = 100
    # Variables whose histogram is drawn with a logarithmic count axis.
    log_scaled = ('pt_', 'mass_')

    for variable in variable_list:
        # phi_ uses a coarser binning than every other variable.
        bins = 50 if variable == 'phi_' else n_bins

        # A fresh figure per variable keeps histograms from piling up on the
        # same axes when the backend does not discard the figure on show().
        fig = plt.figure()
        plt.hist(input_data[prefix + variable], color='orange', label='Input', alpha=1,
                 bins=bins, log=variable in log_scaled)
        plt.xlabel(xlabel=variable)
        plt.ylabel('# of jets')

        # Save BEFORE show(): with an interactive backend the figure is torn
        # down once its window is closed, so saving afterwards writes an
        # empty image.
        if save:
            plt.savefig(os.path.join(save_dir, variable + '.png'))
        plt.show()
        # Release the figure explicitly; a no-op if show() already destroyed it.
        plt.close(fig)
61+
62+
63+
def plot_test_pred_data(test_data, predicted_data, save_dir=r"D:\Desktop\GSoC-ATLAS\AE_plots", save=True):
    """Overlay input and reconstructed histograms for every jet variable.

    Column ``k`` of both inputs is labelled with the ``k``-th name of the
    variable list below, so both must hold their columns in that order.

    Args:
        test_data: 2-D data fed to the autoencoder; a pandas DataFrame or
            anything ``np.asarray`` can convert.
        predicted_data: 2-D reconstructed output with the same column layout.
        save_dir: Directory the PNG files are written to. Created if missing.
            Defaults to the location that was previously hard-coded.
        save: When true, every figure is written to
            ``<save_dir>/<variable>.png`` and then closed.
    """
    if save:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(save_dir, exist_ok=True)

    variable_list = ['pt_', 'eta_', 'phi_', 'mass_',
                     'fX', 'fY', 'fZ', 'mJetArea',
                     'mPileupEnergy', 'mChargedHadronEnergy', 'mNeutralHadronEnergy',
                     'mPhotonEnergy',
                     'mElectronEnergy', 'mMuonEnergy', 'mHFHadronEnergy',
                     'mHFEMEnergy', 'mChargedHadronMultiplicity',
                     'mNeutralHadronMultiplicity',
                     'mPhotonMultiplicity', 'mElectronMultiplicity',
                     'mMuonMultiplicity',
                     'mHFHadronMultiplicity', 'mHFEMMultiplicity', 'mChargedEmEnergy',
                     'mChargedMuEnergy', 'mNeutralEmEnergy', 'mChargedMultiplicity',
                     'mNeutralMultiplicity']

    colors = ['pink', 'green']
    n_bins = 100

    # np.asarray accepts both DataFrames and plain arrays, unlike `.values`.
    test_data = np.asarray(test_data)
    predicted_data = np.asarray(predicted_data)

    # plot the input data along with the reconstructed from the AE
    for kk, variable in enumerate(variable_list):
        fig = plt.figure()
        _, bin_edges, _ = plt.hist(test_data[:, kk], color=colors[1], label='Input', alpha=1, bins=n_bins)
        # Reuse the input's bin edges so both histograms are directly comparable.
        plt.hist(predicted_data[:, kk], color=colors[0], label='Output', alpha=0.8, bins=bin_edges)
        plt.suptitle(variable)
        plt.xlabel(xlabel=variable)
        plt.ylabel('Number of jets')
        plt.yscale('log')
        plt.legend()
        if save:
            plt.savefig(os.path.join(save_dir, variable + '.png'))
            # Free the figure once it is on disk; leaving all 28 open leaks
            # memory and trips matplotlib's "too many open figures" warning.
            plt.close(fig)
102+
103+
104+
# plot(data_df = pd.read_csv('27D_openCMS_preprocessed_data.csv'))

data_loader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def load_cms_data(filename="open_cms_data.root"):
3131
prefix + 'm_specific.mChargedEmEnergy', prefix + 'm_specific.mChargedMuEnergy', prefix + 'm_specific.mNeutralEmEnergy',
3232
prefix + 'm_specific.mChargedMultiplicity', prefix + 'm_specific.mNeutralMultiplicity'], library="pd")
3333

34-
prefix2 = 'ak5PFJets.'
34+
prefix2 = 'ak5PFJets_'
3535
# Rename the column names to be shorter
3636
dataframe.columns = [prefix2 + 'pt_', prefix2 + 'eta_', prefix2 + 'phi_', prefix2 + 'mass_',
3737
prefix2 + 'fX', prefix2 + 'fY', prefix2 + 'fZ', prefix2 + 'mJetArea', prefix2 + 'mPileupEnergy',
@@ -45,6 +45,7 @@ def load_cms_data(filename="open_cms_data.root"):
4545

4646

4747
print("\nDataframe:")
48+
dataframe.sort_values(by=[prefix2 + 'pt_'])
4849
print(dataframe.head())
4950
dataframe.to_csv('27D_openCMS_data.csv')
5051
return dataframe

data_preprocessing.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
from sklearn.utils import shuffle
55

66

7-
def preprocess(filename='27D_openCMS_data.csv'):
8-
data_df = pd.read_csv(filename)
7+
def preprocess(data_df):
8+
99
data_df = data_df.drop(['entry', 'subentry'], axis=1)
10-
# data_df = data_df.astype('float32')
10+
# data_df = data_df.sort_values(by=['ak5PFJets_pt_'])
1111

12+
# filter out jets having pT > 8 TeV
13+
data_df = data_df[data_df.ak5PFJets_pt_ < 8000]
14+
1215
# Standardize our data using StandardScaler from sklearn
1316
data_df[data_df.columns] = StandardScaler().fit_transform(data_df)
1417
print('Normalized data:')
@@ -27,6 +30,8 @@ def preprocess(filename='27D_openCMS_data.csv'):
2730

2831
data_df.to_csv('27D_openCMS_preprocessed_data.csv')
2932

30-
return train_set, test_set
33+
return data_df, train_set, test_set
34+
3135

36+
#data_df, train_set, test_set = preprocess(data_df=pd.read_csv('27D_openCMS_data.csv'))
3237

main.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from data_preprocessing import preprocess
22
from autoencoders import standard_AE
3+
from create_plots import plot_initial_data, plot_test_pred_data
4+
import pandas as pd
35
from data_loader import load_cms_data
46

57
#cms_data_df = load_cms_data(filename="open_cms_data.root")
6-
train_data, test_data = preprocess()
8+
data_df = pd.read_csv('27D_openCMS_data.csv')
79

8-
standard_AE.train(train_data=train_data, test_data=test_data)
10+
data_df, train_data, test_data = preprocess(data_df)
11+
#plot_initial_data(data_df)
12+
reconstructed_data = standard_AE.train(train_data=train_data, test_data=test_data)
13+
plot_test_pred_data(test_data, reconstructed_data)

plot_data.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

0 commit comments

Comments
 (0)