
Commit 7acf174

Started Implementation of Standard Autoencoder
1 parent fc10e13 commit 7acf174

File tree

6 files changed (+150, -10 lines)


autoencoders/__init__.py

Whitespace-only changes.

autoencoders/standard_AE.py

Lines changed: 130 additions & 0 deletions (new file)

import pandas as pd
import tensorflow as tf
import numpy as np
import datetime
import os
import matplotlib.pyplot as plt
from data_preprocessing import preprocess
from sklearn.utils import shuffle


# Parameters
input_dim = 28
hidden_size1 = 100
hidden_size2 = 100
z_dim = 20

batch_size = 100
n_epochs = 1000
learning_rate = 0.001
beta1 = 0.9
results_path = './autoencoders/Results/Standard_AE'
saved_model_path = results_path + '/Saved_models/'

# Placeholders for the input data and the targets
x_input = tf.placeholder(dtype=tf.float32, shape=[batch_size, input_dim], name='Input')
x_target = tf.placeholder(dtype=tf.float32, shape=[batch_size, input_dim], name='Target')
decoder_input = tf.placeholder(dtype=tf.float32, shape=[1, z_dim], name='Decoder_input')


def dense(x, n1, n2, name):
    """
    Used to create a dense layer.
    :param x: input tensor to the dense layer
    :param n1: number of input neurons
    :param n2: number of output neurons
    :param name: name of the entire dense layer, i.e. the variable scope name
    :return: tensor with shape [batch_size, n2]
    """
    with tf.variable_scope(name, reuse=None):
        weights = tf.get_variable("weights", shape=[n1, n2],
                                  initializer=tf.random_normal_initializer(mean=0., stddev=0.01))
        bias = tf.get_variable("bias", shape=[n2], initializer=tf.constant_initializer(0.0))
        out = tf.add(tf.matmul(x, weights), bias, name='matmul')
        return out


# The encoder of the network
def encoder(x, reuse=False):
    """
    Encoder part of the autoencoder.
    :param x: input to the autoencoder
    :param reuse: True -> reuse the encoder variables, False -> create them fresh
    :return: tensor which is the hidden latent variable of the autoencoder
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with tf.name_scope('Encoder'):
        e_dense_1 = tf.nn.relu(dense(x, input_dim, hidden_size1, 'e_dense_1'))
        e_dense_2 = tf.nn.relu(dense(e_dense_1, hidden_size1, hidden_size2, 'e_dense_2'))
        latent_variable = dense(e_dense_2, hidden_size2, z_dim, 'e_latent_variable')
        return latent_variable


# The decoder of the network
def decoder(x, reuse=False):
    """
    Decoder part of the autoencoder.
    :param x: input to the decoder
    :param reuse: True -> reuse the decoder variables, False -> create them fresh
    :return: tensor which should ideally be the input given to the encoder
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with tf.name_scope('Decoder'):
        d_dense_1 = tf.nn.relu(dense(x, z_dim, hidden_size2, 'd_dense_1'))
        d_dense_2 = tf.nn.relu(dense(d_dense_1, hidden_size2, hidden_size1, 'd_dense_2'))
        output = tf.nn.sigmoid(dense(d_dense_2, hidden_size1, input_dim, 'd_output'))
        return output


def train(train_model=True, train_data=None, test_data=None):
    """
    Used to train the autoencoder by passing in the necessary inputs.
    :param train_model: True -> train the model, False -> load the latest trained model and show the reconstructed variables
    :param train_data: dataframe of training examples, consumed in chunks of batch_size
    :param test_data: dataframe of held-out examples (not used yet)
    """
    with tf.variable_scope(tf.get_variable_scope()):
        encoder_output = encoder(x_input)
        decoder_output = decoder(encoder_output)

    with tf.variable_scope(tf.get_variable_scope()):
        reconstructed_variables = decoder(decoder_input, reuse=True)

    # Loss: mean squared reconstruction error
    loss = tf.reduce_mean(tf.square(x_target - decoder_output))

    # Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1).minimize(loss)
    init = tf.global_variables_initializer()

    # Saving the model
    saver = tf.train.Saver()
    step = 0
    with tf.Session() as sess:
        sess.run(init)
        if train_model:
            for i in range(n_epochs):
                train_data = shuffle(train_data)
                # Break the training dataframe into chunks of size batch_size
                train_df = [train_data[x:x + batch_size] for x in range(0, train_data.shape[0], batch_size)]
                count = 0
                for batch in train_df:
                    if batch.shape[0] == batch_size:
                        count += 1
                        sess.run(optimizer, feed_dict={x_input: batch, x_target: batch})

                        if count % 50 == 0:
                            batch_loss = sess.run([loss], feed_dict={x_input: batch, x_target: batch})
                            print("Loss: {}".format(batch_loss))
                            print("Epoch: {}, iteration: {}".format(i, count))
                step += 1
                saver.save(sess, save_path=saved_model_path, global_step=step)
            print("Model Trained!")
            print("Saved Model Path: {}".format(saved_model_path))
        else:
            all_results = os.listdir(results_path)
            all_results.sort()
            saver.restore(sess,
                          save_path=tf.train.latest_checkpoint(results_path + '/' + all_results[-1] + '/Saved_models/'))
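
The restore branch loads the latest checkpoint but does not use it yet, consistent with the commit message ("Started Implementation"). A minimal sketch of how the restored decoder could be exercised right after the saver.restore call, inside the same session (z is a hypothetical latent sample, not part of the commit):

            # Sketch, assuming the graph above: decode a random latent vector
            # back into the 28-dimensional input space via the reuse-decoder.
            z = np.random.randn(1, z_dim).astype(np.float32)
            generated = sess.run(reconstructed_variables, feed_dict={decoder_input: z})
            print(generated.shape)  # -> (1, input_dim)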

data_loader.py

Lines changed: 1 addition & 1 deletion

@@ -46,7 +46,7 @@ def load_cms_data(filename="open_cms_data.root"):

     print("\nDataframe:")
     print(dataframe.head())
-    dataframe.to_csv('27D_opensCMS_data.csv')
+    dataframe.to_csv('27D_openCMS_data.csv')
     return dataframe

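The diffed lines sit at the tail of load_cms_data(), which evidently turns a ROOT file into a pandas DataFrame before writing the CSV consumed by preprocess(). For orientation only, a plausible sketch of such a loader using uproot; the tree name "Events" and the exact uproot calls are assumptions, not part of this commit:

import uproot


def load_cms_data(filename="open_cms_data.root"):
    # Assumed: flatten one TTree of the open CMS file into a DataFrame
    tree = uproot.open(filename)["Events"]
    dataframe = tree.arrays(library="pd")
    print("\nDataframe:")
    print(dataframe.head())
    dataframe.to_csv('27D_openCMS_data.csv')
    return dataframe
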
data_preprocessing.py

Lines changed: 2 additions & 4 deletions

@@ -1,11 +1,10 @@
 import pandas as pd
-import numpy as np
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
 from sklearn.utils import shuffle


-def preprocess(filename='27D_opensCMS_data.csv'):
+def preprocess(filename='27D_openCMS_data.csv'):
     data_df = pd.read_csv(filename)
     data_df = data_df.drop(['entry', 'subentry'], axis=1)
     # data_df = data_df.astype('float32')
@@ -26,9 +25,8 @@ def preprocess(filename='27D_opensCMS_data.csv'):
     print('Test data shape: ')
     print(test_set.shape)

-    data_df.to_csv('27D_opensCMS_preprocessed_data.csv')
+    data_df.to_csv('27D_openCMS_preprocessed_data.csv')

     return train_set, test_set


-preprocess()
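
Only fragments of preprocess() are visible in this diff. Going by the surviving imports (StandardScaler, train_test_split, shuffle) and the visible lines, the full function plausibly resembles the sketch below; the scaling step and the 80/20 split are assumptions, not code from this commit:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


def preprocess(filename='27D_openCMS_data.csv'):
    data_df = pd.read_csv(filename)
    data_df = data_df.drop(['entry', 'subentry'], axis=1)
    # Assumed: standardise every feature to zero mean and unit variance
    data_df = pd.DataFrame(StandardScaler().fit_transform(data_df),
                           columns=data_df.columns)
    # Assumed split ratio; only the printed shapes appear in the diff
    train_set, test_set = train_test_split(shuffle(data_df), test_size=0.2)
    print('Test data shape: ')
    print(test_set.shape)
    data_df.to_csv('27D_openCMS_preprocessed_data.csv')
    return train_set, test_set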

main.py

Lines changed: 7 additions & 0 deletions (new file)

from data_preprocessing import preprocess
from autoencoders import standard_AE


train_data, test_data = preprocess()

standard_AE.train(train_data=train_data, test_data=test_data)
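
main.py only drives the training path for now. Given the train() signature introduced above, loading the latest saved model instead would be the one-line change below (a sketch; the restore branch in standard_AE.train is still incomplete):

standard_AE.train(train_model=False, train_data=train_data, test_data=test_data)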

plot_data.py

Lines changed: 10 additions & 5 deletions

@@ -3,7 +3,7 @@


 def plot():
-    data_df = pd.read_csv('27D_opensCMS_preprocessed_data.csv')
+    data_df = pd.read_csv('27D_openCMS_preprocessed_data.csv')

     variable_list = ['pt_', 'eta_', 'phi_', 'mass_',
                      'fX', 'fY', 'fZ', 'mJetArea',
@@ -33,12 +33,17 @@ def plot():
                 prefix + 'mChargedMuEnergy', prefix + 'mNeutralEmEnergy', prefix + 'mChargedMultiplicity',
                 prefix + 'mNeutralMultiplicity']

-    n_bins = 50
+    n_bins = 100

     for kk in range(0, 28):
-        n_hist_data, bin_edges, _ = plt.hist(data_df[branches[kk]], color='orange', label='Input', alpha=1, bins=n_bins)
-        plt.xlabel(xlabel=variable_list[kk])
-        plt.ylabel('# of events')
+        if branches[kk] == prefix + 'pt_' or branches[kk] == prefix + 'mass_':
+            n_hist_data, bin_edges, _ = plt.hist(data_df[branches[kk]], color='orange', label='Input', alpha=1, bins=n_bins, log=True)
+            plt.xlabel(xlabel=variable_list[kk])
+            plt.ylabel('# of events')
+        else:
+            n_hist_data, bin_edges, _ = plt.hist(data_df[branches[kk]], color='orange', label='Input', alpha=1, bins=n_bins)
+            plt.xlabel(xlabel=variable_list[kk])
+            plt.ylabel('# of events')
     plt.show()

 plot()
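
The two branches added to plot() differ only in the log=True flag, so the duplication could be folded into one call. A sketch of an equivalent loop, not part of this commit:

    for kk in range(0, 28):
        # Log-scale y-axis only for the steeply falling pt and mass spectra
        use_log = branches[kk] in (prefix + 'pt_', prefix + 'mass_')
        n_hist_data, bin_edges, _ = plt.hist(data_df[branches[kk]], color='orange',
                                             label='Input', alpha=1, bins=n_bins, log=use_log)
        plt.xlabel(xlabel=variable_list[kk])
        plt.ylabel('# of events')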
