Skip to content

Commit be322c1

Browse files
author
Vineet John
committed
Added fully connected network processor and added stats
1 parent 43b8202 commit be322c1

File tree

3 files changed

+413
-3
lines changed

3 files changed

+413
-3
lines changed

assignments/a5/a5.tex

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,27 @@ \section{Tensorflow - MNIST} % (fold)
3535
\subsection{Softmax vs. CNN vs. Fully Connected} % (fold)
3636
\label{sub:softmax_vs_cnn_vs_fully_connected}
3737

38-
CNN was run for 1000 iterations.
39-
38+
The results for the vanilla experiments are given in Table \ref{tab:accuracy_comparisons_i}.
4039
\begin{table}[th]
4140
\centering
4241
\begin{tabular}{| l | r |}
4342
\hline
4443
\textbf{Neural Net Type} & \textbf{Accuracy} \\
4544
\hline
4645
\hline
47-
Softmax Regression & 0.9205 \\
46+
Softmax Regression Network & 0.9205 \\
4847
\hline
4948
Convolutional Neural Network & 0.9664 \\
49+
\hline
50+
Fully-connected Feed-Forward Neural Network & 0.8985 \\
5051
\hline
5152
\end{tabular}
5253
\caption{Accuracy Comparisons - I}
5354
\label{tab:accuracy_comparisons_i}
5455
\end{table}
56+
57+
\textbf{Discussion:}
58+
The reason that the CNN performs the best is that it considers patches of the image rather than the intensities of sequential pixels. Since the CNN preserves this 2-D information structure, it performs the best of the lot.
5559

5660
% subsection softmax_vs_cnn_vs_fully_connected (end)
5761

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# pylint: disable=missing-docstring
2+
import argparse
3+
import os.path
4+
import sys
5+
import time
6+
7+
from six.moves import xrange # pylint: disable=redefined-builtin
8+
import tensorflow as tf
9+
10+
from tensorflow.examples.tutorials.mnist import input_data
11+
from tensorflow.examples.tutorials.mnist import mnist
12+
13+
# Basic model parameters as external flags.
14+
FLAGS = None
15+
16+
17+
def placeholder_inputs(batch_size):
    """Generate placeholder variables to represent the input tensors.

    These placeholders are used as inputs by the rest of the model building
    code and are fed from the downloaded data in the training loop.

    Args:
      batch_size: The batch size baked into the first dimension of both
        placeholders.

    Returns:
      images_placeholder: float32 placeholder of shape
        (batch_size, mnist.IMAGE_PIXELS).
      labels_placeholder: int32 placeholder of shape (batch_size,).
    """
    # The placeholder shapes match the full image and label tensors, except
    # that the first dimension is batch_size rather than the full size of the
    # train or test data sets.
    images_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, mnist.IMAGE_PIXELS))
    # Note the trailing comma: `(batch_size)` is just a parenthesised scalar,
    # which would give an unknown-rank shape if batch_size were None.  A real
    # 1-tuple keeps the labels placeholder rank-1 in every case.
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder
37+
38+
39+
def fill_feed_dict(data_set, images_pl, labels_pl):
    """Build the feed_dict for a single step from the next batch of data.

    The returned dictionary maps each placeholder to the values that should
    be substituted for it:

        {<placeholder>: <values to be passed for placeholder>, ...}

    Args:
      data_set: The set of images and labels, from input_data.read_data_sets().
      images_pl: The images placeholder, from placeholder_inputs().
      labels_pl: The labels placeholder, from placeholder_inputs().

    Returns:
      feed_dict: The feed dictionary mapping from placeholders to values.
    """
    # Pull the next `FLAGS.batch_size` examples out of the data set.
    next_images, next_labels = data_set.next_batch(
        FLAGS.batch_size, FLAGS.fake_data)
    return {images_pl: next_images, labels_pl: next_labels}
65+
66+
67+
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Runs one evaluation against the full epoch of data and prints it.

    Args:
      sess: The session in which the model has been trained.
      eval_correct: The Tensor that returns the number of correct predictions.
      images_placeholder: The images placeholder.
      labels_placeholder: The labels placeholder.
      data_set: The set of images and labels to evaluate, from
        input_data.read_data_sets().
    """
    # Only whole batches are run, so the example count used for the precision
    # is rounded down to a multiple of the batch size.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Counts the number of correct predictions.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set,
                                   images_placeholder,
                                   labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # If the data set is smaller than one batch, no step ran and num_examples
    # is 0; report a precision of 0.0 instead of raising ZeroDivisionError.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
94+
95+
96+
def run_training():
    """Train MNIST for a number of steps.

    Builds the model graph from the `mnist` module's inference, loss,
    training and evaluation ops, then runs `FLAGS.max_steps` training steps.
    Every 100 steps the loss is printed and summaries are written to
    `FLAGS.log_dir`.  Every 1000 steps, and on the final step, a checkpoint
    is saved and the model is evaluated on the training, validation and test
    sets.

    The session and the summary writer are released when training finishes
    or fails.
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder,
                                 FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Run everything inside a session context so the session is closed
        # even if a training step raises.
        with tf.Session() as sess:
            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            try:
                # Run the Op to initialize the variables.
                sess.run(init)

                # Start the training loop.
                for step in xrange(FLAGS.max_steps):
                    start_time = time.time()

                    # Fill a feed dictionary with the actual set of images
                    # and labels for this particular training step.
                    feed_dict = fill_feed_dict(data_sets.train,
                                               images_placeholder,
                                               labels_placeholder)

                    # Run one step of the model.  The value returned for
                    # `train_op` is discarded; only the loss is kept.
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    duration = time.time() - start_time

                    # Write the summaries and print an overview fairly often.
                    if step % 100 == 0:
                        # Print status to stdout.
                        print('Step %d: loss = %.2f (%.3f sec)' %
                              (step, loss_value, duration))
                        # Update the events file.
                        summary_str = sess.run(summary, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    # Save a checkpoint and evaluate the model periodically.
                    is_last_step = (step + 1) == FLAGS.max_steps
                    if (step + 1) % 1000 == 0 or is_last_step:
                        checkpoint_file = os.path.join(FLAGS.log_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_file, global_step=step)
                        # Evaluate against the training, validation and test
                        # sets, in that order.
                        for title, data_set in (
                                ('Training Data Eval:', data_sets.train),
                                ('Validation Data Eval:',
                                 data_sets.validation),
                                ('Test Data Eval:', data_sets.test)):
                            print(title)
                            do_eval(sess,
                                    eval_correct,
                                    images_placeholder,
                                    labels_placeholder,
                                    data_set)
            finally:
                # Close the events file whether or not training completed.
                summary_writer.close()
196+
197+
198+
def main(_):
    """Reset the log directory to an empty state, then run training.

    The single positional argument (supplied by `tf.app.run`) is ignored.
    """
    log_dir = FLAGS.log_dir
    # Remove any logs left over from a previous run before recreating the dir.
    if tf.gfile.Exists(log_dir):
        tf.gfile.DeleteRecursively(log_dir)
    tf.gfile.MakeDirs(log_dir)
    run_training()
203+
204+
205+
if __name__ == '__main__':
    # Command-line flags: (name, type, default, help), registered in order.
    _VALUE_FLAGS = (
        ('--learning_rate', float, 0.01,
         'Initial learning rate.'),
        ('--max_steps', int, 2000,
         'Number of steps to run trainer.'),
        ('--hidden1', int, 128,
         'Number of units in hidden layer 1.'),
        ('--hidden2', int, 32,
         'Number of units in hidden layer 2.'),
        ('--batch_size', int, 100,
         'Batch size. Must divide evenly into the dataset sizes.'),
        ('--input_data_dir', str, '/tmp/tensorflow/mnist/input_data',
         'Directory to put the input data.'),
        ('--log_dir', str, '/tmp/tensorflow/mnist/logs/fully_connected_feed',
         'Directory to put the log data.'),
    )

    parser = argparse.ArgumentParser()
    for _name, _type, _default, _help in _VALUE_FLAGS:
        parser.add_argument(_name, type=_type, default=_default, help=_help)
    # Boolean switch: present on the command line means True.
    parser.add_argument(
        '--fake_data',
        default=False,
        help='If true, uses fake data for unit testing.',
        action='store_true'
    )

    # Flags this script does not recognise are passed through to tf.app.run.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

0 commit comments

Comments
 (0)