Skip to content

Commit b846bc1

Browse files
authored
Add files via upload
1 parent f14cb5b commit b846bc1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+83757
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Image Classification Project
2+
## Udacity Machine Learning Nanodegree
3+
### Introduction
4+
In this project, you'll classify images from the CIFAR-10 dataset. The dataset consists of airplanes, dogs, cats, and other objects. You'll preprocess the dataset, then train a convolutional neural network on all the samples. You'll normalize the images, one-hot encode the labels, and build a convolutional layer, a max pool layer, and a fully connected layer. At the end, you'll see the network's predictions on the sample images.
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import pickle
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
from sklearn.preprocessing import LabelBinarizer
5+
6+
7+
def _load_label_names():
    """
    Return the ten class names, ordered so that a label id indexes its name.

    The names are hard-coded here; nothing is read from disk. A new list is
    built on every call, so callers may mutate the result freely.
    """
    label_names = 'airplane automobile bird cat deer dog frog horse ship truck'.split()
    return label_names
12+
13+
14+
def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
    """
    Read one pickled training batch and return its images and labels.

    The file ``<folder>/data_batch_<batch_id>`` is unpickled, and its
    ``'data'`` array is reshaped to (samples, 3, 32, 32) and then transposed
    to (samples, 32, 32, 3) so the channel axis comes last.

    NOTE: unpickling executes arbitrary code embedded in the file; only use
    this on dataset files obtained from a trusted source.

    :param cifar10_dataset_folder_path: Folder containing the batch files
    :param batch_id: Id appended to ``data_batch_`` to form the file name
    :return: Tuple of (features, labels)
    """
    batch_path = '{}/data_batch_{}'.format(cifar10_dataset_folder_path, batch_id)

    with open(batch_path, mode='rb') as batch_file:
        batch = pickle.load(batch_file, encoding='latin1')

    raw_pixels = batch['data']
    n_samples = len(raw_pixels)

    # Stored as flat rows; unfold to channel-first, then move channels last.
    channel_first = raw_pixels.reshape((n_samples, 3, 32, 32))
    features = channel_first.transpose(0, 2, 3, 1)

    return features, batch['labels']
25+
26+
27+
def display_stats(cifar10_dataset_folder_path, batch_id, sample_id):
    """
    Print summary statistics for one training batch and show one sample image.

    If ``batch_id`` is not in 1..5, or ``sample_id`` is not a valid index
    into the batch, a message is printed and the function returns early.

    :param cifar10_dataset_folder_path: Folder containing the batch files
    :param batch_id: Id of the training batch to inspect (1 to 5)
    :param sample_id: Index of the sample within the batch to display
    :return: None
    """
    batch_ids = list(range(1, 6))
    if batch_id not in batch_ids:
        print('Batch Id out of Range. Possible Batch Ids: {}'.format(batch_ids))
        return None

    features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_id)
    n_samples = len(features)

    sample_in_range = 0 <= sample_id < n_samples
    if not sample_in_range:
        print('{} samples in batch {}. {} is out of range.'.format(n_samples, batch_id, sample_id))
        return None

    # Batch-level summary
    unique_labels, label_counts = np.unique(labels, return_counts=True)
    print('\nStats of batch {}:'.format(batch_id))
    print('Samples: {}'.format(n_samples))
    print('Label Counts: {}'.format(dict(zip(unique_labels, label_counts))))
    print('First 20 Labels: {}'.format(labels[:20]))

    # Details of the selected sample
    image = features[sample_id]
    label_id = labels[sample_id]
    names = _load_label_names()

    print('\nExample of Image {}:'.format(sample_id))
    print('Image - Min Value: {} Max Value: {}'.format(image.min(), image.max()))
    print('Image - Shape: {}'.format(image.shape))
    print('Label - Label Id: {} Name: {}'.format(label_id, names[label_id]))

    plt.axis('off')
    plt.imshow(image)
58+
59+
60+
def _preprocess_and_save(normalize, one_hot_encode, features, labels, filename):
    """
    Preprocess data and save it to file

    :param normalize: Callable applied to ``features``
    :param one_hot_encode: Callable applied to ``labels``
    :param features: Raw features to preprocess
    :param labels: Raw labels to preprocess
    :param filename: Path of the pickle file to write
    """
    features = normalize(features)
    labels = one_hot_encode(labels)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of the anonymous handle.
    with open(filename, 'wb') as out_file:
        pickle.dump((features, labels), out_file)
68+
69+
70+
def preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode):
    """
    Preprocess the training, validation and test data and pickle the results.

    For each of the five training batches, the last 10% of samples is held
    out for validation and the rest is saved to ``preprocess_batch_<i>.p``.
    The held-out samples of all batches are saved together to
    ``preprocess_validation.p``. The ``test_batch`` file is saved to
    ``preprocess_training.p``.

    :param cifar10_dataset_folder_path: Folder containing the batch files
    :param normalize: Callable applied to every feature array before saving
    :param one_hot_encode: Callable applied to every label list before saving
    """
    n_batches = 5
    valid_features = []
    valid_labels = []

    for batch_i in range(1, n_batches + 1):
        features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_i)
        validation_count = int(len(features) * 0.1)

        # Split on an explicit index. Slicing with -validation_count breaks
        # when the count is 0 (fewer than 10 samples): [:-0] is empty and
        # [-0:] is the whole batch, which would swap training and validation.
        split_at = len(features) - validation_count

        # Preprocess and save a batch of training data
        _preprocess_and_save(
            normalize,
            one_hot_encode,
            features[:split_at],
            labels[:split_at],
            'preprocess_batch_' + str(batch_i) + '.p')

        # Use a portion of training batch for validation
        valid_features.extend(features[split_at:])
        valid_labels.extend(labels[split_at:])

    # Preprocess and save all validation data
    _preprocess_and_save(
        normalize,
        one_hot_encode,
        np.array(valid_features),
        np.array(valid_labels),
        'preprocess_validation.p')

    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    # Load the test data
    test_features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    test_labels = batch['labels']

    # Preprocess and save all test data.
    # NOTE: despite its name, 'preprocess_training.p' holds the test batch;
    # the file name is kept unchanged so existing readers keep working.
    _preprocess_and_save(
        normalize,
        one_hot_encode,
        np.array(test_features),
        np.array(test_labels),
        'preprocess_training.p')
116+
117+
118+
def batch_features_labels(features, labels, batch_size):
    """
    Yield consecutive (features, labels) slices of at most <batch_size> items.

    The final slice is shorter when the number of samples is not a multiple
    of ``batch_size``. Nothing is yielded for empty input.
    """
    total = len(features)
    for lower in range(0, total, batch_size):
        upper = lower + batch_size
        if upper > total:
            # Clamp the last batch to the end of the data
            upper = total
        yield features[lower:upper], labels[lower:upper]
125+
126+
127+
def load_preprocess_training_batch(batch_id, batch_size):
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less

    :param batch_id: Id of the preprocessed batch; selects ``preprocess_batch_<batch_id>.p``
    :param batch_size: Maximum number of samples per yielded batch
    :return: Generator of (features, labels) slices

    NOTE: unpickling executes arbitrary code embedded in the file; only load
    files that were produced by this project's own preprocessing step.
    """
    filename = 'preprocess_batch_' + str(batch_id) + '.p'

    # Close the file as soon as it is read rather than leaking the handle.
    with open(filename, mode='rb') as in_file:
        features, labels = pickle.load(in_file)

    # Return the training data in batches of size <batch_size> or less
    return batch_features_labels(features, labels, batch_size)
136+
137+
138+
def display_image_predictions(features, labels, predictions):
    """
    Plot each image next to a horizontal bar chart of its top predictions.

    A fixed 4x2 grid is created: the left column shows the image titled with
    its true class name, the right column shows the predicted class names and
    their values.

    :param features: Images to display, one per grid row
    :param labels: One-hot encoded labels matching ``features``
    :param predictions: Object exposing ``indices`` and ``values`` sequences,
        one entry per image (presumably a top-k result -- confirm with caller)
    """
    n_classes = 10
    n_predictions = 3
    margin = 0.05

    label_names = _load_label_names()

    # Recover integer label ids from the one-hot rows
    binarizer = LabelBinarizer()
    binarizer.fit(range(n_classes))
    label_ids = binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    ind = np.arange(n_predictions)
    bar_width = (1. - 2. * margin) / n_predictions

    rows = zip(features, label_ids, predictions.indices, predictions.values)
    for row_i, (image, true_id, top_ids, top_values) in enumerate(rows):
        top_names = [label_names[class_id] for class_id in top_ids]
        true_name = label_names[true_id]

        image_ax = axies[row_i][0]
        image_ax.imshow(image)
        image_ax.set_title(true_name)
        image_ax.set_axis_off()

        # Values and names are reversed together before plotting
        bars_ax = axies[row_i][1]
        bars_ax.barh(ind + margin, top_values[::-1], bar_width)
        bars_ax.set_yticks(ind + margin)
        bars_ax.set_yticklabels(top_names[::-1])
        bars_ax.set_xticks([0, 0.5, 1.0])

0 commit comments

Comments
 (0)