akarshsingh9
diff --git a/‎image-classification master/README.md‎
Lines changed: 4 additions & 0 deletions b/‎image-classification master/README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎image-classification master/helper.py‎
Lines changed: 165 additions & 0 deletions b/‎image-classification master/helper.py‎
Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Image Classification Project
+## Udacity Machine Learning Nanodegree
+### Introduction
+In this project, you'll classify images from the CIFAR-10 dataset. The dataset consists of airplanes, dogs, cats, and other objects. You'll preprocess the dataset, then train a convolutional neural network on all the samples. You'll normalize the images, one-hot encode the labels, and build a convolutional layer, max pool layer, and fully connected layer. At the end, you'll see the network's predictions on the sample images.
@@ -0,0 +1,165 @@
+import pickle
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import LabelBinarizer
+
+
+def _load_label_names():
+    """
+    Load the label names from file
+    """
+    return ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
+
+
+def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
+    """
+    Load a batch of the dataset
+    """
+    with open(cifar10_dataset_folder_path + '/data_batch_' + str(batch_id), mode='rb') as file:
+        batch = pickle.load(file, encoding='latin1')
+
+    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
+    labels = batch['labels']
+
+    return features, labels
+
+
+def display_stats(cifar10_dataset_folder_path, batch_id, sample_id):
+    """
+    Display Stats of the the dataset
+    """
+    batch_ids = list(range(1, 6))
+
+    if batch_id not in batch_ids:
+        print('Batch Id out of Range. Possible Batch Ids: {}'.format(batch_ids))
+        return None
+
+    features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_id)
+
+    if not (0 <= sample_id < len(features)):
+        print('{} samples in batch {}.  {} is out of range.'.format(len(features), batch_id, sample_id))
+        return None
+
+    print('\nStats of batch {}:'.format(batch_id))
+    print('Samples: {}'.format(len(features)))
+    print('Label Counts: {}'.format(dict(zip(*np.unique(labels, return_counts=True)))))
+    print('First 20 Labels: {}'.format(labels[:20]))
+
+    sample_image = features[sample_id]
+    sample_label = labels[sample_id]
+    label_names = _load_label_names()
+
+    print('\nExample of Image {}:'.format(sample_id))
+    print('Image - Min Value: {} Max Value: {}'.format(sample_image.min(), sample_image.max()))
+    print('Image - Shape: {}'.format(sample_image.shape))
+    print('Label - Label Id: {} Name: {}'.format(sample_label, label_names[sample_label]))
+    plt.axis('off')
+    plt.imshow(sample_image)
+
+
+def _preprocess_and_save(normalize, one_hot_encode, features, labels, filename):
+    """
+    Preprocess data and save it to file
+    """
+    features = normalize(features)
+    labels = one_hot_encode(labels)
+
+    pickle.dump((features, labels), open(filename, 'wb'))
+
+
+def preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode):
+    """
+    Preprocess Training and Validation Data
+    """
+    n_batches = 5
+    valid_features = []
+    valid_labels = []
+
+    for batch_i in range(1, n_batches + 1):
+        features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_i)
+        validation_count = int(len(features) * 0.1)
+
+        # Prprocess and save a batch of training data
+        _preprocess_and_save(
+            normalize,
+            one_hot_encode,
+            features[:-validation_count],
+            labels[:-validation_count],
+            'preprocess_batch_' + str(batch_i) + '.p')
+
+        # Use a portion of training batch for validation
+        valid_features.extend(features[-validation_count:])
+        valid_labels.extend(labels[-validation_count:])
+
+    # Preprocess and Save all validation data
+    _preprocess_and_save(
+        normalize,
+        one_hot_encode,
+        np.array(valid_features),
+        np.array(valid_labels),
+        'preprocess_validation.p')
+
+    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
+        batch = pickle.load(file, encoding='latin1')
+
+    # load the training data
+    test_features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
+    test_labels = batch['labels']
+
+    # Preprocess and Save all training data
+    _preprocess_and_save(
+        normalize,
+        one_hot_encode,
+        np.array(test_features),
+        np.array(test_labels),
+        'preprocess_training.p')
+
+
+def batch_features_labels(features, labels, batch_size):
+    """
+    Split features and labels into batches
+    """
+    for start in range(0, len(features), batch_size):
+        end = min(start + batch_size, len(features))
+        yield features[start:end], labels[start:end]
+
+
+def load_preprocess_training_batch(batch_id, batch_size):
+    """
+    Load the Preprocessed Training data and return them in batches of <batch_size> or less
+    """
+    filename = 'preprocess_batch_' + str(batch_id) + '.p'
+    features, labels = pickle.load(open(filename, mode='rb'))
+
+    # Return the training data in batches of size <batch_size> or less
+    return batch_features_labels(features, labels, batch_size)
+
+
+def display_image_predictions(features, labels, predictions):
+    n_classes = 10
+    label_names = _load_label_names()
+    label_binarizer = LabelBinarizer()
+    label_binarizer.fit(range(n_classes))
+    label_ids = label_binarizer.inverse_transform(np.array(labels))
+
+    fig, axies = plt.subplots(nrows=4, ncols=2)
+    fig.tight_layout()
+    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)
+
+    n_predictions = 3
+    margin = 0.05
+    ind = np.arange(n_predictions)
+    width = (1. - 2. * margin) / n_predictions
+
+    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
+        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
+        correct_name = label_names[label_id]
+
+        axies[image_i][0].imshow(feature)
+        axies[image_i][0].set_title(correct_name)
+        axies[image_i][0].set_axis_off()
+
+        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
+        axies[image_i][1].set_yticks(ind + margin)
+        axies[image_i][1].set_yticklabels(pred_names[::-1])
+        axies[image_i][1].set_xticks([0, 0.5, 1.0])