"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import cPickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``; see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, validation_data, test_data)

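# Usage sketch (an editorial addition, not part of the original module):
# assuming the gzipped pickle is available at ``../data/mnist.pkl.gz``,
# the raw arrays returned by ``load_data`` can be inspected like this:
#
#     >>> training_data, validation_data, test_data = load_data()
#     >>> training_data[0].shape    # 50,000 images of 784 pixels each
#     (50000, 784)
#     >>> training_data[1].shape    # 50,000 digit labels
#     (50000,)
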
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, validation_data, test_data)

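# Usage sketch for the wrapper (an editorial addition, not part of the
# original module). Each training pair holds a column-vector image and a
# one-hot label, while validation and test pairs keep the integer label:
#
#     >>> training_data, validation_data, test_data = load_data_wrapper()
#     >>> x, y = training_data[0]
#     >>> x.shape, y.shape
#     ((784, 1), (10, 1))
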
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
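
# Minimal self-check (an editorial addition, not part of the original
# module). It assumes ``../data/mnist.pkl.gz`` exists relative to the
# working directory, as the hard-coded path in ``load_data`` requires,
# and that the module runs under Python 2 (it imports ``cPickle``).
if __name__ == "__main__":
    # vectorized_result needs no data file, so check it first.
    v = vectorized_result(3)
    assert v.shape == (10, 1)
    assert v[3, 0] == 1.0 and v.sum() == 1.0

    # Load the wrapped data and report the sizes promised in the
    # docstrings above.
    training_data, validation_data, test_data = load_data_wrapper()
    print("training pairs:   %d" % len(training_data))
    print("validation pairs: %d" % len(validation_data))
    print("test pairs:       %d" % len(test_data))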