Skip to content

Commit 51739b1

Browse files
author
Lorenz Wickert
committed
Fixed some minor problems stopping tutorial
1 parent ecff3c2 commit 51739b1

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ To implement a new dataset, one has to implement 3 classes:
4444
* **Preprocessing:** Datasets often come compressed, split up over many files or in who knows what structure. Therefore, for each dataset we need a Preprocessing class that transforms the datasets into a `StreamedResource`.
4545
* **Iterator:** Provides the iteration implementation on top of the binary stream `StreamedResource`
4646

47-
DataStack provides an [exemplary MNIST implementation](https://github.com/le1nux/datastack/blob/master/src/data_stack/mnist_example/factory.py).
47+
DataStack provides an [exemplary MNIST implementation](https://github.com/le1nux/datastack/blob/master/src/data_stack/mnist/factory.py).
4848

4949
## Copyright
5050

src/data_stack/mnist/factory.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,23 @@ def __init__(self, storage_connector: StorageConnector):
2121
self.resource_definitions = {
2222
"train": [
2323
ResourceDefinition(identifier=os.path.join(self.raw_path, "samples_train.gz"),
24-
source='http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
24+
# source='http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
25+
source='http://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz',
2526
md5_sum="f68b3c2dcbeaaa9fbdd348bbdeb94873"),
2627
ResourceDefinition(identifier=os.path.join(self.raw_path, "labels_train.gz"),
27-
source='http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
28+
# source='http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
29+
source='http://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz',
2830
md5_sum="d53e105ee54ea40749a09fcbcd1e9432")
2931

3032
],
3133
"test": [
3234
ResourceDefinition(identifier=os.path.join(self.raw_path, "samples_test.gz"),
33-
source='http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
35+
# source='http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
36+
source='https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz',
3437
md5_sum="9fb629c4189551a2d022fa330f9573f3"),
3538
ResourceDefinition(identifier=os.path.join(self.raw_path, "targets.gz"),
36-
source='http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
39+
# source='http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
40+
source='https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz',
3741
md5_sum="ec29112dd5afa0611ce80d1b7f02629c")
3842
]
3943
}
@@ -94,6 +98,7 @@ def get_dataset_iterator(self, config: Dict[str, Any] = None) -> Tuple[DatasetIt
9498

9599
mnist_factory = MNISTFactory(storage_connector)
96100
mnist_iterator, _ = mnist_factory.get_dataset_iterator(config={"split": "train"})
97-
img, target = mnist_iterator[0]
101+
img, target, _ = mnist_iterator[0]
102+
print("Target = ", target)
98103
plt.imshow(img)
99104
plt.show()

0 commit comments

Comments
 (0)