From a9a6b829140e5a6d47de712227855f29e0b8e841 Mon Sep 17 00:00:00 2001
From: us
Date: Sat, 9 Mar 2019 05:52:48 +0300
Subject: [PATCH 1/4] Added EMNIST Dataset
---
tensorflow_datasets/image/__init__.py | 1 +
tensorflow_datasets/image/mnist.py | 159 ++++++++++++++++++++++++--
2 files changed, 152 insertions(+), 8 deletions(-)
diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py
index 5e27d3ebfa9..39c246d2038 100644
--- a/tensorflow_datasets/image/__init__.py
+++ b/tensorflow_datasets/image/__init__.py
@@ -33,6 +33,7 @@
from tensorflow_datasets.image.mnist import FashionMNIST
from tensorflow_datasets.image.mnist import MNIST
from tensorflow_datasets.image.mnist import KMNIST
+from tensorflow_datasets.image.mnist import EMNIST
from tensorflow_datasets.image.omniglot import Omniglot
from tensorflow_datasets.image.open_images import OpenImagesV4
from tensorflow_datasets.image.quickdraw import QuickdrawBitmap
diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py
index 67818bfc379..01377f2a552 100644
--- a/tensorflow_datasets/image/mnist.py
+++ b/tensorflow_datasets/image/mnist.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""MNIST and Fashion MNIST."""
+"""MNIST, Fashion MNIST, KMNIST and EMNIST."""
from __future__ import absolute_import
from __future__ import division
@@ -23,6 +23,7 @@
import six.moves.urllib as urllib
import tensorflow as tf
+from tensorflow_datasets.core import api_utils
import tensorflow_datasets.public_api as tfds
# MNIST constants
@@ -68,7 +69,7 @@
"""
-_K_MNIST_CITATION ="""
+_K_MNIST_CITATION = """\
@online{clanuwat2018deep,
author = {Tarin Clanuwat and Mikel Bober-Irizar and Asanobu Kitamoto and Alex Lamb and Kazuaki Yamamoto and David Ha},
title = {Deep Learning for Classical Japanese Literature},
@@ -77,7 +78,17 @@
eprintclass = {cs.CV},
eprinttype = {arXiv},
eprint = {cs.CV/1812.01718},
- }
+}
+"""
+
+_EMNIST_CITATION = """\
+@article{cohen_afshar_tapson_schaik_2017,
+ title={EMNIST: Extending MNIST to handwritten letters},
+ DOI={10.1109/ijcnn.2017.7966217},
+ journal={2017 International Joint Conference on Neural Networks (IJCNN)},
+ author={Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and Schaik, Andre Van},
+ year={2017}
+}
"""
class MNIST(tfds.core.GeneratorBasedBuilder):
@@ -207,6 +218,143 @@ def _info(self):
citation=_K_MNIST_CITATION,
)
+class EMNISTConfig(tfds.core.BuilderConfig):
+ """BuilderConfig describing one EMNIST variant: class count and split sizes."""
+
+ @api_utils.disallow_positional_args
+ def __init__(self, class_number, train_examples, test_examples, **kwargs):
+ """Constructs an EMNISTConfig.
+
+ Args:
+ class_number: Number of label classes of this EMNIST variant; stored as
+ `self.class_number`.
+
+ train_examples, test_examples: Number of examples in the train and test
+ splits of this variant; stored as attributes of the same names.
+
+ **kwargs: keyword arguments forwarded to super.
+ """
+ super(EMNISTConfig, self).__init__(**kwargs)
+ self.class_number = class_number
+ self.train_examples = train_examples
+ self.test_examples = test_examples
+
+
+class EMNIST(MNIST):
+
+ VERSION = tfds.core.Version('1.0.0')
+
+ BUILDER_CONFIGS = [
+ EMNISTConfig(
+ name="byclass",
+ class_number=62,
+ train_examples=697932,
+ test_examples=116323,
+ description="EMNIST ByClass: 814,255 characters. 62 unbalanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="bymerge",
+ class_number=47,
+ train_examples=697932,
+ test_examples=116323,
+ description="EMNIST ByMerge: 814,255 characters. 47 unbalanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="balanced",
+ class_number=47,
+ train_examples=112800,
+ test_examples=18800,
+ description="EMNIST Balanced: 131,600 characters. 47 balanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="letters",
+ class_number=37,  # NOTE(review): description says 26 classes - confirm which is intended
+ train_examples=88800,
+ test_examples=14800,
+ description="EMNIST Letters: 103,600 characters. 26 balanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="digits",
+ class_number=10,
+ train_examples=240000,
+ test_examples=40000,
+ description="EMNIST Digits: 280,000 characters. 10 balanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="mnist",
+ class_number=10,
+ train_examples=60000,
+ test_examples=10000,
+ description="EMNIST MNIST: 70,000 characters. 10 balanced classes.",
+ version="0.1.1",
+ ),
+ EMNISTConfig(
+ name="test",
+ class_number=62,
+ train_examples=10,
+ test_examples=2,
+ description="EMNIST test data config.",
+ version="0.1.1",
+ ),
+ ]
+
+ def _info(self):
+ """Returns the DatasetInfo for the selected EMNIST builder config."""
+ return tfds.core.DatasetInfo(
+ builder=self,
+ description=(
+ "The EMNIST dataset is a set of handwritten character digits "
+ "derived from the NIST Special Database 19 and converted to "
+ "a 28x28 pixel image format and dataset structure that directly "
+ "matches the MNIST dataset."),
+ features=tfds.features.FeaturesDict({
+ "image": tfds.features.Image(shape=_MNIST_IMAGE_SHAPE),
+ "label": tfds.features.ClassLabel(num_classes=self.builder_config.class_number),
+ }),
+ supervised_keys=("image", "label"),
+ urls=["https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip"],
+ citation=_EMNIST_CITATION,
+ )
+
+ def _split_generators(self, dl_manager):
+ """Returns TRAIN and TEST SplitGenerators for files in `dl_manager.manual_dir`."""
+ filenames = {
+ "train_data": 'emnist-{}-train-images-idx3-ubyte'.format(self.builder_config.name),
+ "train_labels": 'emnist-{}-train-labels-idx1-ubyte'.format(self.builder_config.name),
+ "test_data": 'emnist-{}-test-images-idx3-ubyte'.format(self.builder_config.name),
+ "test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name),
+ }
+ dir_name = dl_manager.manual_dir
+ import os
+ return [
+ tfds.core.SplitGenerator(
+ name=tfds.Split.TRAIN,
+ num_shards=10,
+ gen_kwargs=dict(
+ num_examples=self.builder_config.train_examples,
+ data_path=os.path.join(dir_name, filenames['train_data']),
+ label_path=os.path.join(dir_name, filenames["train_labels"]),
+ )
+
+ ),
+
+ tfds.core.SplitGenerator(
+ name=tfds.Split.TEST,
+ num_shards=1,
+ gen_kwargs=dict(
+ num_examples=self.builder_config.test_examples,
+ data_path=os.path.join(dir_name, filenames['test_data']),
+ label_path=os.path.join(dir_name, filenames["test_labels"]),
+ )
+ )
+ ]
+
+
def _extract_mnist_images(image_filepath, num_images):
@@ -226,8 +374,3 @@ def _extract_mnist_labels(labels_filepath, num_labels):
buf = f.read(num_labels)
labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
return labels
-
-
-
-# test file
-# and full test
\ No newline at end of file
From 976bb826994b05333322d4265b71a7a76ac316c3 Mon Sep 17 00:00:00 2001
From: us
Date: Sat, 9 Mar 2019 05:54:53 +0300
Subject: [PATCH 2/4] Added EMNIST Test
---
tensorflow_datasets/image/mnist_test.py | 5 +++++
tensorflow_datasets/testing/mnist.py | 2 +-
.../emnist/emnist-test-test-images-idx3-ubyte | Bin 0 -> 1584 bytes
.../emnist/emnist-test-test-labels-idx1-ubyte | 1 +
.../emnist/emnist-test-train-images-idx3-ubyte | Bin 0 -> 7856 bytes
.../emnist/emnist-test-train-labels-idx1-ubyte | 1 +
6 files changed, 8 insertions(+), 1 deletion(-)
create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte
create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-labels-idx1-ubyte
create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-images-idx3-ubyte
create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte
diff --git a/tensorflow_datasets/image/mnist_test.py b/tensorflow_datasets/image/mnist_test.py
index d60896b1668..48cf40ca54e 100644
--- a/tensorflow_datasets/image/mnist_test.py
+++ b/tensorflow_datasets/image/mnist_test.py
@@ -50,5 +50,10 @@ class KMNISTTest(MNISTTest):
DATASET_CLASS = mnist.KMNIST
+class EMNISTTest(MNISTTest):
+ DATASET_CLASS = mnist.EMNIST
+ BUILDER_CONFIG_NAMES_TO_TEST = ["test"]
+
+
if __name__ == "__main__":
testing.test_main()
diff --git a/tensorflow_datasets/testing/mnist.py b/tensorflow_datasets/testing/mnist.py
index 8669d4ded2c..52eafc1a1de 100644
--- a/tensorflow_datasets/testing/mnist.py
+++ b/tensorflow_datasets/testing/mnist.py
@@ -71,7 +71,7 @@ def write_label_file(filename, num_labels):
def main(_):
- for mnist in ["mnist", "fashion_mnist", "kmnist"]:
+ for mnist in ["mnist", "fashion_mnist", "kmnist", "emnist"]:
output_dir = mnist_dir(mnist)
test_utils.remake_dir(output_dir)
write_image_file(os.path.join(output_dir, _TRAIN_DATA_FILENAME), 10)
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte
new file mode 100644
index 0000000000000000000000000000000000000000..761dc28672377f062ecc5d1e62e2615603064573
GIT binary patch
literal 1584
zcmV-02G991F)=YQF)=YQF)=YQF)=Y5s=?9ms(91SG`OAqvP+_CcE+0^b_id)WM>Gj
zG0g4F)Xi89A(;D6arbQZ4rvznDMMOP_n0A>-G9KZeL^*3`W(l;eOq0-K?EH7;`kVL
z&vu}<0bF5G_YF5r0<1ulh#9t5MsOFGb#bh!gY=9rz*3|Je8nnc^~*E7ZKW%KDk~lo
zcuZ@~kQXoAz=+_#_Od}(`SX^_7U?aF%rhDVTEr-N2>0p^nuZ87MByKyCmiONrc2SN
z%t1h1<%s={y@Y2`K0G4mg^0)^NFITe6(5kov7VwkyrNpf8d`8Sm<;i%glK8L`JJta8BH&I*0S@%
zGul!PuL3$>4f}{$2MtE?0N!JTsQUk}F_6V~u-4SZu!`{WkT?>y<7gHJa8!=-hiY)X
z1@$uI=}dR(PD5R^P3!7Wg9FftNp()%ajnf|ID*^
zr~95Y>C&ARVh)FNIqOcK&&1t7A4f7iQ;OfvzIL1SL@;cq)ur>gey2<6D}N)^_?oSk
zBGocJdFop(4S>%6pqPTAW`WNhTT?|rj24@7k~hJ?U%uVW*JxA
z$O5K6k6vW7-#hk0rX>2R&mq@n72+^%GSaNvdQp7@Gn&tlVNHa4-P*wdC@iYI>i@Y)
za2%l5IHC!Z)Chyw+_b&wnr&DVXFO}+&qQpOi4%W&)H*%2cX}fW*D$VN&{(VD->cn4
zM1HvQzR~Ogq^p95^w$
zRT;aJP(?fC)s1#O&J-{n1nDp{RMhF5{KLSoy68;~`K*0US&n=CV-lwj**0zWE=J8ERIAQ++cF9jcP&MEuGe$6@4oV=7;;bRfTk&&^Aioc%OG{3{
zr!$s
zha8E+#T@JnozDjlhV-RbW#Sp2aMdS0YaILx%aUc~
zn~m?DbOqj7fS}&kvY27XCYknw*CiU_t>5M%YGAvd>oJyN!mHI9;5spNXY#c14Kd5r
z7O3kiuopn~p9T-Zin9NvwDu5`UYgzU=kf5j)JG%2bAP~DSVw?6f#(+%mK#u(ka8>Q
zSMyfi%Ek#Axpg<2+p&SAHOhEfj#wbC5O38~kivE98aA>12h+rwMuYB2
z$+oS>xOIAWBU$|`
za5Jw}S4E>c*K{q|Yu$pcRMCC`wNL#sJ-S8U1zgT_(wTHDNGo6ne;lcfIrbq9H!{}-
zO|adh@%5dpV0(J{x(-d?>6+n0bWQdHGqkpF**9UT1Q4}1A*pT_5T`xy>6p3hL&veX
zofPd8y~QkeVGLn7TD{2F`>ej`95BFMfp};#x?8fAplr99t&^nIQe7@B9OV2@e>G;42&|uDOabV
zaxYPr|7%&f`(=W9JF#bIF0A`HV!u#Ghs1(9J2sniVzQ1OT{_`HyN~;buSe`(nuz{2
z>0=CO?VLeBH+YrD#=6EbKh}vK=ELU5Vfy!SU3yZG!_ijoMjoJN+8sZ38UK~0nMC5T
z55n@#53aegc=?pRDe<*T(Qdj7L~Cuw#q66NpH$)m25*UxrAv&-#yKi(jwemw5@+Q>
zS@lU%t`*>NzzkhFwsorP#b*Qh_>F0PwR-}4E;
z^;&%23nY*|q>45~lE`S`AK@99ts{!Rb<4G=4zB4n(+T?t=^9K_BKS>FNoBT>X`7tb
z49FWnAm;M77{EImE-iU=
z`@_4*1u&Ylb3@fww0=pL!-J4%B+>{W{%qOn-wFW
z^Bs?JMKJah1GrBy0APldvNgy|4HCPg=EoygN1j?m6Si=Oq>+9M9S4x1;c_OPw<(c%
z(EE6F3+oH&E8qk`EO!000Xxs@G}!%=P;GAOhf~+Z7@!)&V1{^r*s>it_PU(p6+}XA
z;RWG?hYi7*|ETZSOpqT0Qj$>{-p$tY2#PaT6
zp)9Ys?hHUI_8;}R^t+b)9n8Nhd)Z$@)K0m%L^9W@&QGjRd0g!1fCZX9+zR3lhtOHaWrXDfhVXYEfJe`pR
z7cj?B;>Q5p+>_Rs1h&A*wimV#1VEyIOccj4D89Lx9d4s1=tDT(&~SVH|sf;{n`)Q$I@Ts%L|beV;U@$%_N?*t~e?Ic*W2|N4CP&s%r>vCguUPsciOrf9H1
zv23tt+qubd4#_-7{^2OMh^25zNvd)gWVcu7qn)W@n+fl6d)8VTcL}1xq_(r*fjeY?4>zP{CU=I^in|^o8cEr&esN~yb6;P
z`KLir2Qz*UOi@TyY68H2RTYzenCn^S?y$NOT58MQm+yu?l0
zs8DUSoq)J~W2t!L@gikW7-2F5+hi7E?-vPFBnVTrbz6D^Z
zyUFtGigg^DIxV}uf{&^tA1qd>s?uoSFA7txX89Z1et^(b7
zf!SUb?rSeW6=)-)^1#`~+UpcZDnou9;F4IN*fUIzS!!g6D~g(uSic%$mZ-Wlm}e=?
z8kfQtLSGM?YzC^YKnTth^jBtpsqyl!-myHwGDS%W9nh{~@v!H(w;$frkJN6M1BGjw
z_^9kKQCm*zAZIX_PYGs(9w1Dym@F0v1xpC*D`wcH$~ZsMuegtd>QB0%j!g!pGOv|*
zDp0P;r!)9`M{@0`Yd-h3u?lA?z$g;6P+-geo4f*R4b;%$xNG7ZYATZut{d@l?C4y%
zp~?{6K(#Pw)5JE!KEGmGb?_eM>JE9HksMyc(RH_}i5%;519{<*A=3qk8sR0y#gEDs
zdYtik4^(R@cB_aeuIJ*zgJTc@^4Hg%Cii54@zLotSSW!KerYly9^s@{#nd=fQw(s5
z>gLoFBjCoEGZm?8w2fdUj6!;H*i3G44_<6yUEdl4>V*L06n&N#-
zNr=g(s_6C=w}F>olv2&*Mq2oTZwT+0k2>$Ar0;22lSp570PZ@VsDEU4i;+P&aan@+
zcf_(Q7`^$-PYvOSO{(a?5lIc9Hz3bv|ALX7L$Gf`$&B&OO=R1fVq^Q4I|uF;R0yF;
zI}@`S#COGl&^VJWAho;8SeH~%sK_%MZzomwHm&gdu~t3m`Rv
zIiOqe0T$#Rug65HFwIQV@EEJnxs?d9I}p1X=Iv_H<%sB%kOj}G#*6hAW$!C~w_^&V
zzk!9U(g<$a;HBM&w_MgM?qnFbvpO&gop{&eA*v^u7bbE*SEk+BQ_bWmBw}bsh<=@W
zpHhCrZ{rW{g&WGyI2bfCMeCZP8BQ!Wjl*xh#83=BU8j52LI24>lSR!iE@pmdj#3FN
zAvKHZh=E*p?*iVPi9#?M%*DIkJFaoAo#o>m4|6{is$*Q-Y$jlJuI@Ig1d7Tk+6mGT
z(+!C*zJHsk;`_FXJIP?DP*M%2VwG&5$8?
zfe*#NGR+h!(pEtez-`Lzf#}n{H{jk?8nIag`HQ3qhV>cgYBXzFf!NG=;8?Io#xzRz
zN^k{z&b5%&FQTF>go|i^A5c`$T2nk^#x}YSjdZ@9_^~zdP-`Ow{(Z!c!mU53Mf-aB
z>?Y~+Episk{w`0tNBZN`cJPL<87fjq>$KK!L5q#1hI{A3X9IWBolhsqpy1C+;T2u*
z=h1j*&%w!?%3w{WsPwyWG`ttWK(-oJvh{VnCh<@efhr78lAOi*^x_d!Q>?ULtFnXT
zBq#r9|CeB3LsAz7Erk(<2ExNpsaOGM+Yu;oXLtB-pM4BLW}B|A?n8kvW+nA5eHii2
zU`KjH?jSJ$BQK3%a=Smq$0)1&&+Bycz0ErC2Zl15abEbq1!BwVV9u{eRLe~K=%F;g
zQ>Z7q8?rDt{OWvHdtBk*c|{yJr!mwY4LS@78m1q$+=?P#o%2
zUhGERyn(&O#n~6D)26qzEBcCJ
zzeiVeG=9(%YniAsZvnwS=G(aNiK(j&Yzm^cb;cGh&#dHW&n#PdNU;>o7i)QdvKRV9
z+r}#{e$1>cAkm_odvD&#qQ2W7jww(jM|)7|xRzf5ntz3!NL^*DEq&W^dMeSI`R(C5
z$i750g|Qnh*KhIp3)(l6FWyd-+3~SH@X0_ECObu1njXZDI1Z|aqq~z!M7A<-*rbV-
zepZ1H&LZ+OA@tZb9IroG;7G!t$dh2W62PQFxDbFrt7~GVfIy+{2*W~8O&rwdcgTZh
zz-?}7AwaISNgE5XUfOH&P&s9=+zMniDz@p0Vy5O&w>`r>(K^s=YcY{_W
z1H81jTd@m|61!g%CIUX$3S4aM2xKKwosqKHWjcCg_|NrpO1Q1BjWIeqDD%s#*K>?v
zSTPpXsxPRlBh^4+VzdbJ9hwWABROjh!EoXZ6OjP%xKTDf^`k9dG(rQ(0OAg
zefU9VJS9-csTSbOp~2uryevemg3kYw4wxK9?0m|sDQ2ZAoUD{?CZ`L^!Al&N0Q)M;
z{)*8bhY>Tb3loh)R{n$|(LN!#F107r&&sniz=M-gkr>yw@*0lwr8QsHT6ed33Wy3i
zyu#X|H~6*5k_(sLt*T);3m@z10dsaYQTU^{WSm@R^#ZMVlTbg|
ztVG}PQ9}>_a;%u0i);KR$da3jlxP6JY<=^))Cl)~R7nFdUF-J*q*L#rA>F5cMBd1$
zpZ&7(aOA-4=I*}J>1`pTRK~%;oL9j_sb6EbJ=8K@Bm(mHjChi%mI?^4jJD{(QZ{Jp
z%84+wwY@TOWcnk34OCp}-eiT(Dbh7w!qoBxZ!p$E9hC9*xg)?it6)2{gX$Y)U`@G8
zO%#rqWOPSns{9mc4w31mo9V!ZFXvQE_?LSGWZ#WwSK8QqVpZam(CNMF&|0~Fbg5};DPmlGlfX9rYgSr?Z{{{*vYMD@0_P%RmUd$7
z-(bILrD`ozH=k0lVv+k4M-QGsQE#?Zh}H;>+6d0vTVzIk77Y2}B-bEO-qG@{c3DkN
zwLP>-)2RY%R~mwOvzRTAbPNkj+nX(e&vl(t16YYMJH^URdxJedmbn$(aOXGN5QTD!
zQOLgL5QUWq#v$U#rUy{@BW2>viF^hvqoMn!Pg%6R3!
zZZ)xa3XAD!_e6FL2-H0SGN)@bTqZ+Hx*G41ZP!+}J~1t;VEkGZF~nh*j2fW6GSx5q
zqH&=2vevY&-xhJnxk+w(J{G`yz3;ewQLE7SO5%9wPkj8caP`5tW}Y@aA>l4`P)M_K
zl%3rl<^WkSi`4Y9m44bzlpx->(@_eTJG|lg0Qst}^hitH=F6NrzKqk?N?GJLoKgkZ
zfP#M~vYpI$E}P;Txrro$bz^=vR$o|#4GFE5sSuu!zumdS*sZs7opH}y!tIE!0<%w_
z`yG;s{h9#3ApqmdoV!Rm}e$WO$1tg=twOF9toaNI9~5C5NP5D}ac=ODXP9Xwd;GH2(Be=u#3L(C;^U@V0v88Pas
z(~W^VY;|E`!Fo8wy@v&onQgXxGN8JXda|VM;2@>QM-V3fBazvPgX=qbJn)*m=1Ql~
zGgieW1?7pq1?DsixQQy;$lpV|03V=-53Sp`3uW~;S`_b?PUc{UAlQcgLFm42=e@Bz
zDonI>7o!9B4al#j`SxiDp^B-%y!?vunQ1N(*SJ{myH~-!l|EjLX}(MHvXG|HRi6z(
zt1CrDRHGIQENM+Du9CO^BdS15nM;GV?>N7p4|j0nvddN4_B=I@;eNs`aIb6tc
z6&DXFwZ4?#!;COeJ+X3h3tvEhS-Il1=VTO>RdtDek~G$i_b~C`f#zF}-U2SMNu3v=
z^|Tb<2~-{i*C@Q_vz`pwiI@;U>gQ-7u?Ye_tu-OGOi4*f1i)$hG`TK6(V+zCa<9mJ
zZX$caX^43qKg8SLtgqr()l#%5@FG*iK`lq)C9fh3wNaqCb>+Smogh$^1xnRZnSx1^
zOFI>I^2lkG%YO!O1|ag8UPg6nX^St7^5=O+L(T#|VmQPChz)LmmG{!A+tiJd4*?A0
zR8VO=vzl!}cU_3Pb2;|KeZVmqrJwnd_lyE#yPz-LoY>%>iU3Ph*Vx
zW4LI3v~bq_z7U91nQP1tYr=ypL6J`6WVHhy3w@CLLAG@>Xq5Ne%DBeFLbWP5!^<2G
z>kWZi2`;>on^gC|6Im*jpg@sW`5zXV^T^am*vRgq_GRMHC0_4dn^FfBj272A<3$x(
zt@my=p$nl-En_Tdg(2&Fd0jBB?ue({891o+}3sxZF2*xgCHD>@r<1|Em5SmcO4DCh^uFjtZ>^Ks{lj=X)sjO0DIT;3AxS
zPXRdkjsL?pw)gCvOen@Y76+Ug(@lZ>>N#TwLAH8I(Wp=1{{_I+BDjmQ3r~;!&6+=s
zij<^~Kf7h9gN|qzDXzRZRD=I$#y5Cl}6
zntlymYf>>W>E1pdUXu#ltB~&h*=tVa&I>?Ig8}l?27|Vo1X$
zvLU0yBCINqM>mu{Y&iPqa+m73mvfNC*jgoor#_3o1YB6f3G**~yK^)varL8(wHpLX
z!YfNfYHyHma$XsDu~)lpcfjFgR`&$5LB|9}^LAayY6?Ew@tkHYSoTQNk%`D-YNg+x
z`(_fddf32gXom7%Go&K@f$gbc3VAXGV3C+BkHSUWitW_+VSv`-1|Wo+pyToEjccI{
zmQ%p>Zn!gR6T3i=bUvRbGx%fffA#43H6seHvTTf-#>}hVCH$rQ86MdZzbC|6$pr5@
ztQb++zlBKb0n4j&;Hf83xpzuPLoiyhO-_gUYTBWWs;?S_?;qx?{<~e**{*K#m+9b$
z!g7w(iB^8A00i>q*XqT7G$M1v_J|VZajQ`t+zF7V;%Zg;eu&)8yqd|z(dx2{=1a;>
zfe&Q?(Uw=1t`du!=^Z$6-P!3rlq-f!!iiu7s6emeI;UC#ys({_YK9fl;KA>+=sj$u
z2fMB?z}4-*z8Nce`F+KvR*V(mwFQ{^JCKKq1Y!&MGU-)5ca0^M&o;LdV=?^y!|Bv(
z35;E@zO$UDNue#TTJr{XzQmwJFTK(?OUWTj&PAE&8S9`v_!Gd@9hPE34WGK(6z%{<
z+sAgYfmdKTlf@X+L8Jcr2W$!i1(2$W=>v8cPzDz1Px7Y}xFrr4DMZ8F{TL8I
zzy?rCX0jVFt?}@AfR)sTvaAiW9^(NlN;$ADCC)Wx`37GoGuE$O1dA9J#uatTmpyGxlH|^;njfZ^6<=Bei(`xyUpzw-)~ypCS4x-*Vr
z&ajHvjqeOBg@QMz#MP`=aly|DgdfD9Pt&Leg_AW7kE%gQ7kR@w$&Zr4c-UIzKysnw
zI#|M&+Jj}#2X=7p5AP?>Ns%1lV>Ax_aM1wB^4Vv^CfU9~?*l|hj#vuydANAo;
z7O+BLGcgb4riuN$%JyNH2&+&#@D`fzO4Pyi#nEz@8>co|D;AD5&+VHJ!v0at06V
zc*?NR%eS-EpYZ~VdtDR$s`ogO?x`Mc5glTro^Kur9rRyI3vajd$W8M2(i-6~(N!w3
z?FEEDO?$TzeNbU@D7@vqeIYxH`=No{SOX$MByZI0?czxB-Typo0tR~{OrXGl+iy{o
O4!Cd{auR#e1u!1Q#aDd*
literal 0
HcmV?d00001
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte
new file mode 100644
index 00000000000..77deb0eed69
--- /dev/null
+++ b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte
@@ -0,0 +1 @@
+11111111
\ No newline at end of file
From e631bac5bd2c9c6372fb326eaa41ec5177229ed8 Mon Sep 17 00:00:00 2001
From: us
Date: Mon, 11 Mar 2019 00:55:56 +0300
Subject: [PATCH 3/4] Fixed version issues
---
tensorflow_datasets/image/mnist.py | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py
index 01377f2a552..fb8c4908cd4 100644
--- a/tensorflow_datasets/image/mnist.py
+++ b/tensorflow_datasets/image/mnist.py
@@ -19,6 +19,7 @@
from __future__ import division
from __future__ import print_function
+import os
import numpy as np
import six.moves.urllib as urllib
import tensorflow as tf
@@ -242,7 +243,7 @@ def __init__(self, class_number, train_examples, test_examples, **kwargs):
class EMNIST(MNIST):
- VERSION = tfds.core.Version('1.0.0')
+ VERSION = tfds.core.Version('1.0.1')
BUILDER_CONFIGS = [
EMNISTConfig(
@@ -251,7 +252,7 @@ class EMNIST(MNIST):
train_examples=697932,
test_examples=116323,
description="EMNIST ByClass: 814,255 characters. 62 unbalanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="bymerge",
@@ -259,7 +260,7 @@ class EMNIST(MNIST):
train_examples=697932,
test_examples=116323,
description="EMNIST ByMerge: 814,255 characters. 47 unbalanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="balanced",
@@ -267,7 +268,7 @@ class EMNIST(MNIST):
train_examples=112800,
test_examples=18800,
description="EMNIST Balanced: 131,600 characters. 47 balanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="letters",
@@ -275,7 +276,7 @@ class EMNIST(MNIST):
train_examples=88800,
test_examples=14800,
description="EMNIST Letters: 103,600 characters. 26 balanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="digits",
@@ -283,7 +284,7 @@ class EMNIST(MNIST):
train_examples=240000,
test_examples=40000,
description="EMNIST Digits: 280,000 characters. 10 balanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="mnist",
@@ -291,7 +292,7 @@ class EMNIST(MNIST):
train_examples=60000,
test_examples=10000,
description="EMNIST MNIST: 70,000 characters. 10 balanced classes.",
- version="0.1.1",
+ version="1.0.1",
),
EMNISTConfig(
name="test",
@@ -299,7 +300,7 @@ class EMNIST(MNIST):
train_examples=10,
test_examples=2,
description="EMNIST test data config.",
- version="0.1.1",
+ version="1.0.1",
),
]
@@ -330,7 +331,7 @@ def _split_generators(self, dl_manager):
"test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name),
}
dir_name = dl_manager.manual_dir
- import os
+
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
From 7699041a9157d9df5c1407fb3f98f28515b8a443 Mon Sep 17 00:00:00 2001
From: us
Date: Tue, 12 Mar 2019 01:42:35 +0300
Subject: [PATCH 4/4] Added statement to check downloaded dataset.
---
tensorflow_datasets/image/mnist.py | 24 ++++++++++++++++++++++++
tensorflow_datasets/image/mnist_test.py | 4 ++--
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py
index 2ef5c820575..9456be7a476 100644
--- a/tensorflow_datasets/image/mnist.py
+++ b/tensorflow_datasets/image/mnist.py
@@ -333,8 +333,32 @@ def _split_generators(self, dl_manager):
"test_data": 'emnist-{}-test-images-idx3-ubyte'.format(self.builder_config.name),
"test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name),
}
+
dir_name = dl_manager.manual_dir
+ if not tf.io.gfile.exists(os.path.join(dir_name, filenames['train_data'])):
+ # tfds.core.download_manager cannot yet extract multiple, nested files, so
+ # the data must be provided manually; only the train images file is probed.
+ msg = "You must download and extract the dataset files manually and place them in : "
+ msg += dl_manager.manual_dir
+ msg += """\nFile tree must be like this :\n
+ .
+ ├── emnist
+ │   ├── emnist-byclass-train-images-idx3-ubyte
+ │   ├── emnist-byclass-train-labels-idx1-ubyte
+ │   ├── emnist-byclass-test-images-idx3-ubyte
+ │   ├── emnist-byclass-test-labels-idx1-ubyte
+ │   ├── emnist-bymerge-train-images-idx3-ubyte
+ │   ├── emnist-bymerge-train-labels-idx1-ubyte
+ │   ├── emnist-bymerge-test-images-idx3-ubyte
+ │   ├── emnist-bymerge-test-labels-idx1-ubyte
+ │   ├── .......
+ │   ├── .....
+ │   ├── ...
+ │ ├──
+ """
+ raise FileNotFoundError(msg)
+
return [
tfds.core.SplitGenerator(
name=tfds.Split.TRAIN,
diff --git a/tensorflow_datasets/image/mnist_test.py b/tensorflow_datasets/image/mnist_test.py
index 921e6bdbbfd..c64e2268f1c 100644
--- a/tensorflow_datasets/image/mnist_test.py
+++ b/tensorflow_datasets/image/mnist_test.py
@@ -52,8 +52,8 @@ class KMNISTTest(MNISTTest):
class EMNISTTest(MNISTTest):
- DATASET_CLASS = mnist.EMNIST
- BUILDER_CONFIG_NAMES_TO_TEST = ["test"]
+ DATASET_CLASS = mnist.EMNIST
+ BUILDER_CONFIG_NAMES_TO_TEST = ["test"]
if __name__ == "__main__":