From a9a6b829140e5a6d47de712227855f29e0b8e841 Mon Sep 17 00:00:00 2001 From: us Date: Sat, 9 Mar 2019 05:52:48 +0300 Subject: [PATCH 1/4] Added EMNIST Dataset --- tensorflow_datasets/image/__init__.py | 1 + tensorflow_datasets/image/mnist.py | 159 ++++++++++++++++++++++++-- 2 files changed, 152 insertions(+), 8 deletions(-) diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py index 5e27d3ebfa9..39c246d2038 100644 --- a/tensorflow_datasets/image/__init__.py +++ b/tensorflow_datasets/image/__init__.py @@ -33,6 +33,7 @@ from tensorflow_datasets.image.mnist import FashionMNIST from tensorflow_datasets.image.mnist import MNIST from tensorflow_datasets.image.mnist import KMNIST +from tensorflow_datasets.image.mnist import EMNIST from tensorflow_datasets.image.omniglot import Omniglot from tensorflow_datasets.image.open_images import OpenImagesV4 from tensorflow_datasets.image.quickdraw import QuickdrawBitmap diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py index 67818bfc379..01377f2a552 100644 --- a/tensorflow_datasets/image/mnist.py +++ b/tensorflow_datasets/image/mnist.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""MNIST and Fashion MNIST.""" +"""MNIST, Fashion MNIST, KMNIST and EMNIST.""" from __future__ import absolute_import from __future__ import division @@ -23,6 +23,7 @@ import six.moves.urllib as urllib import tensorflow as tf +from tensorflow_datasets.core import api_utils import tensorflow_datasets.public_api as tfds # MNIST constants @@ -68,7 +69,7 @@ """ -_K_MNIST_CITATION =""" +_K_MNIST_CITATION = """\ @online{clanuwat2018deep, author = {Tarin Clanuwat and Mikel Bober-Irizar and Asanobu Kitamoto and Alex Lamb and Kazuaki Yamamoto and David Ha}, title = {Deep Learning for Classical Japanese Literature}, @@ -77,7 +78,17 @@ eprintclass = {cs.CV}, eprinttype = {arXiv}, eprint = {cs.CV/1812.01718}, - } +} +""" + +_EMNIST_CITATION = """\ +@article{cohen_afshar_tapson_schaik_2017, + title={EMNIST: Extending MNIST to handwritten letters}, + DOI={10.1109/ijcnn.2017.7966217}, + journal={2017 International Joint Conference on Neural Networks (IJCNN)}, + author={Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and Schaik, Andre Van}, + year={2017} +} """ class MNIST(tfds.core.GeneratorBasedBuilder): @@ -207,6 +218,143 @@ def _info(self): citation=_K_MNIST_CITATION, ) +class EMNISTConfig(tfds.core.BuilderConfig): + """BuilderConfig for EMNIST CONFIG.""" + + @api_utils.disallow_positional_args + def __init__(self, class_number, train_examples, test_examples, **kwargs): + """BuilderConfig for EMNIST class number. + + Args: + class_number: There are six different splits provided in this dataset. And have + different class numbers. + + train_examples, test_examples: So in these have different test and train character + numbers. + + **kwargs: keyword arguments forwarded to super. + """ + super(EMNISTConfig, self).__init__(**kwargs) + self.class_number = class_number + self.train_examples = train_examples + self.test_examples = test_examples + + +class EMNIST(MNIST): + + VERSION = tfds.core.Version('1.0.0') + + BUILDER_CONFIGS = [ + EMNISTConfig( + name="byclass", + class_number=62, + train_examples=697932, + test_examples=116323, + description="EMNIST ByClass: 814,255 characters. 62 unbalanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="bymerge", + class_number=47, + train_examples=697932, + test_examples=116323, + description="EMNIST ByMerge: 814,255 characters. 47 unbalanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="balanced", + class_number=47, + train_examples=112800, + test_examples=18800, + description="EMNIST Balanced: 131,600 characters. 47 balanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="letters", + class_number=37, + train_examples=88800, + test_examples=14800, + description="EMNIST Letters: 103,600 characters. 26 balanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="digits", + class_number=10, + train_examples=240000, + test_examples=40000, + description="EMNIST Digits: 280,000 characters. 10 balanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="mnist", + class_number=10, + train_examples=60000, + test_examples=10000, + description="EMNIST MNIST: 70,000 characters. 10 balanced classes.", + version="0.1.1", + ), + EMNISTConfig( + name="test", + class_number=62, + train_examples=10, + test_examples=2, + description="EMNIST test data config.", + version="0.1.1", + ), + ] + + def _info(self): + return tfds.core.DatasetInfo( + builder=self, + description=("The EMNIST dataset is a set of handwritten character digits" + "derived from the NIST Special Database 19 and converted to" + "a 28x28 pixel image format and dataset structure that directly" + "matches the MNIST dataset." +), + features=tfds.features.FeaturesDict({ + "image": tfds.features.Image(shape=_MNIST_IMAGE_SHAPE), + "label": tfds.features.ClassLabel(num_classes=self.builder_config.class_number), + + }), + supervised_keys=("image", "label"), + urls=["https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip"], + citation=_EMNIST_CITATION, + ) + + def _split_generators(self, dl_manager): + + filenames = { + "train_data": 'emnist-{}-train-images-idx3-ubyte'.format(self.builder_config.name), + "train_labels": 'emnist-{}-train-labels-idx1-ubyte'.format(self.builder_config.name), + "test_data": 'emnist-{}-test-images-idx3-ubyte'.format(self.builder_config.name), + "test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name), + } + dir_name = dl_manager.manual_dir + import os + return [ + tfds.core.SplitGenerator( + name=tfds.Split.TRAIN, + num_shards=10, + gen_kwargs=dict( + num_examples=self.builder_config.train_examples, + data_path=os.path.join(dir_name, filenames['train_data']), + label_path=os.path.join(dir_name, filenames["train_labels"]), + ) + + ), + + tfds.core.SplitGenerator( + name=tfds.Split.TEST, + num_shards=1, + gen_kwargs=dict( + num_examples=self.builder_config.test_examples, + data_path=os.path.join(dir_name, filenames['test_data']), + label_path=os.path.join(dir_name, filenames["test_labels"]), + ) + ) + ] + + def _extract_mnist_images(image_filepath, num_images): @@ -226,8 +374,3 @@ def _extract_mnist_labels(labels_filepath, num_labels): buf = f.read(num_labels) labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64) return labels - - - -# test file -# and full test \ No newline at end of file From 976bb826994b05333322d4265b71a7a76ac316c3 Mon Sep 17 00:00:00 2001 From: us Date: Sat, 9 Mar 2019 05:54:53 +0300 Subject: [PATCH 2/4] Added EMNIST Test --- tensorflow_datasets/image/mnist_test.py | 5 +++++ tensorflow_datasets/testing/mnist.py | 2 +- .../emnist/emnist-test-test-images-idx3-ubyte | Bin 0 -> 1584 bytes .../emnist/emnist-test-test-labels-idx1-ubyte | 1 + .../emnist/emnist-test-train-images-idx3-ubyte | Bin 0 -> 7856 bytes .../emnist/emnist-test-train-labels-idx1-ubyte | 1 + 6 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-labels-idx1-ubyte create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-images-idx3-ubyte create mode 100644 tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte diff --git a/tensorflow_datasets/image/mnist_test.py b/tensorflow_datasets/image/mnist_test.py index d60896b1668..48cf40ca54e 100644 --- a/tensorflow_datasets/image/mnist_test.py +++ b/tensorflow_datasets/image/mnist_test.py @@ -50,5 +50,10 @@ class KMNISTTest(MNISTTest): DATASET_CLASS = mnist.KMNIST +class EMNISTTest(MNISTTest): + DATASET_CLASS = mnist.EMNIST + BUILDER_CONFIG_NAMES_TO_TEST = ["test"] + + if __name__ == "__main__": testing.test_main() diff --git a/tensorflow_datasets/testing/mnist.py b/tensorflow_datasets/testing/mnist.py index 8669d4ded2c..52eafc1a1de 100644 --- a/tensorflow_datasets/testing/mnist.py +++ b/tensorflow_datasets/testing/mnist.py @@ -71,7 +71,7 @@ def write_label_file(filename, num_labels): def main(_): - for mnist in ["mnist", "fashion_mnist", "kmnist"]: + for mnist in ["mnist", "fashion_mnist", "kmnist", "emnist"]: output_dir = mnist_dir(mnist) test_utils.remake_dir(output_dir) write_image_file(os.path.join(output_dir, _TRAIN_DATA_FILENAME), 10) diff --git a/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-test-images-idx3-ubyte new file mode 100644 index 0000000000000000000000000000000000000000..761dc28672377f062ecc5d1e62e2615603064573 GIT binary patch literal 1584 zcmV-02G991F)=YQF)=YQF)=YQF)=Y5s=?9ms(91SG`OAqvP+_CcE+0^b_id)WM>Gj zG0g4F)Xi89A(;D6arbQZ4rvznDMMOP_n0A>-G9KZeL^*3`W(l;eOq0-K?EH7;`kVL z&vu}<0bF5G_YF5r0<1ulh#9t5MsOFGb#bh!gY=9rz*3|Je8nnc^~*E7ZKW%KDk~lo zcuZ@~kQXoAz=+_#_Od}(`SX^_7U?aF%rhDVTEr-N2>0p^nuZ87MByKyCmiONrc2SN z%t1h1<%s={y@Y2`K0G4mg^0)^NFITe6(5kov7VwkyrNpf8d`8Sm<;i%glK8L`JJta8BH&I*0S@% zGul!PuL3$>4f}{$2MtE?0N!JTsQUk}F_6V~u-4SZu!`{WkT?>y<7gHJa8!=-hiY)X z1@$uI=}dR(PD5R^P3!7Wg9FftNp()%ajnf|ID*^ zr~95Y>C&ARVh)FNIqOcK&&1t7A4f7iQ;OfvzIL1SL@;cq)ur>gey2<6D}N)^_?oSk zBGocJdFop(4S>%6pqPTAW`WNhTT?|rj24@7k~hJ?U%uVW*JxA z$O5K6k6vW7-#hk0rX>2R&mq@n72+^%GSaNvdQp7@Gn&tlVNHa4-P*wdC@iYI>i@Y) za2%l5IHC!Z)Chyw+_b&wnr&DVXFO}+&qQpOi4%W&)H*%2cX}fW*D$VN&{(VD->cn4 zM1HvQzR~Ogq^p95^w$ zRT;aJP(?fC)s1#O&J-{n1nDp{RMhF5{KLSoy68;~`K*0US&n=CV-lwj**0zWE=J8ERIAQ++cF9jcP&MEuGe$6@4oV=7;;bRfTk&&^Aioc%OG{3{ zr!$s zha8E+#T@JnozDjlhV-RbW#Sp2aMdS0YaILx%aUc~ zn~m?DbOqj7fS}&kvY27XCYknw*CiU_t>5M%YGAvd>oJyN!mHI9;5spNXY#c14Kd5r z7O3kiuopn~p9T-Zin9NvwDu5`UYgzU=kf5j)JG%2bAP~DSVw?6f#(+%mK#u(ka8>Q zSMyfi%Ek#Axpg<2+p&SAHOhEfj#wbC5O38~kivE98aA>12h+rwMuYB2 z$+oS>xOIAWBU$|` za5Jw}S4E>c*K{q|Yu$pcRMCC`wNL#sJ-S8U1zgT_(wTHDNGo6ne;lcfIrbq9H!{}- zO|adh@%5dpV0(J{x(-d?>6+n0bWQdHGqkpF**9UT1Q4}1A*pT_5T`xy>6p3hL&veX zofPd8y~QkeVGLn7TD{2F`>ej`95BFMfp};#x?8fAplr99t&^nIQe7@B9OV2@e>G;42&|uDOabV zaxYPr|7%&f`(=W9JF#bIF0A`HV!u#Ghs1(9J2sniVzQ1OT{_`HyN~;buSe`(nuz{2 z>0=CO?VLeBH+YrD#=6EbKh}vK=ELU5Vfy!SU3yZG!_ijoMjoJN+8sZ38UK~0nMC5T z55n@#53aegc=?pRDe<*T(Qdj7L~Cuw#q66NpH$)m25*UxrAv&-#yKi(jwemw5@+Q> zS@lU%t`*>NzzkhFwsorP#b*Qh_>F0PwR-}4E; z^;&%23nY*|q>45~lE`S`AK@99ts{!Rb<4G=4zB4n(+T?t=^9K_BKS>FNoBT>X`7tb z49FWnAm;M77{EImE-iU= z`@_4*1u&Ylb3@fww0=pL!-J4%B+>{W{%qOn-wFW z^Bs?JMKJah1GrBy0APldvNgy|4HCPg=EoygN1j?m6Si=Oq>+9M9S4x1;c_OPw<(c% z(EE6F3+oH&E8qk`EO!000Xxs@G}!%=P;GAOhf~+Z7@!)&V1{^r*s>it_PU(p6+}XA z;RWG?hYi7*|ETZSOpqT0Qj$>{-p$tY2#PaT6 zp)9Ys?hHUI_8;}R^t+b)9n8Nhd)Z$@)K0m%L^9W@&QGjRd0g!1fCZX9+zR3lhtOHaWrXDfhVXYEfJe`pR z7cj?B;>Q5p+>_Rs1h&A*wimV#1VEyIOccj4D89Lx9d4s1=tDT(&~SVH|sf;{n`)Q$I@Ts%L|beV;U@$%_N?*t~e?Ic*W2|N4CP&s%r>vCguUPsciOrf9H1 zv23tt+qubd4#_-7{^2OMh^25zNvd)gWVcu7qn)W@n+fl6d)8VTcL}1xq_(r*fjeY?4>zP{CU=I^in|^o8cEr&esN~yb6;P z`KLir2Qz*UOi@TyY68H2RTYzenCn^S?y$NOT58MQm+yu?l0 zs8DUSoq)J~W2t!L@gikW7-2F5+hi7E?-vPFBnVTrbz6D^Z zyUFtGigg^DIxV}uf{&^tA1qd>s?uoSFA7txX89Z1et^(b7 zf!SUb?rSeW6=)-)^1#`~+UpcZDnou9;F4IN*fUIzS!!g6D~g(uSic%$mZ-Wlm}e=? z8kfQtLSGM?YzC^YKnTth^jBtpsqyl!-myHwGDS%W9nh{~@v!H(w;$frkJN6M1BGjw z_^9kKQCm*zAZIX_PYGs(9w1Dym@F0v1xpC*D`wcH$~ZsMuegtd>QB0%j!g!pGOv|* zDp0P;r!)9`M{@0`Yd-h3u?lA?z$g;6P+-geo4f*R4b;%$xNG7ZYATZut{d@l?C4y% zp~?{6K(#Pw)5JE!KEGmGb?_eM>JE9HksMyc(RH_}i5%;519{<*A=3qk8sR0y#gEDs zdYtik4^(R@cB_aeuIJ*zgJTc@^4Hg%Cii54@zLotSSW!KerYly9^s@{#nd=fQw(s5 z>gLoFBjCoEGZm?8w2fdUj6!;H*i3G44_<6yUEdl4>V*L06n&N#- zNr=g(s_6C=w}F>olv2&*Mq2oTZwT+0k2>$Ar0;22lSp570PZ@VsDEU4i;+P&aan@+ zcf_(Q7`^$-PYvOSO{(a?5lIc9Hz3bv|ALX7L$Gf`$&B&OO=R1fVq^Q4I|uF;R0yF; zI}@`S#COGl&^VJWAho;8SeH~%sK_%MZzomwHm&gdu~t3m`Rv zIiOqe0T$#Rug65HFwIQV@EEJnxs?d9I}p1X=Iv_H<%sB%kOj}G#*6hAW$!C~w_^&V zzk!9U(g<$a;HBM&w_MgM?qnFbvpO&gop{&eA*v^u7bbE*SEk+BQ_bWmBw}bsh<=@W zpHhCrZ{rW{g&WGyI2bfCMeCZP8BQ!Wjl*xh#83=BU8j52LI24>lSR!iE@pmdj#3FN zAvKHZh=E*p?*iVPi9#?M%*DIkJFaoAo#o>m4|6{is$*Q-Y$jlJuI@Ig1d7Tk+6mGT z(+!C*zJHsk;`_FXJIP?DP*M%2VwG&5$8? zfe*#NGR+h!(pEtez-`Lzf#}n{H{jk?8nIag`HQ3qhV>cgYBXzFf!NG=;8?Io#xzRz zN^k{z&b5%&FQTF>go|i^A5c`$T2nk^#x}YSjdZ@9_^~zdP-`Ow{(Z!c!mU53Mf-aB z>?Y~+Episk{w`0tNBZN`cJPL<87fjq>$KK!L5q#1hI{A3X9IWBolhsqpy1C+;T2u* z=h1j*&%w!?%3w{WsPwyWG`ttWK(-oJvh{VnCh<@efhr78lAOi*^x_d!Q>?ULtFnXT zBq#r9|CeB3LsAz7Erk(<2ExNpsaOGM+Yu;oXLtB-pM4BLW}B|A?n8kvW+nA5eHii2 zU`KjH?jSJ$BQK3%a=Smq$0)1&&+Bycz0ErC2Zl15abEbq1!BwVV9u{eRLe~K=%F;g zQ>Z7q8?rDt{OWvHdtBk*c|{yJr!mwY4LS@78m1q$+=?P#o%2 zUhGERyn(&O#n~6D)26qzEBcCJ zzeiVeG=9(%YniAsZvnwS=G(aNiK(j&Yzm^cb;cGh&#dHW&n#PdNU;>o7i)QdvKRV9 z+r}#{e$1>cAkm_odvD&#qQ2W7jww(jM|)7|xRzf5ntz3!NL^*DEq&W^dMeSI`R(C5 z$i750g|Qnh*KhIp3)(l6FWyd-+3~SH@X0_ECObu1njXZDI1Z|aqq~z!M7A<-*rbV- zepZ1H&LZ+OA@tZb9IroG;7G!t$dh2W62PQFxDbFrt7~GVfIy+{2*W~8O&rwdcgTZh zz-?}7AwaISNgE5XUfOH&P&s9=+zMniDz@p0Vy5O&w>`r>(K^s=YcY{_W z1H81jTd@m|61!g%CIUX$3S4aM2xKKwosqKHWjcCg_|NrpO1Q1BjWIeqDD%s#*K>?v zSTPpXsxPRlBh^4+VzdbJ9hwWABROjh!EoXZ6OjP%xKTDf^`k9dG(rQ(0OAg zefU9VJS9-csTSbOp~2uryevemg3kYw4wxK9?0m|sDQ2ZAoUD{?CZ`L^!Al&N0Q)M; z{)*8bhY>Tb3loh)R{n$|(LN!#F107r&&sniz=M-gkr>yw@*0lwr8QsHT6ed33Wy3i zyu#X|H~6*5k_(sLt*T);3m@z10dsaYQTU^{WSm@R^#ZMVlTbg| ztVG}PQ9}>_a;%u0i);KR$da3jlxP6JY<=^))Cl)~R7nFdUF-J*q*L#rA>F5cMBd1$ zpZ&7(aOA-4=I*}J>1`pTRK~%;oL9j_sb6EbJ=8K@Bm(mHjChi%mI?^4jJD{(QZ{Jp z%84+wwY@TOWcnk34OCp}-eiT(Dbh7w!qoBxZ!p$E9hC9*xg)?it6)2{gX$Y)U`@G8 zO%#rqWOPSns{9mc4w31mo9V!ZFXvQE_?LSGWZ#WwSK8QqVpZam(CNMF&|0~Fbg5};DPmlGlfX9rYgSr?Z{{{*vYMD@0_P%RmUd$7 z-(bILrD`ozH=k0lVv+k4M-QGsQE#?Zh}H;>+6d0vTVzIk77Y2}B-bEO-qG@{c3DkN zwLP>-)2RY%R~mwOvzRTAbPNkj+nX(e&vl(t16YYMJH^URdxJedmbn$(aOXGN5QTD! zQOLgL5QUWq#v$U#rUy{@BW2>viF^hvqoMn!Pg%6R3! zZZ)xa3XAD!_e6FL2-H0SGN)@bTqZ+Hx*G41ZP!+}J~1t;VEkGZF~nh*j2fW6GSx5q zqH&=2vevY&-xhJnxk+w(J{G`yz3;ewQLE7SO5%9wPkj8caP`5tW}Y@aA>l4`P)M_K zl%3rl<^WkSi`4Y9m44bzlpx->(@_eTJG|lg0Qst}^hitH=F6NrzKqk?N?GJLoKgkZ zfP#M~vYpI$E}P;Txrro$bz^=vR$o|#4GFE5sSuu!zumdS*sZs7opH}y!tIE!0<%w_ z`yG;s{h9#3ApqmdoV!Rm}e$WO$1tg=twOF9toaNI9~5C5NP5D}ac=ODXP9Xwd;GH2(Be=u#3L(C;^U@V0v88Pas z(~W^VY;|E`!Fo8wy@v&onQgXxGN8JXda|VM;2@>QM-V3fBazvPgX=qbJn)*m=1Ql~ zGgieW1?7pq1?DsixQQy;$lpV|03V=-53Sp`3uW~;S`_b?PUc{UAlQcgLFm42=e@Bz zDonI>7o!9B4al#j`SxiDp^B-%y!?vunQ1N(*SJ{myH~-!l|EjLX}(MHvXG|HRi6z( zt1CrDRHGIQENM+Du9CO^BdS15nM;GV?>N7p4|j0nvddN4_B=I@;eNs`aIb6tc z6&DXFwZ4?#!;COeJ+X3h3tvEhS-Il1=VTO>RdtDek~G$i_b~C`f#zF}-U2SMNu3v= z^|Tb<2~-{i*C@Q_vz`pwiI@;U>gQ-7u?Ye_tu-OGOi4*f1i)$hG`TK6(V+zCa<9mJ zZX$caX^43qKg8SLtgqr()l#%5@FG*iK`lq)C9fh3wNaqCb>+Smogh$^1xnRZnSx1^ zOFI>I^2lkG%YO!O1|ag8UPg6nX^St7^5=O+L(T#|VmQPChz)LmmG{!A+tiJd4*?A0 zR8VO=vzl!}cU_3Pb2;|KeZVmqrJwnd_lyE#yPz-LoY>%>iU3Ph*Vx zW4LI3v~bq_z7U91nQP1tYr=ypL6J`6WVHhy3w@CLLAG@>Xq5Ne%DBeFLbWP5!^<2G z>kWZi2`;>on^gC|6Im*jpg@sW`5zXV^T^am*vRgq_GRMHC0_4dn^FfBj272A<3$x( zt@my=p$nl-En_Tdg(2&Fd0jBB?ue({891o+}3sxZF2*xgCHD>@r<1|Em5SmcO4DCh^uFjtZ>^Ks{lj=X)sjO0DIT;3AxS zPXRdkjsL?pw)gCvOen@Y76+Ug(@lZ>>N#TwLAH8I(Wp=1{{_I+BDjmQ3r~;!&6+=s zij<^~Kf7h9gN|qzDXzRZRD=I$#y5Cl}6 zntlymYf>>W>E1pdUXu#ltB~&h*=tVa&I>?Ig8}l?27|Vo1X$ zvLU0yBCINqM>mu{Y&iPqa+m73mvfNC*jgoor#_3o1YB6f3G**~yK^)varL8(wHpLX z!YfNfYHyHma$XsDu~)lpcfjFgR`&$5LB|9}^LAayY6?Ew@tkHYSoTQNk%`D-YNg+x z`(_fddf32gXom7%Go&K@f$gbc3VAXGV3C+BkHSUWitW_+VSv`-1|Wo+pyToEjccI{ zmQ%p>Zn!gR6T3i=bUvRbGx%fffA#43H6seHvTTf-#>}hVCH$rQ86MdZzbC|6$pr5@ ztQb++zlBKb0n4j&;Hf83xpzuPLoiyhO-_gUYTBWWs;?S_?;qx?{<~e**{*K#m+9b$ z!g7w(iB^8A00i>q*XqT7G$M1v_J|VZajQ`t+zF7V;%Zg;eu&)8yqd|z(dx2{=1a;> zfe&Q?(Uw=1t`du!=^Z$6-P!3rlq-f!!iiu7s6emeI;UC#ys({_YK9fl;KA>+=sj$u z2fMB?z}4-*z8Nce`F+KvR*V(mwFQ{^JCKKq1Y!&MGU-)5ca0^M&o;LdV=?^y!|Bv( z35;E@zO$UDNue#TTJr{XzQmwJFTK(?OUWTj&PAE&8S9`v_!Gd@9hPE34WGK(6z%{< z+sAgYfmdKTlf@X+L8Jcr2W$!i1(2$W=>v8cPzDz1Px7Y}xFrr4DMZ8F{TL8I zzy?rCX0jVFt?}@AfR)sTvaAiW9^(NlN;$ADCC)Wx`37GoGuE$O1dA9J#uatTmpyGxlH|^;njfZ^6<=Bei(`xyUpzw-)~ypCS4x-*Vr z&ajHvjqeOBg@QMz#MP`=aly|DgdfD9Pt&Leg_AW7kE%gQ7kR@w$&Zr4c-UIzKysnw zI#|M&+Jj}#2X=7p5AP?>Ns%1lV>Ax_aM1wB^4Vv^CfU9~?*l|hj#vuydANAo; z7O+BLGcgb4riuN$%JyNH2&+&#@D`fzO4Pyi#nEz@8>co|D;AD5&+VHJ!v0at06V zc*?NR%eS-EpYZ~VdtDR$s`ogO?x`Mc5glTro^Kur9rRyI3vajd$W8M2(i-6~(N!w3 z?FEEDO?$TzeNbU@D7@vqeIYxH`=No{SOX$MByZI0?czxB-Typo0tR~{OrXGl+iy{o O4!Cd{auR#e1u!1Q#aDd* literal 0 HcmV?d00001 diff --git a/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte new file mode 100644 index 00000000000..77deb0eed69 --- /dev/null +++ b/tensorflow_datasets/testing/test_data/fake_examples/emnist/emnist-test-train-labels-idx1-ubyte @@ -0,0 +1 @@ +11111111 \ No newline at end of file From e631bac5bd2c9c6372fb326eaa41ec5177229ed8 Mon Sep 17 00:00:00 2001 From: us Date: Mon, 11 Mar 2019 00:55:56 +0300 Subject: [PATCH 3/4] Fixed version issues --- tensorflow_datasets/image/mnist.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py index 01377f2a552..fb8c4908cd4 100644 --- a/tensorflow_datasets/image/mnist.py +++ b/tensorflow_datasets/image/mnist.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +import os import numpy as np import six.moves.urllib as urllib import tensorflow as tf @@ -242,7 +243,7 @@ def __init__(self, class_number, train_examples, test_examples, **kwargs): class EMNIST(MNIST): - VERSION = tfds.core.Version('1.0.0') + VERSION = tfds.core.Version('1.0.1') BUILDER_CONFIGS = [ EMNISTConfig( @@ -251,7 +252,7 @@ class EMNIST(MNIST): train_examples=697932, test_examples=116323, description="EMNIST ByClass: 814,255 characters. 62 unbalanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="bymerge", @@ -259,7 +260,7 @@ class EMNIST(MNIST): train_examples=697932, test_examples=116323, description="EMNIST ByMerge: 814,255 characters. 47 unbalanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="balanced", @@ -267,7 +268,7 @@ class EMNIST(MNIST): train_examples=112800, test_examples=18800, description="EMNIST Balanced: 131,600 characters. 47 balanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="letters", @@ -275,7 +276,7 @@ class EMNIST(MNIST): train_examples=88800, test_examples=14800, description="EMNIST Letters: 103,600 characters. 26 balanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="digits", @@ -283,7 +284,7 @@ class EMNIST(MNIST): train_examples=240000, test_examples=40000, description="EMNIST Digits: 280,000 characters. 10 balanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="mnist", @@ -291,7 +292,7 @@ class EMNIST(MNIST): train_examples=60000, test_examples=10000, description="EMNIST MNIST: 70,000 characters. 10 balanced classes.", - version="0.1.1", + version="1.0.1", ), EMNISTConfig( name="test", @@ -299,7 +300,7 @@ class EMNIST(MNIST): train_examples=10, test_examples=2, description="EMNIST test data config.", - version="0.1.1", + version="1.0.1", ), ] @@ -330,7 +331,7 @@ def _split_generators(self, dl_manager): "test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name), } dir_name = dl_manager.manual_dir - import os + return [ tfds.core.SplitGenerator( name=tfds.Split.TRAIN, From 7699041a9157d9df5c1407fb3f98f28515b8a443 Mon Sep 17 00:00:00 2001 From: us Date: Tue, 12 Mar 2019 01:42:35 +0300 Subject: [PATCH 4/4] Added statement to check downloaded dataset. --- tensorflow_datasets/image/mnist.py | 24 ++++++++++++++++++++++++ tensorflow_datasets/image/mnist_test.py | 4 ++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tensorflow_datasets/image/mnist.py b/tensorflow_datasets/image/mnist.py index 2ef5c820575..9456be7a476 100644 --- a/tensorflow_datasets/image/mnist.py +++ b/tensorflow_datasets/image/mnist.py @@ -333,8 +333,32 @@ def _split_generators(self, dl_manager): "test_data": 'emnist-{}-test-images-idx3-ubyte'.format(self.builder_config.name), "test_labels": 'emnist-{}-test-labels-idx1-ubyte'.format(self.builder_config.name), } + dir_name = dl_manager.manual_dir + if not tf.io.gfile.exists(os.path.join(dir_name, filenames['train_data'])): + # The current tfds.core.download_manager is unable to extract multiple and nested files. + # We'll add soon! + msg = "You must download and extract the dataset files manually and place them in : " + msg += dl_manager.manual_dir + msg += """File tree must be like this :\n + . + ├── emnist + │   ├── emnist-byclass-train-images-idx3-ubyte + │   ├── emnist-byclass-train-labels-idx3-ubyte + │   ├── emnist-byclass-test-images-idx3-ubyte + │   ├── emnist-byclass-test-labels-idx3-ubyte + │   ├── emnist-bymerge-train-images-idx3-ubyte + │   ├── emnist-bymerge-train-labels-idx3-ubyte + │   ├── emnist-bymerge-test-images-idx3-ubyte + │   ├── emnist-bymerge-test-labels-idx3-ubyte + │   ├── ....... + │   ├── ..... + │   ├── ... + │ ├── + """ + raise FileNotFoundError(msg.replace(" ", "")) + return [ tfds.core.SplitGenerator( name=tfds.Split.TRAIN, diff --git a/tensorflow_datasets/image/mnist_test.py b/tensorflow_datasets/image/mnist_test.py index 921e6bdbbfd..c64e2268f1c 100644 --- a/tensorflow_datasets/image/mnist_test.py +++ b/tensorflow_datasets/image/mnist_test.py @@ -52,8 +52,8 @@ class KMNISTTest(MNISTTest): class EMNISTTest(MNISTTest): - DATASET_CLASS = mnist.EMNIST - BUILDER_CONFIG_NAMES_TO_TEST = ["test"] + DATASET_CLASS = mnist.EMNIST + BUILDER_CONFIG_NAMES_TO_TEST = ["test"] if __name__ == "__main__":