From 979ac971632f9713455110e2a36c765a748eb7cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Chollet?= Date: Thu, 11 Jun 2020 17:33:32 -0700 Subject: [PATCH] Add the ability to generate TF-format guides (#87) * Add the ability to generate TF-format guides * Add link conversion to tf notebooks * Use fully-qualified links --- .../training_with_built_in_methods_64_0.png | Bin 0 -> 76359 bytes .../customizing_what_happens_in_fit.ipynb | 41 +- guides/ipynb/functional_api.ipynb | 99 +- ...ew_layers_and_models_via_subclassing.ipynb | 63 +- guides/ipynb/sequential_model.ipynb | 84 +- guides/ipynb/serialization_and_saving.ipynb | 116 +- .../training_with_built_in_methods.ipynb | 249 ++- .../understanding_masking_and_padding.ipynb | 38 +- guides/ipynb/working_with_rnns.ipynb | 80 +- ...writing_a_training_loop_from_scratch.ipynb | 44 +- guides/ipynb/writing_your_own_callbacks.ipynb | 20 +- guides/md/customizing_what_happens_in_fit.md | 55 +- guides/md/functional_api.md | 57 +- ...g_new_layers_and_models_via_subclassing.md | 71 +- guides/md/sequential_model.md | 58 +- guides/md/serialization_and_saving.md | 112 +- guides/md/training_with_built_in_methods.md | 295 +-- .../md/understanding_masking_and_padding.md | 42 +- guides/md/working_with_rnns.md | 54 +- .../writing_a_training_loop_from_scratch.md | 104 +- guides/md/writing_your_own_callbacks.md | 292 ++- guides/training_with_built_in_methods.py | 9 +- guides/working_with_rnns.py | 2 +- scripts/autogen.py | 6 +- scripts/generate_tf_guides.py | 242 +++ scripts/layers_master.py | 5 + scripts/tutobooks.py | 2 + tf/custom_callback.ipynb | 630 ++++++ tf/custom_layers_and_models.ipynb | 1239 +++++++++++ tf/customizing_what_happens_in_fit.ipynb | 610 ++++++ tf/functional.ipynb | 1420 ++++++++++++ tf/masking_and_padding.ipynb | 618 ++++++ tf/rnn.ipynb | 918 ++++++++ tf/save_and_serialize.ipynb | 1330 ++++++++++++ tf/sequential_model.ipynb | 736 +++++++ tf/train_and_evaluate.ipynb | 1904 +++++++++++++++++ 
tf/transfer_learning.ipynb | 926 ++++++++ tf/writing_a_training_loop_from_scratch.ipynb | 852 ++++++++ 38 files changed, 12237 insertions(+), 1186 deletions(-) create mode 100644 guides/img/training_with_built_in_methods/training_with_built_in_methods_64_0.png create mode 100644 scripts/generate_tf_guides.py create mode 100644 tf/custom_callback.ipynb create mode 100644 tf/custom_layers_and_models.ipynb create mode 100644 tf/customizing_what_happens_in_fit.ipynb create mode 100644 tf/functional.ipynb create mode 100644 tf/masking_and_padding.ipynb create mode 100644 tf/rnn.ipynb create mode 100644 tf/save_and_serialize.ipynb create mode 100644 tf/sequential_model.ipynb create mode 100644 tf/train_and_evaluate.ipynb create mode 100644 tf/transfer_learning.ipynb create mode 100644 tf/writing_a_training_loop_from_scratch.ipynb diff --git a/guides/img/training_with_built_in_methods/training_with_built_in_methods_64_0.png b/guides/img/training_with_built_in_methods/training_with_built_in_methods_64_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f6bd989a4bff6b6b458adeca0432b51a0e09b538 GIT binary patch literal 76359 zcmeFZg;$l^8aIkaBeDoVO1h=HK|xv?=}u|sW`RgaH%OPHbc-OpNa+Sik?w|XvXAHN zbH1_fxPQRCW2~_^!^K*>bI#{|;umj-;%i9^G(t2uI5-SxDRE^uI0St-xCdt_NZ?;= zO7}&<{~kChOTK_B8zkO@gA;|578g@>d$65`+&rdw+Reh?A4&la&+YU8>p9sn8U(J_v_8d>FX+0GsTeUcoV~NJ=h)E#aEg7SR{$sdmBsfBwV> zw#8h1#pYat&I!p)|M>GCTyWY74`eS{tIU1}a1BNA*Mp zDz@{DHSR|PJe1D@=VSj`NE`?BnUKfl%9c-)tqrlFEeN(>Pt-ZAr^>yWe%}0RGssat zl8P+IrE)WwSaJ6y!ZGO|Qa@e3?Tw{Ptr`**5SQg9hr-AetIXsbN9B2v$zOg@9n7ZC zkWOUQ1s}5MfAU5LOKfkuk{E^9c}xjO;P!r>yO!n;j)p{V5K-eWphF^ebDVF-dIV3W zjFI-75{?m4c^c}FC|mceeke9QWLqi=^C$hseU{4HNBHbTFQ{@V z_0Px1{D47U2lx8_;gADq!7=?zu9W=ev#>{1yjad&LjLD>=>)-dqYcFg#r$)4$oduL z>;CnpAw2MRmy_T69lA-(%bIriM{^`2>*c?g9Pw@D#`ZTlZ#i8Pa=lHe8szUA`=T#m zD!h^AnohuLpE+$=kXC)W+q5?^ws?0jrgjS()Yq4t6~|jG0|F z;W--yAt&q0pvPRuq+aFI7CGT_+CR*Gj}@+iFyYzy@~vfarly$hpaVy~RI6r=+fby# 
zvia`n+Z~4_sdsh5^Y~EErKR=}LbiW;wsK>u{yI~{U?0o;{>6nm*>9gEh;k)ajfiRX9i`uz&vW`9E zK}i9R`FpG@dueR1SV_^wF}uDWrE!=?xP!I!cZA>+KLQ^ZO7-0PI@t`aC10iBwOLOT zxmgg2%dktFNYN6*7MfN|+{wzys&r;0s@HxS@_J6BFgtHThTmCpBy8S1zgLfZR+Nae=R&+ zei+B+I_D5%IZ=|V+2~YAz-^t>u>T!>Uf|o!sdm-bf@dmN%T)8_?_#qb*%Ir=qY6Ai zo~NAP-PBp;?I7Opx`QsF;_pRZ|JU0wMdUQ$5vb{sGRIQ5ZBmj&e0^Zf>3_0_C=I-u zB(r{t&fR(0;1#>dVuM~-Y=4#YG`sf~Tkw5F)xYMOTw(97*)egS=r%ejX(AlT^d&H< zMOW;#Ts5Yu)05=#(W{F5@>ythHv`A;h8i-l!VI)mHEpcZ@E8EhU*{H<6*9S#j1RQ2^y%VV9C{0S=ZZEBR*CCwtTN9<@MR^V@ zU1k9ow^kj&e7BnlzU-d`){~6qX3HAZ301!63C&CJE%`m1)5RYPVyav8X@c&CS2FGJWnjwDW6k#icp z?X%@*e!kse%WhTqq#M=0k|ER@!GJ#WY%A5S#d1RXi<579r`kI7gu^6tfM;<|f`IBs zH2Oh=z+Q9J^xNXnenfm7gZsRZdF)mi_h_bx z|H;1VqIbc`=`$EuCo>kd-9J%I95XpBTm+zifTC5-s`6;mM;-?Sxwx z2G3mP_s(haj5KzFdXzS%^+bK2AD1pWGn)s+Us_}=6E_hA*bNK=zk?W3cYRAn*Ls4A z_B09kC4owli;Xj%Kl^g)L(DPTikw8*kXbjE-Es#1R@cqE#2JQX_u2YBVcWqgKDVx& zHgi9VRX|>2J`!Dket$SYQ>!Yu4vsh~xt#t3MpY*K)~6b)$&{Q3e%RNKdvH4KhKAQL z#xM0JaUwdnrZ-SnVx^DQCqYzIZfM}_^9dok z+3~L*NKMGK&Br6MjRA1@kaILjZqzx$@`wH+pFqG^Iwundx|LrpEem(BxAFPdlOVo8 z(XmjB;9W*>>>@D#Wv&y1h`OB8k=H7YCxc)AyA``#8?uMkGBokRtTXh92LYatVPD4G z^+sI&Quv1B@4|w{69@%JLM!Pj0pa7Lxze$5m)5in#F&mq?I6^WeXmzY%}#!gPbai( z{i(xI@vWF5*9QA#uZX#N=;;hio^`wm(0M-)0q(+~Pn2by_L^d!aO&@}sifDTCCGt@ zI#3&Ee}Fu>J*qa^xI*p0?zcN1o8`zhoV!qU&n3WYGx1iym;zoPH#@h>Gc; zCh9$5LkUGQZ3Af}-}_~V-xU4lm)f`}9Q*jM_^$WcG5Up-GKZnkzlF}`ayA^!k!^+S zPbv~<7WfSDJG*?vgmgR%T0CL&1#YQuw@c&vW(?_Oe6&R`&g>o>zkvSO6Rx4Q1_J%T9pn8m0))Kj<~ zS37miv2dZpbPxy!W;a!@V6de*&I8f(0CuFxOdesqq#)JLKJ4KqpeAgY-XW>wyb7;g zfPJgQ{Oi7WR_H~)Xx^P1D=LBGR})&pUx6M-E%VLi<`}o70>Yf`@tZ!}%&t^1^HKeF zWnVQ$zo6j(R3s+DeVt(bLUf%9f$b>O?}httt&O%o+_SS8D;5mD4_3jh{*JhGOa|e` zOor8!9e2$jRq{W)L!T3265k6tL=&5bAk$332Tr_4#d#FRu%GdP&yN3$x`{GqN+>u) zISsq}U^^mCZBETxw>jufB@%@rIJRv*c;%hp5%NCT9YTr#T~f+d$gnMU2$ZMOKroJN zg(*qLyDy=I8`Sk+Co6#4>6TBF=4Azp!gD@-cB?l1Fo;^IMUp`ub~f#>PNjH&;6H*h ziyt!?^Y8`Y?eN`ZK}KANJZ-QpI^AnUmpKQ?np`NKU70WEtZ+5ug?SiLt>PA;FU-*M zE3PEM=F8rEuZGS1)JK(wt!4E_O{3CiG?+wcauh?fRdhqLKP$2gS#h)9 
zdO>E`*W75e;{7M+4NzAZkKA~o1_h7gcJK3(#qv_Ds=;tJI!R#(DFS&3y}(12F23YLoAPN!Da4eaenvB z#`f`ll5#c#h4QF%2DeSmG2XS@PX=uG9p*Hu;YG~-zKS`POH(%!;Qkm)zhX+T4?jev zi?ho*%=5}1F@FtD6-D{Ls&F`4BJ9)db7V1u1R**pg``63hP8wg7dDmD@=1Qz-4glI zeS^IK#;HJjz0WZGrefnSYmOiL+(hoKtF*MpS8&k(G}0a6a7N$AwiL<^A5SRJvtvGY z#1uLnZYGt_xN`- zyV~Y;#Qy9Cf0}2O(`MXu+PR{ZGG(#v!vm?G(vXd6+HI6oc}VGrt>Xyl{^vF)yM{Hq zhk*z>7K1`(I;M>VZSXwp*tjVBoqc`VDi6qtAH$m6j~*$D-QDc@PT~%IZYRAdZ}y8vy!|B!S~v|E z;>R6gpvwr)*X-DZ$9=8{pGjsh4Z2M=K4T`XC58a9Mii~9PJDffCb0mdLTBV%-ews(zJsNU%i#dT{ zs^%Pk=Xtek{?~i~HNj~mU4MxDnDa}^Ws$$%^MEspGWH=bNo8pgL`T83c^dA7E$!1u z$GS)PJqUfg)spk~J|d@jmdCYhq8H(2km(odbJV?<_tfxd<*63s4%Po75o0{$>4g>ssof*7hBD=LT@98ye4k15N} z$2RG-I$1%9h9mjh72Dj89?PoP60IhpnpWQYXQp=3$Gh_0CsD%A%+4GR9l zZX=_KzCd*(=y>s);(l`ablFNW2d0sO10RpQuV=*5f-(Y;%6)>MU^<%QFz#GmM@qkqj(rY-8Yg6q@Fm`tr#sq#m@Yld z2mVEz28e(1owzt$SvPmFaU*#Q)scqPEE%4m3B*&s(Z}Eb9sRd)1Yg(tUQyF4S=aZn zUX7x+aUvs1jNd#M^o>sYm%g~qIdms)l{F2Ly?&t+`s+sSY3FFBa%*87yB^x0*HDM=P5C?w?&$(d+01c|B2(AqE zGFh!>Pi>W0-$=mKb%hgo?#kUcOjnxDv_zpL9On?I*sh3G_}|5{;y)8`v4)N8dlE5g z+WX=ZKjqqcZP|*0O$M!`P#k2B)Ij-+y?{x)hg2V)(5Sdhe~q&b6YR0jkVdqOuYi82 zrvsB*&@^b<2T*B0EiqTbu@8a1Kc_N5qp9&IWclG*B;DtP@}ciUqvet;P@VaRe7WT8 zb?4Cng)kcTq$BOZ?GRQ1_qXZ}dy8*S@7vro7#lwfih{ zRwhF zQR9jT)X_Mb7m(IHQB;5YY|~x&UoLS191BXx2t`{itKJ28{&IYMf(8#J5v7%i_ryYs z6Mmf3LYga2etHYX(;daZc2E{orXh;1Z+dEXW@;R%hHY!Rl}5H28c71vtf~71m0+Y! 
z0%%tlj5-2Hb4*#wj26FEB1QD7O32_WKB4t*X8x3jvq|DgDC$AiVX}ujXH0}2xY+;M zKCdl4oh2}4D}g0)K3P7%J^T3;sfxwe=V&p%HyUf@s*!Lm5(&(@MNoY#zT3k-7M}Q8 z9NZ6&Ha=kD*Wwds@yi=9l}b<$cy+I!BiEhmHjU-aw=PX55%3s+!wN_TUQSD6W|N-nbv6?{DiFYv$zLzzoLs*m26YN`%!&TLD`N^of>2&3JO6 z{y*LlT6pR2DA>zR{0nRW5EdxpQdRF)8_99Z2J#Zz4|P-H9kf&)R>Zvh}t$aIzaR%uWY@|Upc@@018L6JKz%Jq~KqdJ8Q;gs)B;DeH8 zk{ySH-i*eQe+YT{Dz5}>v)^FzM}@JZFXH7xm~$4RzW^*Ap)-CK=S;MQ$&;lf^9=+6*Y+<{+wkSoy@DbDJ;l1}E# z@w>UDa*B!XsGsAcDPRx9==~IY8|EToN1qr(Jr^yV-?`{(Kjoi{6Za9$1Wm))>-J*f zRb^!1HhJ={lW>$YjeLs9?QwxfVY9=CH0>Ja5Lq&Ep)YyR($`3XG`Eah00tmg8rjgt z^~YM;#JLx-94&dWza68y(G&e}+%=oiZ~U!O5FDPkOs4%axo!GjQD!#%rLAU=o{A#N znsKWI9F81TBfUy-IFu#dwF(?|S3O$``x%Tax}q(0>sH`%b2&4Z%5d6Co1v;d4DMas z(Tu?ecd2e4V`42t+nvPoF)-*>qD9C&;CLqZIU%2>Te8R=ip`$&D>IC8niQ zJ#837!dV-F;5F)4acIBbd*^NAI&GS0nMP-ROnlp>fwP;+=a@es1a|5*z+7rxm*^9J z%K7B+yHJakS=%FhleEI?N9|$ExS8R^{;!GEm)`@^%~<%n#n9WWHVJ>kR|bis?wd8K%#$PP+2&;zp`s)f2W{Go*mOAK zmR$38#$Jjj*?#=oulN$dEd{HXZ;{_U_^r6_IS#|AEX>^7DyIrCNe7+od^E;$67-5a zmuXXc^NwJw2o0iNSLXn=8{I{xJ~GkL)<`x7QwzWTw3B0Ex=FGmcM9=z`8%?g((0n| zmSidvGzKl~3WY{3^_aCEg~gv7<$Z*7ye&G{?NuG^^rkA0F$jqgWrpF)y;T1CyKjU+ zUy)XTUJH2*@!dd# zuj&{6SJV-q6YdYaq`fdC=VIq}cSui+gnmwlxkE*^scXY0#CX3qJV{Wre7LH$hW>k| z&TdZ>y3AzYk57+~Z{7LDOYDl($`2mt2&b3?Ig~sOG%TNuP`$83nKQUnv%RqT=+eTr zzi$fSJ=e(Gs4^d#aJ)92cF*8A-ve7=OH^U>uEW1aD3OEVZ~Y>!?~pT_}u42=w~)zSySED)iJfpF)Fe zqLX+pg&Iz0tZEoYle-!88E%Bsf=`szLlR2@!qph`x9tL#iId-I`NVbYe>j!Adg}S z)sU>8ud$t<39kH3#znYW;UL#x)M4}RO(Gb43OCEN2@V6oXN^(=Lbi!#M?Xb2lo-~{ zyEv|N`O*5XP;@?57P#Dqx1?yOJ~?~nVlNQ;`4oxS)Tkj_-oVTTz5lJ@yYPk(xVKcZ z&_>_3oktSvLdgw|8*k-slN556JpK2c_#fV!uSC9zW7dky^gh=kXSHWWv=X0j%6(}j ztpI1qbKr}=>+!so%I~f{5A8#;+_H{y9@c(~3Kow7ckx&r-d^Xon;pCL+F9E^_gj!5kG^y3rE6OAq07+Jc+tDkgb^j`WKdji5r#p?li* z?&bv3slx5YQ}Q15_x|T3N>9Ys2Gs%3TjXm^wk}iF$!N0XVK;RO5_bzkmjrRET!yu@ z=TJe|YRbMH!&$Jxhz+v4G|DVsLQnZZ>izP=g8<0ZTAa#jwitg+X-E&G^3WC>2Q%ElmOI13w|msD`5Q#BgG2q! 
z(n5!(=ER6OD&8t~qAvt2d<+Uqk(@fCG)*TQf2D_#Ecra1%dn%;v~OgPQa>4)#C2<8 zAlS-#hRv-lDFla>5Vb-ynU+Q$sVDP!Cv~}=L(qyvCyn2d{qXTh@RGgn&6fTN=7I8F zEC8I zIigH_)D@vyhG39(=2zCXp?*CJxvRp$(Xx9Y@-J#CO4J(FZLzR?`Tg=;{2RR=OXn#5 z0`FF(s!V!gVg@qU6?S2XFZu+}pNXQbuq;>ZeOczaFI(cT{9uVa!><-A-11FH5U{Q~ zMW(>SE@U=W@9+w~_-z;2m&}-&SQNrwyVBE8IU&08DY1(6jPvIXZ|xPS!+oNBS?y5N zPg6>}Ww{cuKjhc?(q?a(YL1z9=2d}-Q{|J&qCB^jYf6%8XNVx+)l_nG_i@IP;cTdxR zD%R9X`QXSgrA~jn;s&u-K+NIzFy4Kn6r)V_ek(Voa&*1&ozK^50aitI;Rj1{T<>QS z@;DyWa4&zH@G)y0G`?>*h>H9yN7!A2Z*~udha!;RV`V_=+EH7 zBFbF~!f4%VC9WHG}|#yi)QcE*Q({ImTo+>?6#SYgEdfVKF@dAM?NVJ7Y+3XxG{5?vNhgn1%#! z&bckPwD8Kn>5^F8USCAesc_0ur1MoilWtY_#*?DlWU-*MHNSnkv610BVJ+!G%qrnR zm8*-J^<#}%cxSK``tYS`?BQd7=Q)q9(gg;e$7HRlqYo6Z)jL&3Q9YM*Y_;GU%mlE* z@Z*{GBRV|2FAmoR?tT27aIj1z7*Su{5s!1;Ui!2Def<|09)9`GM7m{f+ zyecV)v>tbfk}Uk>=Ac}~A6o+zvJ^w5pXNvk9xfi;G*)5*>e|%t!SNYjvc&rYjn=ZK zi4LsyDIMI1m6$hGhTg1KOhyPvb?^!)Axf@f&q5gL%!s@vE?BO2owc=lon;diLzVI|?_2@Pu?i zGLXT|wx{-eX3+CZR5IQSIu?w8^gMIb%{LMJzv-M$Vx9KmTU5&;5!kUIfnOVYmgk?; zw(8-o@wfQ50xtDzcQa?@vbGut|JN2&_zP`Qf*&%Hu)0c@IJ zBN)ZNR$JBfiQ{BAD04GQw|H@BOtVuCgqWemNoDQgB%xMz?qFI|VUMV>TU_|~lcukGt6_I{K?j)UC z=wMgi5Wd*(LwI`bxeYKxZu{ttJ+dp$@WY@Cb@vRF3+06&mX&P~)skN$M4n!rY!x)U z2wo)L6d58rvCiLw}wxggtXI6$h#in!Y{=eAC!^r9^t>?pk!9dH|(vKUQNun{lj69rEq-6(-^61i1|ZZn9f+`x)tkNv%Qm)4PoeWU=qFp?*5sVj zbxrf3HNZj^Lxk3638HFe_P3RhU+UG{XK7W79URqtyOD458&9kkPXSY}VjwXJbNPBJ zCNB5+ONvD7f&&9nZi`_uU4i#)J9Uc-x@ZV_B(8Hxfi%};ftiwPVPQd}_u7fPAUUsIu@lZ>Y08l&inX-Qk-s5J z$a()(k7xl@D7Ek|@2+mwOzJ&ecIg?cqQrX~aSiX-F`hfz zuC}Sr@56Qa$jGIj6H(h=s}C)k-&;lBP)d6vJgwSMC%Lu~0wqBR;Ki%={? 
zOZNmrQE)@UXsI?2@?slZD1z9;op#`_CVn=3@$+F`!P|ohy^6OERGu$kc>qbD#|re* z=J7nWz7Yv=6EzIWWpst8pjazW(bad+#)vS_X`nrQQ({PrdR@AWMJt9tLV~{7rz1n$ z-F__*y%wC^iB{Bx*|aZ3j(u4(#+U^s7&U|z(9vQ#qF zFM7PnJMd0$O(_f#Y`V%K#vVz>&&iClEHX_o@p=Y%nKO_n@HaAhECPZn9; z50j(x?m-G&JJerz7QN57WD}XkpDQ!IDO0Tr;N3H8 ztHy4QjyBw$sz9Vt64sUWGjxuR)z3l9;50&Soku!ASXLKrurz4=9m$4><7|R~BPLPs zJXBH0=Vbg%RwA4$zkSUFBes_ohgTas0^Tcw@Vk|s4rm(v_1)fV-Iq4o$Dt@MvbtZi z*N2eBitg= z9yxng@khg%{{7Zb(^yvnc<;IW^3ZQjMQgRR>3lV?*SvEN^O`gCe@(AMX3^=gj>sc0 zV=a(=YcOcMT2_tZ^PKb>a$r$xb~kSA@NRMOQN)#TL8ByS>Hir-TDJLOhbt-UF%h@i zLyh)%B*X(r$S|QueFQur)TCl$NXpa)@a+~b+lo^~Q@A;^C@mtIG0j&y*8;s|0aw+*uR*^r*4dBCR?5R`>zgb?}K-_MYrQ zfrKxW+{3;|Xdj%h;uwZ+bCx7fGIY1GgD(mveq#EbALKj^g~Qd*CR@Nj!{zn9JpSQV zJ*6f(%uOdn71(r(g^`DFc>@$oFHSH!;o0@F$bQWN$j6N8Yhikh9oxV-FN2oSs&ACi zqXf-~kq!Yt7bWX!{INw)Q}Tfv;nm2r=YJFPZf10fyGV#*uRhTud=rma{2f>SaU)G& z;`ZijaqDbMduZqneGB;@N^X+w#ExoIVH)dRE{&LsxaWx5BmcPx;9GNWb(@z#TtY^@ zx`19G)sA|r?u9xaaD#eut3(n*voiW6*lZce1|2+^eO=Zacohval)`WR$+j|?zYs2s6agAJoY*KCN^3Q4r(JD`$oWlj3RPEh zW62VsPM3Yewg3utnE#TOlzC`eRJl$Y>5mCz8tpqbf@RZ>{&dH`7F)=&di+@7WmA>o zcoy7V2AzCsy>Wz&4+51T*Jq;2^ffI5uJ2#yO*yRt547!NWJE*r)^8zc+U6b8X@z5$o~s54`(12FTa!vB{Rp%dbVj#=rbi5Q}rRI@Cqo~{$N zI5617cgSG%hc3T)$8>v3?$?Sinel;l(x-U#7N~{Bw5#a#qih>p_ZDdAmVJ^cuq4rv z`^sYxWbwCWbi7Z-p>^Le&|pRA%=KjZ9!W6X2;@3u6S?yK_L2abKWalb1O~BnsAqlr z519kiohu$7)UuuDrxa@*ml4L90P0WE7ZSf`r+0ZM$)YAK}y>d!#A+}xEjaRe>1O@SegB? 
z;fkkQU;?5d*qp8^&}uQ+WAhGsae~9?@2q~uYqna%i6bBLkJ$Vvw{`E{C&6Mcr4n4=0=Gg6nvJ7_Ia$!JEK zQZt=$@-OrOM~urTI7LLAmUFL}Q@b#7b>6P=if|yh1#iz+=&iJBt&`I!>6k-nGDQh(w5=~q=#;FX zMhJzOJ6zg(BEp9~lv<`7Tkkg)8FYOvH)(xcFOl8jVH9dpQWqIshtgeG<^w5!b}WgC z5dMqTx6IT<;HpL3l``)7sHDGtPwT4_i67T#7Ab0$tr+h19KTnlqfT(?^8Hs(V2fo& zu`AFdcVKd)^#`0=1Govgy_#5HKfrP%in{dWWs^}nD?8>~@lP`n)pyZi>?g+4h~ZfI zgES5rfbExY>=Q;LM-s1UeSms^o2^_ zkL{kIDbk4YIRR*U^D9ah5@V#-a;k;o_%*U)5V~6aIhWKxc)uh{+y;|kBbg8S#3{g< z+*leiu>LUr)()7L?Nt?qute15QC>%jIQ>$5*oUUkK2;)!!pZHhEdFqk&`S5g8D7#p z#jhEBeXlIJsC2?6u_+}tK=_AqZFO`qmes%DAQRhEE>>qc0?_{3eN`RwQITsqEZ^08 z4j;eliC#H6Lf(6fm=Cw1nq(;CHB?5W2G7J01!xrcFH~L{6`-F4dvkc6Zf9zbD(tzd{DS`SCwh;8E|tXFAb!5joxa7)l(MZb}lPER}k)RJCG zMY?3#G&!RXvl#a%U2NZLW9Fzt2q_}Lqo2!RALXK`{VyejO7!*@6lm?H4s!U-V%=Fq z==e*nB3z#Va(Gr)MI=fhEAxAPmfEMOtLE;}*8&5?-p4<*5R+a0OAQdvANC$nK1RS} z`>A0yMpkw39nsA#6wD%01nGUggb64B67h~O*$TEtaFK^Updw=yjL3BwbbpsE>L5A8iX<{0U1=T0DVs$^=4)rPi?|MX;O3W83KsKLr|C}o_=_RGwFJDi!;$KahJnQj`;_{byO3<;8SV-D~aIUQ|mt=kPmQ>=$ zN^qdu%vDa52w6Y`sfh4SQnR8PJW*U$ORx$MV|_M&KiQh(0fWRq{daku`LVCIE7kQ+ zzWMq!DJw#)9+%*;$6MdzM3T5nWy#=gc9o2j!#xorNMRM0dqXRiWB}wbD=Fx+KMgQU z0-nzDd%kbN)+xke(lRM7B1R_=tRwCYP{VXVGTa%hvs+~FDu20t68~ky@5!IGWQkl6 zEI3Wh-|%~n;6mm_IoxSt=54^t#UA6TyR*qND;pxtNLKdXuPnME<=0TIQ?_nm;|YCV z@7d-VNt0qZ{2q{`YAU*RUjaqInG`QMfXe7lTCZ^tX+HOys|+IAYS{O_V6dgrfm|Wk zzMNwwU$*k!{6C)2nz$yNuefyGxi@Hv zyw-_90n7|W=+IGr_b8s#~F2)j2)e@J|KB^Cr*)&2A@#E4VwJgqybaL->vlSVk)**=D-`PJ@iP$r5&Xh#^U)x5GFE-MRJ$&hDS7Jz z%VM$R>%Cy>OF{uvlps8i45Ue)?MgkvD67{&AtRCpDl~ZjAHz&x)T!oQ^uEe)b0e3@ zz_I@VCJDV0cq{GU^`AT+&|Iv{LuyNfPg~UgWZj`-0EUpz(A0o`JP)dH zxOf(6&t)B!e>_YjaG1)fbHpG2$msqwpnr7YmH_*gp26Ah_)nDl_xqtTfd0r{UN`s? 
z!2j!Zk-qmLjimEV%Kp9lzkg9t^`7KZ$Nk90&OFTfT+jFP zAD1+OqIeNgO*?q<=RWw~D8-Ej`ytUe(DvUe`OnWW_l!=ikV9GGKkY~6=X=)=gYDy2 z|FoniPOv1Zoe;;be=qaDwnTy&?8pCq#Q#3-|1Tc#=buv2Guih+4PA!RAHo<1jyFdK z?7rQsKdv_$AgsE*+)8Dto%&wdegV|~{ebb9=lA*Bt_Q8h`ZZb*=?Km9ru)f1C6g#rz{%Qy@_aeC-~kx2<<_s z1|T-4%?zjU9S}C917~2B2MTA&_~Em1uIT)B~rLM%UJVf7XF)IA5MB>wj^_PZ{*jslnL>e`*DM&_mbYs1FqYGKh{GdhnlYB$XM3P+oyBX=&H? z3P)LK67ap+1Aq1FnFz|h#t^i>NN7=mRGV5hsfJ(kC71LmzAXzVy(!bNjR;Ke|-6N`<5=V<66wY<3C%kiE7<)v0j#{ z6o7;wH(91zwa|_P-t46=7kihe=gz2KZUDUifGC5Vp^j1~MAU#^qv_ryXf|G?whqja zMM*Ud;1_p%hUGf zR1ncxMU!zOdJ%XoV)`{6n?c^-)_cE%dYKM^Q^@^RffpaPAK6Vm$Iz-9ekzb3bU;P- z7QPO6pUvY371}?({~bj|_$lJcw&iotuWtG%)E z0KP!G^ae(>ba&wILYpxspGZ+4y_Y)OlIjC9m_#WfDb1SC9uU}TR4>jF2ZLiKgzu?* zv->TK_wKyM@!lSK9Ciuxo$42QVtUI}mwQ!}GEk65qNEXgV|5(|xPDxJH~At#E?Sq9 zZLoeH+6k?|Xl0JkXf@>iU+46W#vTC2aYx5|uHe?7z{*^cjTUu$3Vy!B7Okbk`@4>R z#)XfwVOW!XM?wW*0=IPbTHnIo@$#?6zXoCQ-uFSh*WVxhuViR(--a^jp8~7L ze^uE(KS$Z$*WzpR*0nzZkN<4~#AU%(TU=DSVoM1=$km$zDfm$(aRma78_2B#AotFN z0ReE`Cdjs%V}-GNTP0QfhiiTFR)!*fSzqtvqa^%JtY1CPxPkSp_&$<~<^WC94>%(0Eb&xu1+r-pAHvQpGXA%0sk6p>7U#Um@X$_<@KJ5l@ zr)D5U+XeaUaxjRTStwR0Lu{Ca~l~U+_aH za7POOYc0%*;oJ3L*l4EML*&IFecv0dFD-{(FMvR@uP&_hiqvagOmgAbAW+9Y{9<0@ z1tK{Oz{!gjXW#Bh2kz~`LO^14mev`DFL3X3JoO$DIy3p55370jn78$!o%D9;5efg; zG(b|TF_l(xt5dnG5+>}Lc4{L`-VP7(GHX@S<>@BH)`y>guN45mzbimV`&mc7jI0Bg z#|a3_zA&7WfCHq%gg=ywq(oagOnPpviHC4|^2K-4^RS!Lx8&3-mJG)mC{kgTy)o2$ zj-fI%3~||r5OY;Vz$juyu!U_ z^j^2A)@3T;4grC9cx9wyH^?W0aI>hD{c>};r9#Aa5y!*D6 zpxqBZc5Ch_gNKegHIszyT$WY9R3UTa14hdJ`}H-k1ED)dHko60^^K`OmqT+n&%)Gm zjq+9lTM8#rZTK($mLshq(Bb4s635HE$3iF9&dP{qe3N1vDWqN+>afd_{fP}^HFles zYPNzjmy~J1A`FZbsto*PHMLN<=ulwN+{n9poJ~P3BC8Q-rGH?jrV^@k`h$F zDRsW+rTIy84scR#Fc@gLt>nF`C0UcP7D$-_LN7G2YpokM^MAiHiPtrn zHoCX`ybuN3QB72l%Vsoh*yyyGyI72~qB==n zx|&WSHVW=f7n(iZnVoC~?#;I~{!i)dAwet-1sfj2?+6@j`vx2)JrE)N3ZuX-7O{C? 
zO{so-=CV6CeXl=W2SNodP{~|YQm)rneAy(Qx#tR)v56SqBei@7^&hh36&q@<_S2;ct z_NuSMJ<`|i{Ezhcy=WZ!k5L7Y9hfN43;j6Jb^0N}Kr6=l{X~u(tE_z)uVN`wH#;t* zJmE!;x?$2BxKU@U0eLQO$;`a!A#ArGSeJen5=Nm(3Tqb#t$Os*k4^u(qEV_2$|}*3 z1n#pzz(uVG_k~?{m1)-vx&!Euq~iOqqn9}lA>&KpLL#;i_>2mF3ZBh5@=GW^su!+V z_FoD1@5Exn_-Yp`&+P!gLA+_(Q}eNE>ho7%x)dYE!9Cvyjgg#f-8%)=X8OcRXIkStP1O43xN*K-U3zHby+qOioKM>=+a@#Rdv>rZm+){ zYM>XVv+$GH;lZTGG@M-Fmk2M4P=+oq=`0OB+G;Q9gtG8rYG^jL#Z-CLy%HzfIjwE! z(BGL!2Qs^wY@W`>Gw&4pDgd9nO{$zLjnc~`x)9m)@^~|)6!}2fpn;+d)(`YmC3qNs zgc{~PC9U6YZ63qxQ2xvAkL#ccK}U3U{yoNqDY);~09^GS5prt=!rr}?#adpHf~)Ql*4yh zN|$SmKMi9hE<$!fNI6Z92I%ruK;PM@wrN7a$qPiD{UUc43eJX(KlI->&I(~J;)D_vGrdJ#P-QgB?5ndq{nCbzwHPse z@$;v$>y#KJVYoqJh+?iR4XfCW;PDHD6;fz#V``YoR>n)U-)ux=4*mP7D2g)h4)G!7 zScq?|=|Vy^L#c75xz-r9r+xe*f;R#vYM;_4k{@OLMSGoq%4kou{2rDlE@;LagRras zM*#OdE?++N?L@8|9`WYo0i*s%O<3dY-R<6;Q!sGXv#uz{I2?(oz ziQlrOzP3Q8cEcs-hhDCiku=o?uztQv=XxUp|3qvFNK?4&6%Lz-0yTAX;#iUd*8Rr< zl40QC1(x@nTcPwXD-`~ZMu-nRDV1e0?}_=t5dHen!K5c}z$mUIv5s~5}SDjRsp@p+gvtMbOG5_cSFrRX$mz>gak%)bn~eG~WDXw_l14gAWZFMjn)X zZ(=tC_S6)??_KQGH!8vvRM<|>zpToAvY~-LL~pPxDK(!k(K_1AVtP@t* zXnnN$U@tMZP?^1IgyQjef++bg!&By>2SqjO> z5=?x7D>t^3iYAQmL6Gx})IeIqcrjJxE#X-FsQ! zK}LKrZ5(+RuZtKnSjU~7#F?r=b(Ip($N`=gDA@CqiBf+}wnerAhR@dLL?vGyfbY== zpY^Il=eSa%N7a(1OzN@TF7J%Li|QY$|tN<9`-MSBdlayV+DtHc327!?2Myb}%UznU>GwVBW7T81xTy zeVN&#H70qDq)T4Pp9%f3!K4RdTnB;FT(Cl}byv_@`No$LiomuVNrWGxy4&6{V@JoM6vWFhfjU zsshE037nWk$EJEx=&@|lo8jDp>dNsIDsc=tcPM3=OqaWAkP6>%SpAjikUzO}(i1Cv zTJ-TpRgB9Y5$}h0uo1m>YVGDUOwDAHoB?gW@3f9)vQ=6$_NS=Idg3D?uhPYK33g!? 
zAj%qBBo39^^+&YfhO7lw3p=1(Q>qM5N`a|(O6kw-9Bq$2KwO+-X5x2sPOJ>|p_KeI zBSz-^z1hcF58yC+$|KWVSr)$S(c$UX%EKK|DkIuUJCgJ^6CZURvcS4D&Y7hswSE3y3efv<>sT(9CeR`<DJEnn|i&n#Tm3_f~`lg-oXppsENx z2?!$^>|ua_L*lgG+nJBxBOw2SrN|&X93W%Xtyds=PC*hTmlW|!r<)BdB&#EU!-DKD zwYHwU2y9Ow(8p)SCs`K(86@GvdS2Kwx%7wFSO||_I4+A&Hp>vMPi&S+cY{WCNQOag z0^o;~^!}s3|8MN|`)A(JQ+AXeZHHmPCOXT~da@&+$4pff1}(Q>`&ul&0&ti9uDN~U zn@8pr0TsCVnGeh2f`w>F>DjAQ83mVpSO9&E9n7;3L$cO7eof$Hei$4?X1E=hc0)KL z>CD=n9KqiifOd%G0O+^AiX-8*fXz~hzgU969t@EiXgf$**vt(^NoXWmu<4 z5gcDvxWNNX1~Dnv6LkweR;1isIy|omc$ImT1dc*7SOMtWw$pSBY^^&1n7_WyfD-j)j7>P>SPhbAM+C)yHjt5UR%a03Pz0UW`ZaYrl*`<};L3`C4 zB`ScWlE#aQfYy5aW2j($fPYKKB5goj>Y8B;IgAmVyMZ{V;KvDnt?4~wnZn`Ko(MZo%LWL z_QNg1mpwY2HkI?SG*%TX7L)r%IzuY{SO�p=(?Bx5^gFy|yQF!>ADq(x9!QgpPvb z7qtg*3L>b-Q~BZWFpK2}3ZwEg-==fX&PSc1ZVB@*dbD7oG4K8R2cn+6!tXXH_5<;yZFz0HCwPBl(FF&84T|L-f8`m*NM|@D=dbxs^v%SeW=g9 zX#lX`?ed?(@+$hVQVE|N$=y(A9aEPogxpoi2AIKZ%N3hbjFg}&d{I*4h*xi;+7=wB z>wC3-au*YNARvDU_;RbuvVE7mVlkFRS^b*&;ZRll6uHD%3J=yy{V%xVB} zB&Ur!B?_3Mzz2&5)yAeh-=@eG%9vsHV2oY;N6<~NA&H-)EE7DFS#+M!kOc08jb1?V zbfV^*;%m@|Nt<*gR<)}ROQh+Mr~J`Iv?qY&)h`piEuUjSyiQZAO#7{U2oQJ=1^qUXHKR2z*X;a3z!1?) zAv?McTM=_YmW=Qn5b{>4dxaRS$6O{jvyVm;03A1i$Ti3)w2oWiQmlJXuD~%m2a?j><=ss*5>=d;c!i2gAh)1v8WP0ZxO_;8TlIM}xrOwFJ<43qUIALprVy zj`x}4gbyD{+UA*4^d!%~=zq}q*@F+tp?(*B8sUCXn!~6_!vIMv{o?1Kte=hmjf!fu zXL?J*XtmdKrI*Z#IVOT7NM(1cQi10K)(X)n+b>}XIFP1^G2P(KSe)z{nK0Hkq-J~@ zBuqo@819{*^Vx&Q4v0U!W; zlwIBWtMA7kw!=yIf4C~ z$85o&jA-AaDO)L!6kOs_F^bd6h7%O&QKATRzo5bwIW0PaOJtp+{Mf^EFn1K3K_#0J zAbC$i7|22_4};%K@KTHo1B$^OJy@cexwtaOJ69w~{fXN<^!Jd>P5`}@>5O`;^?Ga> z3!9c7&y$#wGF&~F<*cl48au& z590(b8W(`N&uzd;hrigAL;gSjs&CWwd=7|)goZ;tJ&!|in>K#pio#H<0P*Z=I<;`- zy8^hptG!=3L#YA;*^o|id2>Da#|@e=Dkzo|Drtr!?r4Wy!1bO+j}!i;#SkLGG80>A zrt+bIK6e9oyVhpv*ue(bePi? 
zea*O(U0wZo6K}5@bX8bN{wo5(aL3dplFuX>vkNc= zxaZ=v8hKZbOAnN8gu-koHJVV(Ai5ts?uBoEPoP#4!{e9)@8{MEzMi?g@4=gSUZY#SlG5x%KUlqQJ^-57%w9dnKvP1be zDx?q&feaVf&3gUaUZ%z)7oD`%MNunoHFH2t^=iS{Xw{l>b&D(f;}OV=EY{#`d^~Xa zCXZCJT(8(gRM|+&(+Jw5yPn#PQ5XVc*tM+G0`PZzH%olG(wwMX`t&8v4#GJbRhyktrIW)D!^%8reL)N-zKDhyO-7)de?FPZ6IlBo^K5fV<5!~ z|KS&;bOuzbTx69NcH^KVszwF_HrmR8zDIkjLl75JK)*WXh`(h`^T{WBz%{>3)W^67rHIuJtl)oQ|tASYsm!Eko2R^a>6avovU4gvc*zX>= z0EI!=)%Ay)9Bj?F7?81HM%e?4rlh7uFLW|U@(l}pyo=N$CU)hDU0@orJ2sa^dtP`&^Pm4?a%%s=whhihdpJ}>{V6D)xCF`KnOGlq4gg-S#*6Sq`5lMFj z#lt6cWH*<*?^QuKaOUD_>79{0ZOVf+DW!Kz7ImPi@1BS8FY zKPKR5GTMOSvp)MZV~^*k9?TSGn-82iWxC^NgH!h04BQh)mG9Vv*bwojFDLFz?@AQbqb>8|oV+lO_nTRqFD8}22+F&GhBxz zvgv+@O}d4KPK;{c&CnjWRbQU)tWZ4+pK~ui#;q;^t$$-5W4Ez53Ov#4MYvs(ff|?z z4VsaIB?Gu2%!@CLW|-j#55DOedTt~We)=6o>c{4oP9W^pvd?6s2jP$;bf9|(t20p? zqdVF>=vU0cy{n=E$m|aLB?T}cm?1zz{5+Q{z=0ndv9ck6fEJE%70vcyHEf$S67jto zk(K1pYRD2R(!75zemcA9+idAmG`ByE>?z7w>_cITQal^Us)xD8g;@16DesbsDxF0m zJs?&!8+X?SQ}6DyIHuvn0!83T(%7g066lFWDm0%7Aj`)rnyGru5oMeIm`@_b9t6%A zBL<$c@ra}2_1wijld_1=p^=-q2iHKGD}CUeY#A3)h^&`!dlZR8PTRSF@LmDXGmc?z zTS66ydfJg+#r=c`FTzC;7JLj#j)IC55dmrXSUCb|BUX#34SV%<1OFchse!wYD{N4g&<0Q!O*@#{wD)Pq&AwD|zargE4 zm`V@v_jtU5F?bt9-Oq{EIzd&Ta%;?R8AS*w**l78;jA5jRA{zN^RLw5^s>5{nM*!fJtRoLi9Gjya}M#D_^{-Y@(Mw&ox{{SXt62e{z0zy=Aip zOc)}TWUEJmh#mZ(R0*hOxa*`uGhSk+#ZZ`z!yJlJ5(ri!>!t?34v zff4WO6t4lhbE*MIkMHE^w<>Q$bM>_lBD&}|bkY(*u_4XuVI+G3mR$)lmYY``Dv@u# zZS7Yndi@y}Z@~?dltkW`tt^I8PowSN1;K(iOY*;K$WMsNS(K4JkB}7xMCdA;La9ge z-4_avG3HgO2Voa&`>Nk3iX6+B*xwTbTGW09)fDXSUk=IEbN2Mi0lm+tENzxqi5MyQ zcnm(f*xK$2jex_*ZV1*(neA>JT=AC{vMdh)Ik{i6H6)vx3z4WOWfApI(dk8g91!%- z>iW?6^B8h}ADyTn&n+o7XZKEK7HnW-r2ypCWNXW+mYsVfVbRsk;8v>X`FcyQZkhrsh2fY^cm_ew@ z106pJC#a~wo`^?$=Qqd(XrtM2Rd?nQ^hIhe{-+@H;3L{bf zIgetI+f2Erws{RTGe|jquM3|peB34&=BnZRO_|?mgY0lr|Sz8O(b&NpJY+y^&g|m!fKw4O(&oQ1B-YeXr zSF)3Q|9ZFf>>fuxSKA*StDLm<>ZqvEc4*;?cI;DlJo@O_uXN6*u(Z8D`tjLG&-66) zpc*WfUePzLSaI5BvUiX{ALupTDCZndUQ_h!qJ4Y=pIQK4slDXplInN#k 
zzC|J0VZ%IOF>e6JPMTTq`Uh+gXA}L9C~{ia7To6#s{Nn+;0wD6hM=GE%?ZVgtN<;< z*WBUy*#32xe%y0BlaVG+2B9M|-)^pc_8j1l*&KZ!k%y~3yw!g6#Sx}h;4bp~+qkoF zT*e1CK;l%-uy+LXBI=s7?;B}t@9`%ba3fPq)7U3o()7UhytkUIEshuICeW1+XsexK zZvI0TetD0i4ztHH4m=C3c3cq?ob`{J#E?U3Pk+-GV>uPP29l$G*Lt38E?WPm8yi5- zDp?{+M+l}^LzUA?hqvn1+Rw2acws*+oM{*+BT;~;)LZHoay_R4(mfPuT zatHEB@>K+{CC+ z*lIco*&~%By|@q?3|zK9yzaF}PC^4Jt2z>v1j>ogGcH<}kP{xaXz_w&`nO2J!0L2kmJb z7=35B4f`GXyFjXXrt#6|B`qy5k@CM-ml+UEM>12=f`HJaGPxSt+m5oQWSrn{aDH>@!FUce3A(D z)o}H<-e})whljN%_Q?0Lp&{F(^vS(VQvPuo_*Mz*7HfSm+}!moajp6mH+q1oqlo4M zJ+NEcOZHE&;Q>vPxk;fiuMpfWFrMczK%+j$PlfKn@>Qww6FD6gTNw7tq~%{xu6mL) zmmrXi0XW2HB)gh_2DTQ3Hy$xSBmib)tldWs*}eTC%Z;LKa=6?X z+jHe86-&Gg8h{=QsDU&O|NKT+yfzS21YK2>E2xF5O$2aSjNqHYUow3vNKA~Q(B;tB zX}$-BF4U$UWmv;n$-O-!^JO)p)u+K*H2W(rP`^dE?|hftY5~&jImscFYBIh!{nleg zx!^cjKw@I5DZDq+}%76UoCmJzcAU)?LW^L)wQ zu#m(+Sy^=&FIl}Ud^nmig?=m&hjHFU-99DK_I3@Pz8QxeYVu6fMb>JazZkrdex~PU z`WWf`g|wl=y4;MII(xaF3nJq-YG;VMK(Dl+c+Fg4XJZti=)y1#;UGQfI{I_IPnFGEF|qkTwW2E5J5!!?YU}iYCE-m?lJrl?LwEzP zF_pex*pLb{2#!Ty_HAxgW1-) ze@z(%ZLT=H`sC7J(kXEY9@gA|kXMz1hI_7|O{hs&7W;iYD=`{c;0nmRGS=GAz%;E&s8 zKml3xuZEhz#O4I!=_OQwkz_``)^<|a&W6|_gmh0(g)WR@k{>;!RKUISOSf4wWx8qPF~y+t865 zAu2Fs*-`}ueVv~tAnn9i^T7a<$HKh~8@$g{t%Og%zbXP%VQ*KD(?233ItYV04N?%T zlk-y!Y1#SA+~Gjiwf=W#rQ>xROMXTKoW56Y9;JtbLPP?F@chuguMGtSKw@Ai9E#~S zZTy7vS;>cxq8hehb9&;yEVtQW$E+&aemo~q>`26w?z!_i264fgz-$I;e5BrE5(ttViJItwe{vPX1KR6Lw>%ag54h1Vc z0<=4j+r$;eM;xj+WmwWX-I5`q@0kjdHyA}SyZ0qe9L2=55`rc2mNW@A%E+d? 
zk){E|rbKZW5St}utu0l*A=bRAa>6KMx#$p}VK9x8&klvGDlEecz|+j2+IGqOIM^5; zVD z#0?0`cE}G?n5)6T=Ewj^av5mPq*VGh{$=7)ehmj)5HZEN&~g}Oz75#Qyk<*( zaM^GqK=;V-)7Y8*zh1*5nC0sx8_`nY9~AN5_dr<`h6m(^4Ho_{KjA;h?4Ea^1U&Ml z$;9}tt}b{`!xRQRqe=Y_K>hF6{^wEUDD_|876?)3k<9I!?A|J&sabGEiyIl*>TvdA(R0K^5d&Y#0ZP&H?A z+f1gNP8lxQR`ufAlZ?@VwC2aC#6KsG9{tZI?uiir zPfmSSA7;|7?)hG#IbOE*1W*NLSkq)P!&^JR67qoxJUX9n`rTY~kZV*JfS>-P()|X2 zw1QjUC;w{0fq{xLP@_yI{qkf>Gkx}tJYfptW-}P5h98Ci(Wbrh4Jz315>mOKI4uGm zUxI5xht^XCK(Hyo9L4o(lK=BsblibJVq}p8XZ9Y5U@YwV|9FNdz*DU{`?;n(oFwgo zm3(j>67&m#_-77-t$+P4aANS-&8C6xnr9+0d2P#OVPLQ(h)@7}C}E&~OfpgA2*D`q zHEn81l?JiUX6s zp+EOH%%szyeA&ecgs39xkjH4xh#SO1xT1jxCf3IX^V}uz$UGTwkxjs7 z_)oJ(9d|_(tPDH1$>88%2&w3h7N6Ai3L70kl}bWsu5)Qu$ZYKMPa&JYoBdlf)8qFQQbbQGlJ_6P0$ z-`+F-p z28AjsMK=ZnyC)D4sY~}TaEAPUuD*l_Y_>@G#xQvG*I$Vqn!3rtCBx1jIj;;oWW=7# z2X5hhx2Kxo52-iNA}9XifgX@E0<5+BLNu7^VASNcmvQ=x2P||MmOzvn`Fqfkj{+Co z?GFK3*!yWgBx55f)#sTp!>R!?To+&n*jS+D|FbcK17WGcFfXa0e%uMLC=D|iwzZJ? zR>9^V+*=+9bQFB5ZU-X{zH?hjuFcc<2u#f)EwToiFeDmtVp`1&pnwmQ_`eA(GgZvw%j_X=ShO!|cjfD@ zhu-f4HJo}g!4FnAiZ5i4u@}2c?qC=sw~WJ&Q2?6{mR0o;+5?{1K7u+0Jq`|HkzqyS z4vH%JY?X&nRwW2^9X+}&;uTo>>OyF)|dFkVM4cNAnt5b*VO-!<*^#E4^(#6^&8+Zy=C8aX#oW> zWlf+6o?8Hbc&w>L?DtpR!gm+>Fm!YQq+ARUM4n;%eg8AizX%lbZh}^LDGLNFHKH(b zD>VU?zye6(-UW+`#2q&F4UpZGQ&N-g-$ZiWe3P{=tDAI&J!go3p8Fb%)^ct(1PNUB zAPl1MAXz8wxKNc!{T=KE)sn*jqVD6uh=K2^dcU)cT%%m5_1PtmvAcTj)uq0-%=VlI zh|I1sVJVngXb(6qx8NseZo^D9(ezS4D7OMkSkMuDPhhKC0Mh>LqAWL^=Rkkh3}U`I zKua~sE{kvsp6Shtt=`SRB{mO|DxK_seajh)F;Y%Md^ue|ZP{A)95@YUJ<^_n+#KoN zri#s`Zp z>zq*U?{D@{`F|Om)Pb2h-(Zsk%3(7Pf%bFUu7~&lbTwQXFUu=2k^NNXBtRg>OM@(K z`U$+h_F5nLA@Gb%yWRkkm95Ihk1w+bRWxmeNd0dx>>5_y#%uDb6%sL)R*J1J4JD~P z_dU`O{R&3A<;*z%Dn4E_P2z4(zPuDX4BUusAXJ(fuR9s7DDKawGkBd$>}z0#!kq>H z4>+4I67~%NTr-8y^yz%FbPQoKBV)aC#okv3;l{`mknA|Xm=%eVKX?U1z0&whKAHZUKZW0^8=AH%? zT0S^iS&!xF=Jyvd`}WTbW)c~Po0iKwC>^pUY+bb5)Wm`dU%)nQE(&HE*cJ=GmcRI! 
zvYYBR$dO^3D=Z?+f>{ISfn9O=#*9`|5SOh5{vn2CVi-V(uH~g)x_}jLAEu#$ z4p2m`^UMLr(GK7MV`tm6NU=~Mu!@YZQY(QLaeUGcEXYRRDCX~`-g)CKP9~)wSIUX* z+sD73Gv9FnC$ni40b;QSc7|JfDIS~FV-Sv=w+|$#wj4RdJpvKzs^ef{3yDUDOtA~| z+4gi*ej~B5c{Eeg&kyC^=lk{m7;v9Z;EYK=LPG{(NTHd_RJ(nkrWi_wnu!jjrhJ)b zgRRUjQi#jvkxI<_%mNTfI*c2l$58MN`5m%W%3F@*pfeWl8(~&h@XxXyWuM(vd<64e zYS0)nQmNcue#HQM^0`g0DS1xDKrz#6lB^u=jk0uUDj2l7(S^)fy8)g8Q}zKq#;yL{ zD}AvfHoMlBXf@hNENLaH6j%`Ve4u<8B>_NB-aspO5V);b+-Uc-Q#+TGR?8(S-V!lG z0X5eQGd~XH_}_V+gHD;(p~|#e*4PxA2mD$X5vYpn#V{u~u1ovR#0+nV=-j7`!y5eo zEQ?AV|JMQ&f$T(x%nAU1wLx`b6g2KlgWZ~&_)QDU%{z4pwqTkmhG{G>11zt3wAX_@ zq;^waJHw{jiPY~RZ+EJ~(0+tT4G7rcVrEwFK%rEeH=WRtsSt)AzZ z$s#%Ze9gF@kThRZa3HIT-$+6o@v|9rN4anVQ<_LFi9Bn8rY8-ts69`j;UihvYd0Xg z8{zlOOdNQU9CI`Jy={#p7^ba@_kU$%zNkZa{Lq+^6uGx;oM+DVD|luY&FKRvyT)cR z=NSp-__E2GuWi{cpm>b&`Gq|*x)X+^q(M-8Kb6rxI>5^)p!EDs|9Ak#9K&HEPsFS1p zz#bhVyB#g;fZls2BEJy(ZBrKJ52SDqG(OJ_#nqwH0mDHl_kS$D=$lVy{79ahLF*YR z8bZW$F5tjS$gcF^Y4N&1u^KcV;Mqbt_Kh2*q@j0pZ?ds>&TzZF!XY!Edk!ZlCtW5s z(z3|NGgV2{Zf4j>-EC;>wCUQ_sdIs6bJ98-E0ogTLhi4FH4-Zg2qExMaeJl?O-udC^d#LK(q2yo2YHf&+l2Az+=X{(Jk@G6dFoVxD9etsJMe-}GMM zN$rD^)m5u=IYlAByFm4%;BqTx32s!CwtR3TkxpF4COd=?-6hh*h3aZL!4vbEa>N~> zVRvnnFD;ef4nCJ3M8jwKv%lV7Z(u8IjN~Nk{Dx?gpAtDAt0Sj;9R;cr5h7|fu-UnZ zv^_d#yOq#sh||8p1~h4`Gwc20ELY$I{=w3xob#&G`~JgiAQB*zvJWvaM<>evm8l|b zhF09dPvCW}q`T~7s!#IsLdJbx>kBl;sML5tA0lK?$|Vu78VkO#1o5lTt19jFUdQN8 zJ>?u(hC8`8RD80@NFmQ zM#+rV^DD#goZ|u$zTl|cGX%!FpU7NotDOY>GY5uyO9{_?iJUN%X>2$+`{n|#6f z#Uv#=OB8CtPn*ozTVJv1Wzt5r`1H90XGP0#Wbfd5p0 zAF!p*Hv8W%(P)}q8KCV^^AAa!fumaiclXcEgl$epAd?H#<8Qc|vf7(6(RJYNfQn~} z>2onuk6fL*${IbVu3DR$yw8w&lTx*ftGfj5uLD*N%LB}b8_{{h7^Pa6oy011TjeUO zc!|;1`#v$QkS?iiJu$#bVX^Xcp6`bfQwk!yMT(UKY(3Z2T{(UG(X&-wYEw8|^%l`2 z*wuqwKwX+I;7yDirPbhMP&z#YwNckzD>#60_?XkK$yTT}TU0Sg$dc3E$y{R6mYDzb zE2qOHj688CCMD*bGLRv{cQ0O{d0Agr$znSq?rAe1$5>`Iny^QoV{{iuV4J~) zTB;?7!9g6)-1&r`=as?gew;SFS=t8{4%wBnI#APd7dL&KjdB-?u297)@mSEo{kwSc z0;AUi=vy+8?3a#?YWoBtczv}Fu`?*Y_KTeqs9yPkYPvTpQ 
z9lI*b6AXGoICbC1KcAC}9Nj*kB@Y4fS@@b*FCzk$Yh>4}=!aur+0YAmTj>tnpxVn{4{>|O+&--cU&U<5 zwoP*b*FqhXhRD9bxW*vwO=s0;RNg;X03XxX1;GUK(aAMN#hrc7NlM3(&NpNEWSI^_+D*2y$=*jDUE&(BB1mt~~?R>61euxRR8~v6F4?vFO}jd|J_ebEvU%W9Knk~YWt(Bu%tLRA%60v# zrWrsJJoKzQQzmOLJLct66&^g(*g>K1(z&ayliD9;eSM6DR%7+eLw0MD_H=JjMv5Nl+bY%S+QnpKd0>?RbGf3l5C zvQIb9Bp#q`(;{{6cnwn`Fy}w?l|gdIF%o$IEnveC#gLqTA2&$X7Nhjl4wqRdw4VNT zS_f>H$)-Q?^I&SidvP>DvypJdnqFMpG;?%qT;(B%)%P>8d*hJfPxB&PdC&e{65%4$ zhy|mGZ&68hP(k*+%_f+qRP*E##-L?^`B@q~z!c&=gMA<^zaIkA=FJH~xxpVG7UBhS)*vd<`D_n&|L>uD}Rg0-ypG=2tT zZ*ojm5ua{?imq0@*t~O{*R_z2M==6gYHh5)GOR*v;ZzdBZZ!nn_SLfkf8*%5GE_z)%&TkjKEIEzUDJHLuLS zY)(&kaW^?F)bPv+g%;CS7kk8;eYaOFdC6|__t=G5jc>|(Kk5c<9dA7wM7kDWJ#6y0 z$&jo3wDsax|6}B-_DO+Bh>oRi+nRQiCnjZ47J!HPOKT!Wy&1TbR++i&i=#=9xv;yn z5ZpkkykhS(# z1dPr#;f}Lp~R7TmR;nnVBm8}tJc8QaUu_|pML}54>`gzLF_n~ z=RmQ`ruWmc*+KU#hRNrniIEuAS*Mq=mUUaQ(J84ZJwdB0)Ou0KDzCRezqu0GuNQoQ z)8L%BvcT(^7o_@VN&&7sjBM|68heLw_p@jD)UZHAnngpWiT2?9gDL$ajOg1>`LWe} zV(jKR?6HHVRwsRxir}?J-0|^PIqyMc5VyQisi!J0X3S;8FyAEWd)sQ+&4-72UsQXM zW7JLdyAR?m2RuevM;_7F=K24$p#Y^n?QA@adGf_-GMz?6J6WpQ0=PwX*Pb6NGx)c48Cw!FtX#JA35yp&k6T=G4oUx zcaf^>7B-RNs^?%%ce_9X%!VOduQzT!TI>I4Kb0VMZ!t1^I25YK?RSjpi6zc>j!V&a zEXTagZ`dj+SD5iy6|MczSQM;18S*u%A7~$i9$m^B*y|pMTbnPQ(rEUwwdYYt_V-oXjgfj!pc zg$+HWiV8WLo|4b(j>Y@$J;%LDA;b4PF`^*m!1y&|8;Lzf%aN>(#l{-e(sR4U zj4l3t>YlJ=lt+QeiCwykBey|4Yfd56DSkyi2&!k4q3swN0&(VTYbs85XZOp?77>?B zD&H|mb;N4DM7gm<(Zu6;70-N3Iwi^k z4xJAKhM+>-2SFOnJ>ea`0U{xH%x#4oax=Y=es`CaoZQ_^ImnzxbZj_CXc5N3BQr;W zJ%WK@#uO|E0Q@wr>zyQfmFPe53zSTPHuo@$=FEjCxk`j<6U?B}zJ-m@NLO2ZCMHFv zFh;=YYjV?r%?He{BT!~8$w>BjL771$iZ_RXdo<8Wm zK|*?WCqd*JZV@pwA4I2po9Adr=o%UI)kEdnIL2EYU}P#q99a?!pxbv_Cw)M9uS|0J zMiryT;`YGo$!4T*|Fcddge`~?;FX2^(gl@-7nXJ-K2ybn5?fkNEo~# zybQaZzXHpI@r3DumsP@pnIxUn(c@0W6byf0AaFYH1$bXI0b_yk?A3t zMf|yI%uNsDFmYMyXl8RWPUlWm%294O47c?5F=bff?vI!s03F=MvxVP%d z-f7tNclI|_9^4fXt*CahuV{7kG8Txry8W7ge&I`+77 z@x5(;I=1+DP0eRFu&HdC=B*DUAU8Bs#b{noY2w^I>m%VA8CqmyDpMp?9vyRyrS&~T 
zD#+y5ta8cXN!)w2`A9=LdXO=p^ku{HB4)@E))xO3x!rNU6N+l-Lg`kGX7$D_n(N7S za=FIXLG+36uxDDb&9Tp^9A5Joo0P(bc+fda#d4e9l>3VOm_Nulp=U|c^lm>JU@WC5 zPxliu^F7C9h}WF$eLl+Uy%B5Ie@cNc#V{)m=6j#|R141aR9GIJS`C+NyCqHLc(EkT zsR)v0eY@7Eu$R^@Cm7tt&L4Lsv0e96E!=199ZQlMOB`xF`0fl)CiwFhE@(c0JRR{i z7bW?H*sQC>znJ;XHfD#~)PePuPV3YfU^Blr@Ps19r-hC2oRvGqGG$jBw_GhH^ZBH6 zcF)b*qoPII$9I4PBQa**OpN`DHxUtRx}y#1nhz?UVclB{!Yx zMkHh=$HWi90uYyn;yw=)7%6!|q0v%%ILK6I@L8s65pau*Uk_q4<2Z}r!$P} zhHJ8vS_6hYaH>GFRkQa&V zcTZL71IETE>%dU3Hgueq^1SQPXmM2A)U30yv9t7uJOb0OCr_NYN8;0#9NVQ6wqBTP zH)vezRit{VDz~8{k2kwW3cdQ!8^5N_*wQoR`k81%mS79$dX*<);^Dpcu$|b4hWA1bimuvHbZcK-uRC-ZO>hTbn_tlN z;9>1q%X{4K^B#8qf5#O>la+kGk%_8j7f|It=#s3y;?wKOfw0ARThRWiyD@Tk5nKX^ zg7Ni?)Rf1V&pEWbH{56@c9mIk@&qeNBk?SC&LXK@?ni2!_!q)|hY<_=JxS=J6^W+0 zf1s}U2(f)dwVdGTgDL6qe(i()7#^k1eZGZX0?}OJQbrQPS*H@NQE%lJy}Nur5}p)r zJp-seMpUpTj!Pzhz1PSm1_2LeZC}%{!^x_cSb9=W>G1cXrR@@Lj>odM?mZu&iyqbR zXh6*05q>VJK=_$SOV5s18?~1|tG|WbnJE$XQR<&|Du%!!<<&m{g=4prB zO|l%nM3~1XyDQ&ODd9egjlWoDmnZ#JA94Np?P+1QIC(jWlm2ono0P-XRWQhCL3A5a zNZhkZ2`-SF?t2UW`!x?5cuC?eDys6s2#3^UUm@y;TQ5`xU9z6MTy6FkZ5Lm01B8@L zQ~SEpyLXhk4oth3(FqX`e612T1Z?T2YTYJ=61EthlltBFtdZYgV|m?tQ0c(rWudnX z?n+y3?4e^&_u+Xk7rJXwEGCfeJj+yG_6y@l>=U zQ?=fc?s}`77&fKPzkR7=#Ph0Ew`7G(m5Lf|+O9qG+Ips_5BN(NU9+((SeZ0o&-n%2 z5?qvoaUKMh6`zDtKZ9Y*Ag?r9&*4*xBaBDgOjBW{dW^npDc+;Vri`aG)F)J{DsNqC zzq`#1pY^;kLH+EhY9*cNQ7SM;Yd&ix%C`jTv8S5d9s0Lmj5L~)(97@>dm_)0Zj+EO zi&D+iNs3?SwN0U1@uH$D3yC8;Z=oqLg>E5A#$xT0jh3w9Z%9F79|F3ZA2J4C%>DLA zY}wc%&wj4QcH&oi^uwGrpDvFD-+Y6lwok}FG10AtIFW`w>7p>n1tAp70E6J5PSSIQ z1wqIMST&HCaU6oI0u2#=^%1@0LL7<|i_9whHL>%NNy4jQy)iZ+z*YsJbHk%qm9B&H zY4u$ngOiXYADZsakH?06!b!{!sx$jHsPd#V(h80(d~aT2;C~L)ML^wOU=w%R93L0D z9x2V9ez*bSw{wpWlAK4w-?0q4vbFg-H!g0;*0ohoT^_c6_ryfAVoq-^IwiN!ravk0 zanvZc#3S1$Fxt2 zc~JHF(*}Z2Ceh!B5eHHH5${-6TKt<@Nu;ZmAMI-t%(02Jw5QYO!;OozbkCjE+I<>+ zVz!aLns7=?v5O35Gpk2Rk8K&_?=r>;yY?nar`b}-7J?z>971f^CX2!|k&f(LswKo$ z+l{d}_+8!Z@?)+^-YdifLZ#o)=i!ZqMlmB?khZKR-D)z&^X`fcC0la_#)LYtO^Q04X=?;ug{vlQxy 
zyJ(4ZeYNQOVh&2iH9u7pZV}c%;)Dc<##J-PspX8Ld-(XN2ywKapc`p!)7~ zk?XZ>#}QB(cvPshAVaVu&+Gw(emmymD8} z;$9?VO%%;9;$xbMAoyVBTdP^xTmRv9`%NIk%!X2*f1gIFF9l+VCITeSC``DBkFGyt zAGl(+Bc9G<=I#{-@`>-?2Aj~1V2L(svg7?Jz1+duiZ$cr0Ho98OqNCYM5heT*j4p* zJ9ZyKqc2Y)$m8e+8_qRv@pS6hy)lc>9qaQH4cWgT^L~$b3@=l6parFW=GpERTP1D3 z<`}Q@+u)THf?}I)lI=(DcM#D6yi4-4wf1*OoWe{f`!)MnW=OG~v)d3xUERtq&3?x2 za$T_YuCYTPlQvRjC1aKwUX2|sK#eww<$i>qBCKP+mbmqn_@e2a_0r7(mL1ytq{bc6 z`t^8WW!zbUohtjUxbt3GaLzSG6QcEwI_&MFTypy5F=l+1W=5!eQ4S7jn+iv>am$RA zEkWh*fiD-`^vezD%N_AMF7@hJaCsD~X?^#?U(_41qT6B&>eudz`+J-D(C!ZAf$6)j z_&CU*W0m^y#pS<2kLM=SA-~0|TY?H#e`DN&iQT5X+L)5K$Ns@n=OqU|woE6<982ShUblb0oPwpZEk9nKhLowS+eX7hQ zp`|{LE94aRmM>$}{jPZ0(V!>w7hPaeOX$YgsnxFX!Ci(t@{DwADiqsF%riWqJt5|p z0R_9+EW>1G$tl(f+^yKulpnIXK^qnHvn;Wqjc;z4&s~mHdd=RR7J1mmKW3I>fXaLt zG&F2_V3aO9w(%R6OrdWj!D57f|F^rD%-mPvFK+E}T1+MB6}_sz6M8R;u7wWk{-D_m zuhhS1yRGy$W}BLEmVDJcD8S~7^7^*X5@DC*{8x=Ht~tN*F7#J%zb|7@yFL4J%|6EW zN_FM;Da7XCO{X?#5m4}7tc6QkhgPfb&#a^zWsxoCx zlX}muS$*ZQb5#9tLsYWzRVKFg%*Aux+*<6e{M-^>WICF@Uuod;%wIYCSIT+uZ#1v) z?MA3SzPrZZN*C-M?w}+BfRTEAT*o7{2TMbgZ10rz+jKg+Ne6GLFBm*!qVN5r=HoPR z=H>3In&2|q7&$J(=lZPF6ifX#`i(L&lcg*Ag$uWO^cr=joas?l&R66pA2k_o#i~lE zCHturEu)qm*O?b^Ct_E~^=RT%3J_h_?p23rhz*@p?96>pZ68~3_A)Nbr# zSPsNtoLl3W;+*g~H}UH*A}zE|Jle81u2zT+kUxo((KjwTc`o=)%r>?;I|I5$rgt{l zYf*S5+@WZsZmlSH#rS#aL||``kzrGMQ^%#U6*RBd*!le?(V*Iev>L6)&n4g0-u*=* zMb6gVd-(aCzFC4C^dNfsEaG|}#J@O}?)t*A}z#4fsSK9 zdxwC=sQN5H!-bl1|I|V)H8rU5XGbmTadr6Ydx0jd`=s_Y*xZ?ChgV^a($8faVO=mB zDTbzSbxpT+leGd<^Yu|HP0Q;@RJd*q@#xc&_sO!(+_kQI@zIyO!CyJ|rra*xf7u&X zn$_e<<(A{+lq}J~g*WOcIvTfVxA{1`EUAGP8ajBRj^oN}+F@3P<^A%$Y@(OQS8Jz8 zdgNj=(FS%WeuWybcilxRbn`iKZ)lxAdKDIy6kn^@Eu49OGM@KlXZw?G@kP;f~wl&XqtegTmJC39U6_V_?3pIX@p=ztb2t6fdgq zGP9S28AwW35~2zIcr`GC$?4M92@MUXQa43o4-7*cZ#dOKooeaw)#xDdnlcmJ{hO@! 
zvuBg4dGSW&GgmAQ*=x8x{<6MH(zDAxEZ3(mx91s>9&t&iUof*#9v2Wre=i^1miQwM zJ6qVjIZVyrWuzeg%jg8Hdj&cEFMfkt10FXK!NB9GJMUx9V9kaAYHnk$nK}vP(?l-jDqCzQ3yW%_P$r7vCb@<|* zxA*2qIxaQF-r%E)50p_$AZM=if-yS6x{gc5Rp&fI5;b3}b{W3!i4O^V6u8vB)OIZEIDF;>~Q&Di6|WGafP{LG)9Sb$@3UG>zMPQWkwY6RKXSha{L%gIN}@(~X##hP|4Kcym2FM^wwL(1K(~$B zM{qfo5A=bq-cj#ul)iwdsb6237jdG#E(^tGn-^oEJPC*99`V-{@z!%>_g~E~ z6ERF{9Xy-;kpIFdU-g;(pB14|VLNY?q%(F(^AQ7*kA$vgRuyqta~k+Ev){*-U!5dm z&cxyAAjQHX0njD8_5zOvs`uTuE|~l?d@#l)r>kb}yt);2k&lV=3tM<9fP~9m%U7}D zC+_9aC0sUQ-}36i_O>p}+_*v9lgd^#%v|I!OyckPcA+Y7Gk5S;@*H1@^Wm@xV)1a9 zJoI_abqDb-iA}r(8tg8^PistVayxt@ z@s|!I%V5b;mk%l(H)cYa(+4mga(oSwl2GKqxV&#|(Eq5)QFJ_jIZMRi^2Vw+klT59 z(H>^CstN-Lu8$V2xVCWrsmepTaGa}u>ucXXkdT+(Gd=v9_v6BiIl>UDC~XZL-?8r7 z6A@n9=OTU)6Hu*P4Xs5$rh6#Xy6pE)t?~zOlexGm#l#f<&dhvGtQ*$8KWPI#LmS2s znjnrAQU{uGZ8}aUmRut9h|#Bm*p*Ww@bKs!S55ffRJK_eW_ze9Ebz6SVX+a}h$Y|Q zyX(EJzL>xsCHAeBe0F9kc7FvxZ!}!QnuI32yseLo)-KMxJ|ZyT3*v0_dbW9dH3`1?k*a~TRoEW&`Q#Or;6e#pP&@5p`M>nESW$2I0L znMQ$)l(Zi7GM>qgs&E$8qn%f-&CYYa9#mWn2uFZ}u_oj*>knrCZO>W$vpz{lm8 z@mSminW7DWWxq&5hql^q%PxKauzyjM7Y7T1!PR?q!|sjzExorxTvxXgjxkBp$VG#Cb&*UbW|#LJ@jzLgV>J!H35 z!QaQ+AB#NnE#Q1#jI#;x+( z!s;j01u%?R_cNuSh^{yN8?lnXs5dT}nU$r?h(>42nkgFLEJP8%O`TWhJS$zl3Jf6~ z$`ogbvHMdu5ksn5;Pun5%$x3(>rmOI7gbAkE`b-OJWA!UMm8~8w>iGPSue%XK$yOc zhZcBvm{0DI>)SU6v}}m>^#74>aNlkS5NXwl<)^gDBm5cl%kPJ@+(tMjmnX1);tGEs zvq%uU7Pmk5uN=okKFruQ&WlFRw_vY{97*&TnLQEUYJT)4%|0!K&4VSxWJ=ZA@JYfi zgz>eUdcHds*Was*oQa4qjL%D7V+RO&LWRL~k#14~E6WeL3RM{9709w)%OfB_^Gg`D z*fB2L?6MMH9YiET`R~8ZN)A(y-J{bUnSLLDIhwn>-If@)We46^3#^*f@1*tCdG4Wl zf>=5J+9Wpsp251DT+u~=l1=_HtB0tp4{yu!XJ2NM8QQRFUQs?wZVnP{8@2d{_Zj2# zhfEc4o)n5kKC@3I^s2=tN_sq&h?a3Wats&bO!KY%1DgKcdV_^Qn}=EW!I&w!b;?-qeGwdjo}nf@}m zyoSZb;{Gv85}pw$Mv;KBgoR&au7_Hp2fhichDu%OiV0%zY^kS|t&c*jwKPLtX&2Th zeM)hySj#_Q;A9mY!;Ni1l|}bV z*8*NMCrjo}K{ib3{u(8Yc~LGutR1|L+dmYj$vcUYYz{W|H=M@LW#Cy6K2`fIpeUwj*)mC1$8c%k(K^ z-=j&!Yx-(WG3jz1_x(fmv*nd?w2_yPX{*cVIr{!_8%5AmRU(NZc-xyBN`P8Hd|YI> zGisyx&vL(BiAea(0Gv zma#SfN*$No8~j0d@|4FY(BtWMF-YRM>Aso`<*Sna 
z9C;+l&rti){?nzm*g{gh*P)|kttaHQYmYepD8$J&hY~Txw?XiUEXX5wJ*%>AE~rkmwqHsPaBKm?&)Y+4>c7rpBsJu3+nyvq$Hg zCYm?f+WZOZ9A%p`NP(5W-SPvR)?|xalT}*|!nuk&>xgfGBz9sfACH-v`lKXtKSGgDyTIXEEeM8J*8vAAP?~3B2`iza$ze9s6Of1nut?pNHlfOGVS4m@e%S^Hd;pI9I zu%^(eN%0s2Vxd{$rwt09I`MrhZkv>MyQH5WK1$zc>$$$2am_-lVoT?M7=;LP3)Q%0 z%8%*1@RP&6oGp7@7n0Jh0o+Up+8+(-FY!2_dTOD{rcr?xFq~!XT4Tv z(pwmN6R;;+5Nq~aWBF$LD~amA?m-cg&j7eI$u1fqzg;%8|K;6;M=ZfE{a(`LF5b%B zh5>gKy(XPy~oh=Hmu~iRc6hZVLGwE&cydAI2T-56Qd^gbH#^LDl3v=S4_Wt`VT~K`igRJ~^!3Wz zb(~mclFWWqqFi{}reRk!vhgO&Hfs^ACoP~ywxzji4TJdl1V;6Z%D%9z$=8~DNa`2bK)q6;u))=Iq-!({IT)d~P&-PIOk z_MJoMJk7^gIY~~yrw}>N{dZ{!u!gS<6|+A<)P+DN-lo~!qd8uxJ(Bv76XOgV_{DLegYW`auzv>3Awas?wQaOLRlDdc!r@N8OYALo5I{@d4Jr-W#*WR4=1(PwgP zPk{fYG=~$i1Jbrf=eSSxtF~bD1ik$K`iU>WP%{4UuE^>T!yOeNWOHQMknDmj;bb5V z$plmtsT{O)7Q4|1iFMMx1>c7HE}o2#!;B>IZ0it1#~=ZV9Z!azppiC`Lk+NilutJc zSo4Qv@o@+lyus`?k+ulJm4LObQka+x4%i?JQ{4nb0R4~)w3g%UPpaB!enSATN9f9> z&ueVmG9RzqsOjb`ytdMMEhfo*33QoPcmZwe9&;KM@5M9$Dj8w9t<>sY299DekpB{k zQy9#MAz&|H_TSpkuNhKP5V)`EPpqba^bIfs3UGkC_Z|R2 zas+P;cZ1)m87Jgx^Ya)mZg+s+ZfF2DPv)P8os}g%c z1cZa;3(56dNX!9WxN4IUPkQR72UU<9&C9}#gA((M153?d4EWq3(>Dd}e^br3S&>CxjYS_05 z{RY99H88mB+s_U@y~iXmmCs*6iyeWxx`DA*=^sfDqnH3slzZ=JwFi*j%8EoTIg(Zd zUk3i-P27YL9425ZhxyFFD2S|=>MY47TuhC(#K-*Om6So;r0D|oG!N&A^)*4`QrStH z#Wwkrjk=9o<1MEucH|nN84>6be?_s;cYlek24vwr>-F>0ObjaB_MW=rrnwq-ic-(# zMGq*y4JB%xJ&+e3bIXdX<+nP10R(zRK&5ulJ4>XT^EZZ%=8%?MIlc&SoGf=Ej3_2> z2;j2?y1W|#z0+;w2G}3IFsF;&$?a@3Dl2ZY(4-zb*K0 z-2s8T2>ZU$F%zr^J6bv(MSl2ar^fjO0&(Uravif^13`_BdoFiYPj;b(A6oYop!G(J zR}hm#IFd6FzP8d*z<9ywWB7|LyQk*cA$f}!x$@UUyYTcU0d;6Oas^I1KA{{$(u;>&Dg>gdrWu{q-*tDhBa3iA3#WQ0T9v^6B*8|p3b|h~YBoXwA0NmuzPMCEX6kA%i4YyGeNz0? 
zj$`hA{O!n!82lSmCw(mWU`DbFf~%7mZc`TF$p~bg2bjlEXm2om^^L<=x{I|%j?a?E z3Sw6el+kr&mJ+(odENfQLT_7hm%z@#|TfG?h5A#Nsc|E2^0@#mR{Sz zVJtHe1)TBz_$wev=LDO_cZtlB09mQ#3>YA5(v8LG3;aPE-5L9yOI8zr4*Mj=tg3yI z268HaVuU=2ruWGmNc*8^*^N49#8JtJaWE+#j8v=AfY>f2cYLv!{y;$=#-|qSod~A` zlVG6^8mH^!@3WnCZK{UHS}zJiOFo z6zG55;5Kr5vydMRc$hWJ!!$kBH)N6r#L2C=ulr2(Ut=vO)mg3bujeke580Nho zdCc_A=kd$dy*GbKE_Zby@7gQukfDpA?SQm zam!&Yu~-cv>vfG12e~e1bRS#1x)|AikMBzmrr@F49l<$C>ziF938sJe9$iz zO0%smsFNB|xIcWnA_@K5S>SVL=|r|pNQPMch+JOpUk{Q)@KdS>eY`Jp;rrXGD+?gW z?bZ{jjLNylqx~k;(`68M>uz<;$^G}KQ%Lz@0Gq!v( z#A_W?2$BS$jKGkZy3I3x^hur%GC^szrMzYy&0+A|ds}z3K%Fh(uo7DDb^RG4kMhOS zkTVC_nXAZoEHYwwe0E7@k>OmbNRQ4-Byv_4$Ewaw1(=WXv;r`=mI$-T;fA-h(5NkD6g^f zfozYyZN=8 zU(EtyOIXLUL4G)J`>3+!yG{!tGVU2+sI>c94>O!_7j!TtKncf6i}znrrCUgJ(=CWh zi@m%?>m>ZZ!Gxr$Z=*synUMsO>4Q9`#6c)*$SxRzI~~h*v_?%o(;-HRhB3nF9T`va znYx3YEd;ipNF>NbAqs(Mf6Q3|a-oyw8H5AR(e6yn%yQSa1K7SvXdzBs)KH zr8O}bgk-6G{iyu0hbYncNjDzfS$e-t&_m9xk9dfYUv?QWaDmkMI8B#lg`L&aS$P^I z9y9(#L~j5_-82y^{q*8DIOnEA`TUt4Mhc9WU%q(lE>S+NKWwW$pZo0k@7Jx83|z~; zQaB`EI(&!Oe}*C>~3yPfY8l4V(u*jLY$A5hv6(*1js2DR6gJPaN-{(Dfb+0jf4mM=mKxANC&#Ro3r?d&f z_SvoND^k56Xq!=EXR6B-x(=Z12{;vJx4-%xHI7>bVGoh`gK=~}5{=d%H*$u^5(9b@ ze-mpSn-V+*)gYyBj*y7eS-i>^*;@M4;X@DI82|FHd6rjXCy&%72DYZnM!u!u$=?Oo zj*~3nJfQs9A`;mdE+b_jy&%lmW!%;(68Iud<{4ex3zMiD?R2Id*ApNx5GP+T3K)k+ zJ-NFvOS=&bHt;jh&lp8DbbXb89Bs9x^fX5k95=-J_a^C(H%S*&-hNyhF{xa|Xk3L< zKSYVBW!y(?)|f-gD;a@p?~9+M&`-p^9-ig=1)hHq;#^wD6)6dz0Om|^A&z=GnvS;k9kPyZNY2`5L-N2A5*;_lYD-qe zYf3nbH*Ha;ZGdtRtV(+lcJ9gZ$Xc}r(*K=Rk$iHgCK84S ziZusF+w7*C$W#!Kp9W}m_q|Yd)8mACp(FMKQfCwKs#M;N`yLU57n-yxQZ zgCxRH*C};@9C4R#<%y|RGU|_&2oZZi6BL~bS_UqfJBUD4*m`UU9fo4un+Qc&IMGhp zME(bubFT53kt4_Pz<`M1LMI}Rt?jM$sGOwf#k}8{3!Str#^hKVsW?NA4>md);PP%6 zqRfc$+5M3%Y5>#xI&OVk&W_C>ra28&W)+$?pK@~)+ zMSWP;vxSut!PrkJueKwCTQ=sBRL3PE#OS{1tf*7SiRQ&o>q2uJ){-2*5_nj}C#)y+h@!wA|G9+Y3 z)xL%}d(xTtzklX)Gk8OlN zuA&A?&{`xRMi{ZMIqfzK@OS|R6lV+3$T`O(6XNkCG~K45ELu=8XF!mI<8a_BmLYZx 
zy1I2ou_m?P9y$q4KRY)#IT{cfA$$$sXJZU$o)i|ZDsb`bF-BpBO(1klJ?GL`3=XYr{_aoPF{MAm9svWJig zSgC*{-ikdsn1J3kQ5;&o263-NT$blRc+MhZEBdsTC5kNf%nqqHNH6R7PrefdJNZNC z3Tv&*BMoKnNm>9rz%#9tJ0roqAnRGti;U7HnIgDj$8F#V^Yvsy8M)A{rDr3c2QQ|? z$t;Lm^yt9J4V2M@3Mg0z0wgZ+>O5;&nTfa@1q2)3YiHw`*!`o`{tSrHinv}&V&-;> z24}+|m-VSh?$ErHhg4j%^jf!|^HVMYUp)y~4`jsyfT<2zTCtzLJ!gJ`-4KJm`dkS5`g)y5|ER>k& z!OuIO`0GiPeHMmW|T7iynPoFpe;)E%n~$Ht;41IV4K)#@otdm#L)nP zW-asR;ME-q_3%0BGmFhg&5+1BBy$vlB<`XW2MX;M_911PKm-%y6X*W678}txB<0Kr zo+ec)zOXP0-kH3hJI%U~Mj`r)1r3aS$kQ;}&e<|zy4`|&c-4OwvO9j3GvX%TfJGKk zC1sz@cyR?5+B(P)KE#cM069RHDp@g%D_#sc+~re#lL9_)P9l-Vrlw z*L$s)ejSk|^l_!ZmZ0XAo7u`5`u3Udb}ng`&{rZs_eSyAO>T1MHF1T7o{X!9;o~PoA9Vb{H?jmWxU4q8o|BfORI)B(pqg(4=Z!a23 z@W8|L>VF@!OAp;JR0AYhJi$2V9yr($gz*X1f}QY|tSD{hd=DHd^u-y)eh>%l&jhT< zP1HY0=3wU`)VBhW%b19l@9*3ef8d~8aITp)M28iTCH%NXSetmFdKPY*HsL2&%7E)& zSV~w2w(4WWK;qs*ehM=6x25>a56fhm>tAZ?(s!W?UAykF75PwY*O0oQl?ut1A~(YGb}xKv1}>WwyNkPiySqVMQykLM@_1iC|Eg zIYa#h{{OEQG?K|UBBG1VQlwnt3Nik3qC2+bIVi^rhp|a>p2J5b5~9S*JTZg>L4H+x zs5>h0-f&c>q;&}gBN}3;B!2DZ<&0WNjCzB}jM5s9R92S5#9S*3_ay9E%0rtkbw1bv5?UmnqzSJmio5wSG@k}R&lo)kWBJqX9xS4^Bm~s4l%(+NB-3V;2r$K63 z@Kvy+2h_|S71exjWwdb)ohSLa4MD23B=;%f+jplk0!!sA=d}K@9aWx_M!|97mB#w> zv&pkByAmU5)5r|q>s&kS#3PkK2Jt26dA^&g#=!$?_^bG?-_K>{sJ54fJg5!A)Xu0x ztNWqe?+&hr9zB9m_A|B7a@b%psL)vZfLgc19(2G23awZt=4JClB^=3KR)D-#)v@_x=qMtsyp+tz#Btz=)9Na|zi{ z^E(7sgfX$u(|^@vS)*s95@uO~E31f4db8D^-%FpYX|Y;Oz5sxPW*kc>9lDl7Y64$j zxY)Oi7}XzlelaxiO+HmUs)#jboYF-SJ+x zO?xQ-IPR6se+WKJqnFc=j-a>KkjQ_zlQ6SixmgIUt34Dh^;;-VceMre)kL4v02m_m z)1^YCZy{r8VG0eiJjt@lM}OR)M_Lc!%qfFG({D`LpO%T0qFD1fpv7liEQ~Onk--MH zH@~^*S=bUF_x{MQ`hQXCgSSl3%G99$GJl>C%i>mUgUhd8*HfVVG`iaIDQ!1uNC)p($ zS4pBRpcLtPXLJ7z8w?PuWwg@pq79EkhBKs`7UEBho@@qtb>E!g@{3`aGtdG)_arPp<;d5Km8)Hq($5CXq&gN+wed^ zt9B-cfBE}Fol`}~RwAciegjKvVik@8C{aM+<6t?Q(|K3D@j-LTTM!+NUd^n&YK`b< z1%y-Rr*-B93dpH##OC6U@mxDQQ!%IBXj(w8&DX9XyCFekJCO>)FMo%AsMd0w3RN%1 z`C+V$EIO!`oV=2;i|G2|;3;h@BW!@eAe?F{fl+kgg`}E=5E@5WYU--sIBAAfmgctL zAJ=W<0_JgaBed>&yNte+A#j7f#Wmtsnbq~wKRE~F@t 
zbpqqOBQv*(FXH>)7SAQO!ge;0I^i_0e2e6jUpr*pAdN?V*E@da${~TOhj3G| zV8UwK&YEUD1DK6(jkfsnuHmRlR;aRItn+W6`;(dzISEbZUR=NHB&~n*WeQl+jAw27 zsw1|F(zj|S5D~&{ifuL>>&GOdC;&`3Duv+x@d{6J6dt59(SiTlv-J8@Ry&^rBYaPd z?VQBqanE{N(a$d%8X|gySYo%9a^j9#sLhI89yFz640qe0b@>A_)=bv$q!0a&KUpSX zn93E%6p*KJo51&1{4Pni30)yNvU=8?b)do*k!SB0_24i9xJ8ty!;2n-Y;@p!T`yTvD6>@@x<2&u6c}@Rw0rzzk9B9*5S^WEWfJ4}fP=>uJk> z-KStjC4vg!C}!TnRG|Ayh~ct8q8|nI*XqQwJROB`-vJ_uh&bc~ zLo2ba>_VjG6a0c_-#n*07V99_9#&g0o^%?wsR??xT0vzaWFdBnsW{G546%1>98Xh& zvkawku?o^~$36?sr{J1CLRCPF9QQHLIEeR}l49QNNCH4KXZYI_A89~Ss?E`dGt>Jv zcT?zDmvz2N7_gcBlqgLr>S#rsn53nc!kE`_Fo19KPR%Pu>+dazgI0n%>3W!+d z_$ar{u?}QNO=zk2cf2@nJIzmX4bOInQ$xM97}P&_L5HZ0Ka+K2`xoVH6iVmo=X{-U z>n8yiXdPx&5sQ}5t%p$L&Gbz~c80ybudZ>@L9f6NiWAY&l zhx)99@CYO-H|)@75veJ4P#6d?;}5~)7$G0e#%7T20fFtMMzYX0x{Z;`+2Iq9^!H{A zV$*AiyK?9IY&1+6r-THH%*MNlJ$eYZ_F?>?6chhL&+!lJ?SP9fQB1W~`q$eJ7^_U< zXdd&7M?JgE?Np4k)S+vsj_qD}m1`0K-_#__f02-~@o#GU)z3Arpm#l*3dR~@lO!rl zll8YHm$Q&AVfl$LTV%)_CkE*n9b)yr;(hEFS+^bztn9A+0!e!SwR-di(JvwIT~W+;`eDXF zFUYm8i?ZR&^4<>-xe!}rLwa#TbiBXJ7qqM^MWBW*3KTW5iDwlZ321cV( z%o8yGSbRRN1durW7Kw(Z6^QEqNXT<|>VJNvFPQztD|7fJ@7|ck38dz@tyoEpUPoI+ z?ooA&8PmKZR=)u2tMko9JN1IJ=d36*Q##b{P*`!*))6kYJTh>Efmi*O|L-Ay;<;o9yOggpOFu3B ze1)A(HfFU#1o0{97}Wcts4(JI&1j3GhO%R42(Ow1KfVrEc6VFo4=!Hiq)Y<13Ar`3 z6>&UQs}rO2-NB$9;qOO83g`=ftIF4dN`uG6&HkGhy2QR+W*tY;TD@9F5MT3ND05UP z=@|PIWW{uDaIN)#8TkrbsX9w*T-v0PxBJ~6G*)Uc*Y^K|Koa`bGflIl{TDq{TyL7xWL>uW@o3y`X-ba&T~&Jr4jQ7v-d_Iy-&E8Rb71qE=@Es zjw4)BjxMop$Ii`jkBz`r(|ZW9Dv-T_MxC_jt$TouEwWXUj`P_s@jWaYR;afsxmIrL z$(7W}MdY~Z!J;H7ou{_1g|b<$2bu@>cudV^XcY85zC#-D6t}8R^tT6)Z%RqKr|JV= zG)j2G=q|-(U?UkwFHBKvt2Urk`w=15n0}R~rLdbfUg~;-m&7-x^BhEq1x2Y)XVXCM zP)4-+S(S|C5_|*iW_Nif*JD&KR{>MW+-v!9B?j`{7aE_irdzdQa3X(c7UY?zj7`5# zQ>tlx@|qDc;UO0Eo2x4t;bb&-DqbHhz}k^Xz%p&59+bk?y4 zZRv_eR4LJ#x@H@cTF{vOrIJv$lz<+O+z8IUDb= zF^L`XS4XR=TVRCZ^d_L}(rt8%wav$u?&y||bib<_xyI?wC+ai_E`#4|1&wIgPO{Sc z3$OlRh#*HI2j;g%Y zXH!;fadkEOrOtRRyTSX~2O6R3pNnHVu=;bp-;J{@le6N|9Z9ue`}1PmCZq{wvFp&S 
ztkyqEA0R?i*IT7}MxHLj+<0HP4<_e_e+yDZdcc_{YPsmZSHJd}=BUXT&{5FYXa$&U1ni7%siEJM1M@>_bDh)l0go7Dwl{(`E$o9F((nj zc0W9`hXs9wA0?|+N~ANnRM{-P65RT&;s*6{y%f3bC2f|qKz1n!Di3=1i4kvr+11&U zb3YKMfNbpEQ&#${@o(hGr2Op};9wtBxye|l?MQw49p>*~nlNy2(Sf|qo#)d=v|x3@hTiJ4I?qaG}=s=N?pc&yS-|8rsC*LZ!x1-OP3HQ z9)P|qvVsfShD2!BRhT6d;uYee*hKfgGgp*Pi$6bim^idvpipS#|iE9Kgv^3pj8=iYqsN zKvCTwlKHJC1KRj&0ISAr`|lvjysdIFu>U}aDX%0jN%%yKO)85FJ3iHN?3`t!m)}U| zIjcyp0NMHuN%~XSFi{wQj`835nUeMbi63fLLl~9Puk$fYZo>y#N+xWnwCq`RT=l(?ntqrYf_;HrV z0QFfTwaUlrRfy(^o0EEZq?3Zyh|SGKk*IS^^DPd~Vfgpb7czgNiit&N)sw}HOzMr# zbuW8}7v!z%eb2W;lCm(riKra^{}BF!{ur30q8`uyqb-x{jD_aNO0Ax4UE8<%a_9o5 zN#P`tfkTEy+&z2Wuty)6F5r+~#paWRUaFD!{mXT8#9Y5{F?XP0)`0Z*xUKC?7=1c@ zkN(`Rm|%b9Gxh^o%+xm=bC`9kZzj|WjGg`aP3@ZP#B~HCL{5WGE`x9fv<+Mm+Wa|c z9jxb}b@%=6!7pWnhQkApxx42n`Ut$Lc0g%*vbJ5&4Vkg%$`~zO`sb-62FqPc;@`X+ zS$hoAUu#no2nn{Q##m}x-)xOI0b(!h*6)|e9GWvJQTgY@`AE6}5M`kdb^uVw$Ne_P;n*qa5GHF2jbw2=v)wKLyZgc) zUR#T>5Vp7Am!c?6`94`0$n+v36OHQazpXCjgQ26~UPHS$8Io!pn5$4@JBj*%U}egA zdy=wDIVx9)6}XKG74R?g+|^yaJmkt;^am1{qJ}B_{0B9FURdRYQBuHoPSNi*Gc85w zm$lJ`RE(m^`tMz*+PIL(@bOM}y_>b9_#&g_f4#IA?)qJ8W4~vA$QY-hPj2XL^oMGY zs$-N)um1%NP9lk%+e>4Y3Q>ha-DQhFo9PyJ=#l&hn9KkeuRe^6ac$$OiMcLeYQL=I zuKXaeEx`Whef{OPyYI=LBq*|vo;dc1{Xrlze?baLW>47iDe=HQnZvRbx5TcH<{_fq!6oea z!+b->%WpQ|O(R77&0>NMZkscM>a)bsH|ZwQ0(Pf+s%wFxAI$UzF?(OFppjCJ3;!*3 zJk`xoLV!j}w0H6UfdC}41~IWiBL?T-0C3Jpmo5!pB=3aQVKUy7ydt3~q^%VUC>X81 zOcQo_ohwWSU}p}afGU9KAG7VOlMd}8a7L()p~SE@CvPmK&jyGn)os1J(&TwY;e2LY zNZwd_78>LBh3?}2!+7%#iCZbDoxv3e23L8x;_~&{bOX3;*8?xtmlJ|Hcpb}2pd{L# zQ0CV*2oqLw@q{$ghf^0TSEcErcP}hPv9GzGb|TZ~H7ExR z$@rOD4yMS&5t(<9k199lyvYhQs^jSfR@yw$4qH@r6NxvJ+u*2~B)EMr?G?T>$Eoga zl+pSYfV__4;^beWdo|$yl>V}4ngq=0+{0v}voeZFh-gy0u#sNCk>igzarYL_Q~qb= zeT+}LY4+odcr{;5UdI5aqEZzjx%8LxAp$md$vjjlnI(t`!_GOtT&9i6Gu^*j`nu`0 zKUyQZ3Je0~wh^d-Zps6mClV}}5Aeq~vYyQ=A`U6hm&vb~4%~pUo|c@79e4K)+mnOf zLxrNuLkX2J)B6$`7MYcI=n*E%C>XO>DlNZ0Z$=SLw4X+r*TAkNk^X$nGuaM6ZEG;B zLmK%8jxKTQn4jAwxp_tgzzb^ynKg&>=#=VMmOIh_)bS51Rsra&+& 
zL(R)zQLp93|F%LA_D_{45uB0Bd!(0t2gMIDRx6~IpUIU6^tyTiS8e+=Q@rwqn(g6l z=Az40#@Z3C_UN-04IsDeOPzIxCgua8R;1ghqS}T>23v2YAUp{(GCnsi|dORfg6qgDDT& z!2}vu`u8)ev!;*(aSf*UF2{1_h9n_uJa-ira1Vl!1nDt2-1ROO#2;1`oqRChSbLJT zolvKM6eM#LW)@=@JsO=#Y5J=1%J(29JiPL*TAUzQj${lAkz64wX@Yn-U$v>i zlS-Tf@j`wMRyozgP-|6LQXc6Pd8LTugUo@eN9!4X8s0UmFa8xBSebVlxV8BBZO@C^u$>kZ6Oqq>a72KTG`KI znDr!*9lZR{ewkDdJ(!kkdux9A#L4^r_0v(&uRV$8KH3H4|Bu%$Vz>Jl^P;Q9=h37> zaL;7Ff}wnCE4~pzE><-sFIy%Ka-?Z(*G9`P!kqsV;=Bh8Oq*X94k7u>_1fF?;43*g zSiCLx3}_hLj3G3!6izdj29^DAo_`vMzXd+2(PGyza&IQspR0WaF0x=)`VM;nV6WxC z7C1;HeWd;YQxt?*bgL{AG>Y|g@4}2Kz|cyR8-XSR;@LwA@pr!gM9p0NI>)^$JpI{$ zgvmffox1o}9@~*O4*a_ViLy3w0}bqXla9Z@57U@v7Y>~qaP8r@8sUcWNzW9H&uf^i z0f~9DkV+Oi|M;vb`ghj^VF{tmZy7%3lfS9t{ssTkcy#V5+Pgld;OA%JPCT{6{t_WT zJ&i&wB|+=Y48Il!l7OHX8l1#!n1@VpB=FYjS9q5IOaolV8=KJEdE{`GCj9hjWEn^> z&AqxXHMiwXpT8(GnsNk;$5US^!0s~x%#GSRAMebf7gzESiiIq&G|a&9JkjnI81nMl zPm4i#x{qt(ua9+lf$?G(*WSiAMf9_rUZEeeUuy+C(S%e1D_mkbUfps@K%#TN=rz;o z{U{BQ-?_mTQV~BqpHd0PKM2&VceaRn*kXhj(H{R%V-%ZJ;QZ~*d;0mi$xVKkL<~xkJuk} zi6bvOJM{qmUqyiZS`Ag(`JeRvBn8I1u4BniYF=u4SN2z_4v z4P?I{TPBPdUPxHOoc@)QzsoWD<)CEyk+yjk{n4(B2ed;G?1I{Bje^*Ktm#n?z@GL1 z$%qlrJqL?g{ku2jNmFimI5wB0;ho+k`a9StZFbsEf3ck2_H}FoijmlZCA1(rDEz3O zrU34p=23YVJ?>mHYYr==1sAzwdQl*Ztq!nP+#2kIGFG zPzYbB_bPJx)NK9GtWt~`yB>xBpwTb!6)At8-}ii+pdoMnQrF6VN5{WXwHqzB07{!d z)#oD|EUW#(e{EJSUY|U*1r(HAP_d^%+o`)Y@yr=0Q~ETM8&?)`|AxL6YU1Uev$2m} z{r5zVp}?K<%nF*Gp>m5;a3FLuul>ClUh*dxugjqGJ)VKFt#XK7egCu*Bi7Cs>HgmV z_qZSEflc21H}YJ6@BD6J3#=7!IDa}FJT$=2&eOU~H3m-XUDS+Lord-<9b^Z(u!ryI zWC|^k5P35GZ=Xd5pA}p#kNN*T3wRMh0qGC5?iXAJi=)dg)Fs7a)eMT3C+TE~i8cjo zr$LR-QxwZ4gi!2TUq4;|wR2~irGT0G-^~z+gm-MSTt3s^4~^muJKHw$&@??$*sKSr z3Nbo9y%h4}LQz+GLU?F6ya3hdY7E?bqy8C+iF7Esyf@&wSh7`Vj^9ff@>s(JLE<5XMsAcVF81u87!`fm~=_ z+o|#<{WC;9gZsh-|K2PQ4JoX<*|1mt#+npi6nn^4s?_BHoX5-G{<`Ha;0R}V5b^s; zvnE}zBQ`pI1-fycnP>*fuXlc|P4geBxHD3Ui`26LOd=Q%av-ys3hmlKz zR`TV-!Y+Va&LFCg1W%D57I#T?&g{{0j!jzGvDJpDG*tOlgEXAxIug-aE%*Nm`mK=W(Dz&2 
z|1P60$#76kXd|V2*@e0a7N0K8>Wh6$D}yDmtIrr-7C*fvC}%|$UhUj#Sx^KOO*cGr zaUiF1DcO~+;85?elv~$y|E_T!ns}VxRqlK2E+RTugTH@yg5!zTjM{VP$>u>QlL4Y{ z9i;sJAV8#ErHj#uBcK$65;6@rQA2G?JDsl^gy5-X3$ZzWAEge4Q2D&Dwprel!+6+5 zfm-Lntt^8XqZb-70j(#K&wVd3*?gb6rr5H%J(kriwR{Aypg<@ z!X2ab-;L%FDA{s7ouU2v6t$Scp$YC#OJI`qaErSN&5>cd?!#+b6NVpURp^3)QaJ#pFOYJdc!zCh2CXK`n<({vKkpADbl%WPs!N9n zecBO)s3Vd3ieA)`2gbYH0*UekK^J!MxO4pBH;9j_{PJo&_?dI+QV&Q}U5=*$)%yO% zTHwF+iw+Y61)YiGOF@T$Mp9=QLUh(G(F^2_gcCI`j)5l`V^WRhSFy=tQl3*Eu#J|_ zT%B!fcSWuN4Ie0_Zt(BM@KJ|g`oU}I!QrYV(XxZwvbYwp&V~K0c{&@m7Q?j8a~{kM zy=kz^*L)C1JZC2ykSzwDlk7V>n14S>M+mv5&zVaNhu1W8#t!oS&;w&`Vb0&BD!;WM zxp6aCd8tn9UDD*4)rt z63^%*im>&c|3+$D5yY!|Mio3`hpRW`JW8UNl37N?p%Bd0DX<}Q;q1kGfB=058c`ck zA9w8%bPL_YNWkdL63>F_Uk9qEd8oT@LBcwtA?esbDQwGHMM(WO8-(?fWen|JI5NGr zTI}6$coR9i>kwio)c7EeINQ@bu$(70T>bXqn=;~HTL=h*Ot*lH=865OgW{oyBqxWvc1L38jP0!h>VlwG>2@l#T^x=;wqr*cJa_@9Jy}e>v-zPBHiIJ(Ou_2`mZV%l;bi~GaMu>C9H%ARK+9|p?80dP807q4*=)cl^ zx7+ma>CFqng?``e@v=F5dMEh9cnVW}^z6LmgIb6EyTEWEhM>(gt|L0rqU!Ltw5SP9 zPQhln9nvwtt?awI8cl~U<q-5^j)2FJCi#S^=5=&lTFJwJJ9S8Tlh z0PfpVW6;8Q*hrlA|NdVCEQnnHR-2b4Ps&l$u04}zpuIwe9Xk@H$k-deljaP|Gkk?R zi#YA;`A6sb{`{Vy4kcrmJM)L;f#tJ@)}?xHE<;ACb{%=B=hvs4&hzBTm>30xh5;X= zUcl-&?J|1dlO{B%nL?)It8+)tDPh^I)%tld8nACuE+OsmQ;N2g6r2JYyw?*2)1^-h zf1E~k@N=F4pp>7(&X<&ohC2IchWDAS>T?*5aL9*OmGNF@;Zc2$$L)qq{L1CN1Z86i zDjal)9S$q!mLp2jaOaWEMk`6>5B>xkbbqEJdaM_!P+?ewPG|P|&zBM%p-e%{O+qi7 zuDr2IhDk2B6AWjN!9PY>3DmC$htvoPoujy6?uNa~O27E+Mi`k0DS{3%Z(WA4*<119 zw|iJbBw1!u6!9kKlqTfu{Ot|jlheSg2(L}n^}ilq0n~!y2s~m3g4w@Ixs5B#XK+^& zR>Hap;Ro|`|0Zy1V)L~wYO^@se|xyUTY=-B9kwotY>40$Iq(19UmyM>2cZf?_1FJT zzBycr_~P4qK{#2xyb*}c)ZC2S-$;W7aI8d&{od9Jzz-Di-x@dPfg-=s!&{yuZYu_U z-^`1Bx$lK_uGzwbjcORw(~|uGF20MEuzzzN=&Cdr2_h1Z-nE&k0>s-12|JwKt(DPq zz=+bp(RBvtoLz?RCn75vnaUIh2o;f|Pb6SMUQjlll^aVFJ`6>>p{NVBZm9r4YOLq# zw)W{acOyNd><6z%#4X_i^8+Tg`1GcO6ta3W`D@qGX&oVZfF`RUn}EWeeZ z(_;10D0wJ=%aZ{B>goxubDuo_7Utr;Z@7A;_+zyaM4VhS(%W8ivIa!UPBAMjxxokN z5&(AWdR#Zdfe{iPU;_2NpiA>+YhS>!O21(3qq-nlhAmx&FqSZH0Ury$qxC*8J 
z_4})-hbg3!7^%EZHNz3C@|RAv_EBEfI)LmcpQpaulC29E_+3CLE^>HxvO`CTpwE5g zNVS2$Qmk!%H0FV2s*#@KTY!%077YICR^DHwTSgT~FYhBr#K#u_q(6M&(Q)h(Bb{fS zjC&fj@-FVjgm6F|@%oRjLD|B2NU@pzYZKiF(4fiU9i=a79fY~vI)JLg;V-v!`9dp1 zd<&s&dO&gnY3u;R zkO3fq+1}1K+bF6=yLHC{Kmniqe&#w-J%&tl(RyL-IlU24x=eOKO-@}de2gjq2POXB z2wAN)eJAp08WA;CBA%O`m7MJ+C=9RKoG6=Ny^*5L_zff`%PmpE_xNdhpo);~X8*HJ znj_<$+d@V!1pPjC**vgQ1Aibv=^>~ql-B*F9J|(GdNh8sa}5h{nJt_ z7%Vf?91^VP*&ahR1e0$j$6$)&Bx!pua6E3zD%Bg@6KJdPgRN4P=XDmrr{5lDv`#T2 zFgzbgVrc&fWL)oo{dbogB@SOQX#5WKM=uEb0t9WF8(TIn103=lvNOFRt7F(ONL9q4 z-VyN6SA>94txizjy1QBj4f35&=gZAG2W$rdCGK*q9*KDe4J|R>Hk_rIqwM~^`W__9 z_l@UMgvkSSp-gpLqZhYf8Hh(lajYS8E>^Uc4Ou#$gBY{<*B9G^G=%KEpKQ+(GcDJ+ zj^vr(!akLCFV(KkP-I7M%)%hY!J+v212DcS>o3J5i%Om9$blvB%(n01b08&?) zRanVSmIfL$jy}E+>}pcA^LfwR9pH>_Wu38W!!#`40KVP$cp}rXE1Fd88PBb zD9dU~FXML`e{Up*)>uPeN-}%qYI>z)N}O#p6oXc2sTtt$A-)@J$-y-0J^R(ww`>eA0(Z7#`h6Oc$mMbBBx30LhYNR znFb!NWnJ|gj682qZ76um8%vTDgo=E}vx$9FrxCq+NX(scX|&RKJOa0^a4*X)KZ8a) z5QzMw?F(gRd56Va!`9y^cKFdN*QHE)zWjKupYlp+;BBn5M)3BRu|vJ}4luPB6t5Kw z>_F^VYl?Bm_2^XYbD1rhnLNjx{LDC6j=BM}&+&P~et#t8%D432o4^)Zs`xIxiF*+b zc{-5k$g!s(l0lfw)&LSlTxY|#G|)hd0c-A5brPb%XT)Vy5X4KbNbf>=eMglV$x+{v zp<6ZC|JwzV#}YJ}=8wf)I9@Ps19jIA`T|jWM4Vi3eaH*`M_M$GWGP0^gkHrH~zbA3R)kxi+;Bj5=dc8a5?hSAbv zSm8>j$?CijU+aUp89^z`S1xU_?=7!{yHW{|rU98$Slq$u`?xdR%_@C5s$k@iSzRp@ z+RaVhsbOvTqgg--4#WGLyg6eSS%dlm>!+Vme zRMMdrF}EuahI&;UcToJ&E?n89#NUp=r;MXYmbm+Qk+AjkOmG9{Ejc_HvzvZJ>A|su z3G|Y@$8St>R7BXULd5k=8lq3L88H_%Kb5wAgu;)v(P5fb<1p1T+5QTpdX74*XK7Ho zoXkRdTLb>j$yM-?c|`T0;HU5R*DIWu5cVMRDj|>uEjh7iP)aF}v19m`c7?qImCx_F z5gEaHMY$Vy86VH6K8Z(d3K@peGVTV&Nww_b#3FENy6P#N8NT+!WJj`KHLj`Yka0u! 
z@m*r!M%F7^cY`+GUt)afedZYB#oKb$C*1T_08nB*Q-8J?^HA76{|uflLF4_gc}HXM zI|(b=QLlgsAn%~rUZ$9zQM0MALkgXydli)T+uSkh;!u$M3SN`*J-nl1=qO!3#)@7T z_b}zppBpB#6~krq>HS1(zBmoRQ`sN_0>MFT4ONq?p2-UHKqu1TeORB_cd1g1RVlUp zbecXn@9|cxS9gw&dg|mcDDr;s(Nq>h$&xer3myz{8TB2H2tm%c%+V|Inhmbu52B!ZBz zSn#8Wh@f00`g#kuNq44aZOk5N3@O9n{14~3kv`Uug5qv?U~E4mMrkX!Vy|4AOO%~h z1R(sQSn%l)ZWEl(bJi!rrrA0jUFYulHY0D68I>zBw&(*c$@hKPr`j+1zZd>~N%es+712RT z*f;dR?(Dv97s+Ag9)e9NP)I&$D za@ep(9nSZK^GRR_9dsJZF?ZVUdNr|l6ODy| z*coC4Ms)lJYtcv|O5ezpxjP1-J4t`o6E&QDG-07O%SE~yL#)-3g}n3y=efIwXb%@H zF0-IODR^QAHm7!tQ`zq>M9${UE~LXDbss9-i20i-0F)i;R*9%T0PmW94a>93aW1X; zk+-a#)$S6BQ{&TGBqHY!wYw>mJj8*=LL>=dT0`-r4RRH{R;P8da!(#H#M?)y=DCq4 zC?hDm>8Us=8IMLUy5m9V(Rnnhnqslgww}rVf%t#Bqf&85i*+$!Q1a>d*W9~2TZE1V z#o8f}`_o9Z`JLD(BR5 zonhel7M;bB=Axcbc^5|2UF?c!jxGuUJ?$+GhM5~=WvY_4_y`12wtM0gxd|vS1$b`rIjo73L$u{H}VaoO!AcV+4S+i!DJ|NzTeE`FbO7lewd%4=VX3JV% zo$Eh_6j%jJY+P?o7k&uZQ=d$F`R3@UYT)1n@;tBQUw-vfUImFiAw>gy#_(gjR@?hJkrOy?u0^O8)(pF1?r?+KZPv&9+|p1Ne9X6>@TR|(~h zeSh42es5IvgQ}0iG>@Jvk$(G7?Hpg)kw0x6j*QjEe^#0=YgK+CUO@Hh>$&z&tp0mg zmmIwa0vzz=#L^-8pF)emqtKqT^>Lb*`EYM}3M(| zi18B}*`}2F+R|#IctC!-TSSeh0?r2*P=7sDl17USv3g57Mo?t2j)|DO-g`^*qXz;s z>}C*baW{@1g|S*UgmKHMOd)G>ei54q-u!oxL77~UT@l^fliA`(YFYs8kEt8^;1&<| zs+Sn{#%vkp>S|S|Wxeq-2bq~EQC>@)#QErk0pFhma2KKI)!Q%6&AmB{sA_1AqpY(1 zMG@?EHyC28rh%EAPkv^E^KK4!ZVPSAQ@Q?i+f(&F+V8!D_V7D6<8@x&JU(Yh{Qcye zkjv6Z^#-G7$m$y4#Ek{0p0mKI*b$stIr-S?`A{2m%BNz}N9T*Dbee6F3;9BU8;lF` zKAt1#@ylC1IfWrAcsiyqjV0X9I*!~WnolC7eK)*cG(l7>LwZy})_uaM;do$6TIPvU z7s-64aG%bca^;;==?7@5i#5C}dDBOhKS>V(G6L0cO|+*JvNZN%WeKs=;8S#n`)zcR zaN|2Z?FN1W`eF;LQd;|240jlN12kuK=`_j7jbdWnZVpNdp$}=Wl6WZv3IUMx( zAl7g4t!rL$@lK7Ob=VtU+^I`FAVi;Z-?M)eKIA*yH>l51lU;$5o&i(+9*BxqE6ZhyT_Y)`vG&m zJo7OUk*ABptQl~M&$X*bxWl2sn3#YV} ze`my!&E!Pg#1<_Kp1bYKS(w~^lF?O}emHZN4G44VRO$rNVL zmWjpE;%B4R>HB6qd7WvSryGetkHe49{*R?*!ApO6Mgl)%4$Xq&4PTmxa1EbjQeL6d zc49|107HvM3|HAdv|Sl_=p@IrEQk z!47hs*EJTqv~rGC;oaf{@wDwCz3OzW1Y#uh?%_>Wijn?>U(gQN;ENfCQhV@(c_~QHlcq2MCZ~Uocryk#nmts5*W#O; 
zTII)V&YWpvbCFXT-40RO_lFJ;x z1>$}|Y#D#1HpMf)*s?>`b}X`Yuyap2@os#&1!?~wwzQ5WOf0zvwbC(xa1CAkNAZoB zX(UIb)6BIkV~;E_!J=D3=BKE?*UE4=kzOhiksx011K4&2W?7z|0y?HKnt~z765TP( z?^v5C^I6+rX&NhCfp`5-wG+fWQ&e8UhpVameDrHs?_;D%rRKdtmkRQn@!=kVDX0^_ zmf62TCbT6M$oRFOIYONAgKyNfRopOxD2nSEGRg?B%3$V7~jU8hlV!c7$6oNk)K(Jg&ps z^K$tw@rg&hPfKMlOu=5h5^|X>2iF-iJYVMB*>m$@c;B!W5%oeKr@-+bkQv}}$J&SS zR}I_otJkD!N!})3;?BBve=f(ixkmf`JHO^PO78>bFO;pA2ho1Gy zxEw_JyJ$;%&%`k;GpI9gx1c?s3m9qCoW{O#;{A9c)>q?QSIbryhkNtdyKm*b%_HGC zNqZ~T0L5g;87@K_mL~v=&mA0^$KhId5lTfs+W1OX2gv&A$452p{)pUz_e8cj+{j&U zK5>0(~hk~GVJd**P+#eXsWnNg{J)py1%YKADp=65Ie+tZPUslpJ44ck00wXHr zCFWr;U{EM@Qj^CfDw3n-eG}m9-969-5$Dn05;`e6~2X|JE{W`#7tyT+`#4kv8Als(8=j0#tSawj?X=D=vw zF@elQ3ZH2uk&Q8^xpu#lO|3xrd7brVl7$sI@5#|$rwWvR`|B(96*~N$Ind&o{gZ6> z^%ui!!h{UoI9kd_xi7d=4-C-HqE&Ia?wuh9B271v)Dp!d1hNFrbx>w`a4^apYzTp(4Wm@syvVXo&J=-p}74*_Q}d{$uAIV^;nVy(RfJnvgwB=Rkt3 zRjYC;u44a57|XR&fC*4m=;SkVu@P7naElMI&QntX4v{g zxcwsIRU@Nr^EPTe*JS;{UbR{~WJpk;>Bc%&6y`Y%Pe#7!T*@+afNm7ojz3TG)#5X3 zgY50ahL1l~WLJ94#RxYssobnNJ>Cnvyfdft=kBxf=H;^Hd35C@sJg&9_ItNMJ1Gpd zgir2?UM~zatW>_TZ0~vH2{uI#2|dQ8;4${*Rg5osWVL=Q5oa>LTXu4e*p3+-FLvky z?SlQx>ApN!hHVsPdxAEjQ{yNBLvGlH7nS$zlZE(A6vs6RmaVv&yd$>lG`I+79&*!f zr4pJHzGJq3zGpRBn8PbM*qI?NJS9z@?O`IWMiCsI^y<|A{0`#l^SDKW+wg zmQW68)UYI6&VDfhuZayXYc5miD2|+w4vFooT7=Cee}6@L44AtU^0{x_N1Z`)SdC{1d3L~HPFX@=HsrtT0& zun5HIV1M&Z@K0P>HfxcH>Cx(PPN$rk{t|WFcm&nKxSf;=IsqL>FMUr+Y;@(^T5i=Y!YNx=ewEYyzXRH{RRS6L;|l+!uvLU zOJ3a|`*LS-;!090jwZQzbX_Mv=gk;?i_z!C%+uS3K^O|aZFk&4b}M{7(RT5s-(aH{ zNgr%B9NrFD5+i4`Bwix-+5fp`i<0na+l}~u+iey?N08fZT;tP4QBHTP&}C^9@Pd)r~Fcm|K9awcc+OI)!QFM zyiM>tRq$u-OW9>%d=bt|Jv{G$-^rL#x`zUs?nx{Dv)-N+@5}*kUcfw&656 z!_~NGbj7)7J>|kHqU%`FlGR_T^*1zI?Q0dDVoCOYbDMqb_#Vgc5Y~!s@L(vJ>jmHN zE0sO36;Z{GzJnp1VV5pF3=)~xacS`LuK&^!0`(Yt{nUr@>{qjLJ}cTOksyKP!MjNQ z>VFg~8oNaaMr={!E@_sk(Mia$8xs7#S`fRBX^hv`1+s5wdi>KC{Yh48<;B{MC+kVt zd8g1;myG4PZNiHPF&;<9E749H4zg_c=vLB*lU4NPV{tEX2CSq#%wCz{BU<{b=#E}s z{xFV%YF5^5BjE(rj9Fy49~@$jCD9c*6s)=obI?DKdzi0pu}(=ZGaSDFhgFnTKA0|+ 
zr$Rk##a6!fs^DF4GMosAtwOpR_ZW;#Mf2~RrKJ>4WRk{2_2Z^37|Zv$Lf!(e!aJB0 z%`EQ6PN`(T9yOe4V5_J^&$5SZr{*y~z?rFqHaHf|J>SVq8IzID%5J(p@p8Db;=rr2 zb(2ylY17`Tb%&{WU#dSlTKm3w^67o$tE=aJH&a+sfBH*Gt00^1fXO?CCq)|7MHI$- z<;H$^dAIYLQcsYYk%0*vzvYR`^P6%Ywid0mrlZQu6YkM`@qxsvSJNCh>|u40W$DFk z&|`t{I=-&8@ak6rpU`!7^9s0bXZ$oWJWx9R=S_KTGqSx7swBD(%XboL&MQ}2zujh( zCxU*)xqx|>FezOMj(VT%&e7Mvw%kdeH3=dn!*9p?(#4BWj#92RX=7~f} zsCvzjMP~68NR_bm^v~Awy?K^GE}=ffKCg^B0g}#ROyNkW5p5G*e~p9eAhIQxa_`7O z?#BD*nd59%pi+C*Zw$duO@%w;hAbC8BbP$}~r zVSBkDp2krRkQasIW5;Qz&=qB^T+(e#r*isthr+? z51D3uAyXc7v>q`%lY9g-nJM+C7S3&JHf|@*38+6llhm?-mo8xG8i@QHja1 z>8HfSWy5q#P;QsB(RuoPrpxb_5v;r?x@3=5DB?~~oU(jv^$??;C)?u*+CQIWSVFiw zLU=62^6%Y`w{Cc%Xzr>Yax24scC%3HS&4a&@#e+A+^9)q>fMBgMX$Fwg}P^aBdUx> z*+xFCvnh~f`n+3rGwo;;F<74~%o**c6p=!YkolW2V${ zK0~K`Tu~{PZL%@&>59cx?3b$`g3JBWzQgy!W$M%;;^SZTrErJhJjLe4+`%RjKhwl?9Dkcz%^UZsIXQNDqm%(!({L;48xB~2U+zeSDXOh;U;IMjjG zN!y$?G?fFKMGdx8Pw$gjGnziV*!SkqI)H={w_}`E*E=ZqySI|BOI(!uP3Mu|^y6M} z=j=~qeN%?^5<2^H)16T-QY$5t&2|q;JF*%L>*EjTQcSMzPhZ1Z7vWKV*bhY^EunOD z^JDpP%s?ct(1aDe<_(Eb^Gy|w7-<0@fkW9H9UP0dut0uvwFN45Mv8rb;G?@XxHZ9Ci4@Z zEF0u`|A#yAgnPM0(tnSIznZ>Qzkq;^M=wY6G%~OA_FY}Wwv<3%r5OYr!mzH0Pww}v z@G80^#}LZZo|f!D)YAOQyeq7@YX$_*tx|+Jayhh}?!R78ip$VCH9Yt9V3nq2xYMK% zdnV82{(iEIOcG}uSpvrck40rOL}3nI8Urcf{>EEIe;7MX;79LdxHm?yXfPFAw$WW5 zXpu9pGmlawopY_%;o){W!jl3^ZD-&?}ob7yKuXlsyNw*8WPRQQG%nX3kqKpH%4i+W9` zup1znFOSI8oDy)6N2r{-(|FI{IwqSCYP*V#p`HHRj|9&;C1U;lQRvNbB%4|dP*d2Ol+il8LSlVDcz z!-=8@7LPk~-SX$nkCBH;cSXmzMoH8w4EQRNE21!fg*Vgj8w3!! zvAoS!Xo?d;Tb~Eh!13VoxpI~fk1mt&wIjv?TSN+&m6ezdQot@5!e2CMF2mEI?+b`J zq zoXw)QpCJ)*-sDtPu$hq3TI7@Vd|I;6)Ny}#S05S9qgGJ_So!szk+JMpxzzj-Z4kZc z^EX&=4-gpy%(9TB&2nRpu(@@;zHj>N&7BuM3py0R!#XDC)mUM@U%kUpe`@INkV;20 zGCPKuWHRR%zTWM9_34VH9}`^dumNXZdj8YKoz_6tly*#j7Tdh)qb;aH16m z!-?X(qkS+dCT)F(tX_m2qgC^+W-;r@&`d7RsJh@+1S#4(HrV6o$Q*%b+k57IuYbd) zJrs4XhF?E7%DP}G1ONG%{o1?z*Hc%zqDp0xYYp4I%l+ha(n`c1&o6|4pw{1``uChu z#;enz0wH-fcr0BleH+H*f}S4tW!yghqw+B3(xv? 
z+MS~O2EK$uJa&XA4lt0b*aAaTH%9CJyBa>Gx-<6pmDGth=P3))bjM$oWf-M)FUby7(sLA7vPYrhEQ;D^RPGIrSdMdwoL})`?i< zM6&3%96%tXA6jm2;Osq})NYPdzWVB8-wX4DK9`Li`B`*FIF&(%Dyx>d zfyTzq^?1!720P{Vkya6?={WPR~88V z;8qx9{xzVZxlD?2D@UXNYZpMu*2MRnzb5p97Dtui{a*1>JuzVf1eFasy&K~Rt?YhFp;DeaAsFeephf>~W&di0mF~!n zb_3G&8U8Miy)I1n4k|#tT;)_=O_(+QEv@Rw{K0I7LUIkZkNgs9Db${0O!z7FOvN2yD%*^H= z&2(3a`zDB2jS1qr4ue|#n;r&}3yD#-O2@MhC;SbL7#YbOc% zGEI;AZ+0Ym)OWpbgu`~21vIHtoq4q${%)s9bwQNVR7Gg1lB4&!<=WXu>E_wAV(Uv^ zsYZGNlYFdwnE2*HIo;`xz%>~Ot zbMv))?4d{-qttd;5~gEpi8@ zD-btc1YHqN{d-)sR*kQ=>mb4djIveCXe|eW;FCh4txMzFR#|o{coWM7mz9C!{HKbIBML*{sK`6pBGLq z#$%tHUXqT`XcpjCaq3FJ)jL-I=BB1-+?giCxM_4|uh2(1aB5N|aLw?f6=+<@nCJ*F z01rv~ZP(Rjk{NeC)LbBatBN079xRE!O=N9Ra)wIYAe|Y}W&JV)`@w;&dc{B7l8zK1 z&lqw5u@ijvMPk*{)E}JS@O2I&RWcPf7`f{xO9GsDZQf0=eOaVks1H;e`&#QhnaioE zThaqCyhd;0k@2?r$kI-U1~_Z!%(@1Hwj(EWxuwq~cBdD)(j%C&K*MQ)keGAP%oJ>b z5@IfKD_pVObP>P1CMCe%z||S?>GE?A-FIKqn2nl<#yCv|NE769^gc5y@O?_hY4Z~} zcA1)4J0DN3(W|k7Ibg-kpGc-8Bp7in8}uN}Km z=euNJs3e2h-;O(ZeI@wwpS6{~*wUEXma2h7r#($}##6>CpI*K!HPoO}pqg9Ozg22j z-d<2DOI5Jgj}F%JeApOHL>gonp|HAv4~QtamAL(`PkHJbY7M63h!}cmpuP<^6&bE8 z^@XQ?^?1*YzI!|DmbFaNU34eiS7rsh#Lj}CRt>CK6RFZN(G4b>_-E%L%Wo%}bpfV% zWz#q^zLbr13MwRQK>2ug1NY#cLDEH953!lOfPD#u2jf`{FlL@mCJ=VaOsqr_8!FK&Nh?GvvUsy!-zk&5{Avyh@i|2ibwCxd zQ?+s99REMkbq+!r0qcgE=a)1~!tvMn#dgAizu6cEa^-V4b((%93t-t#TT>azPVLb4 zSLw(B_x;xD`{4qE7wd#$>}_>cD1*BGCN3+q{gEKLIe*??H`ht9+OhvA5$Kc)`62$^ zHJFBR!^bn^lK7CJ`O{Ax)zgg^F%QJ1s@c;t+z1`y$@$aX$34MV#u8b|qocx&6?nJF zlUopV>FC?Fp?mQm{Ly(t<1M}dxg1-jRBrT(CmyL;*IdVg;~Pd8fHK>5Eb{3g1K4#6 zCFa=;iI49A@qB=)7%ECZ+I_+#N|s=d1(D@1&J0(x1_)9r?#G2YxujzP(Y< zfe`4*95m(pArc?6&qWoHb|i!8-$Fo$a&)8XzJ8uw9haFwHR7gH6_Z%Lz}O=I-w>@H zCT~l=Wa-0;FtG2dh0G05@p=sJR92}y7;3_; zQySRifBgeXG6m;BhM073iCCOKzv-7o>Iow+h}+&DmL4L&fq z@SnTlFcOfZaly5U!6ww=5Fuo|N6{_8>Yw`IACry?N6V8EDV!6-;s5@hzthA)@<{FS zQaQ{W|MT~|;RNkTLjm56!HrAvMB!j37`pEbGv3f?7U$aNa4VO(QaII} zz%Mq#e0+Qg>fhtzH|trDM-J04+ANNmL0?CscLz)mZOM2E+J7X5w{ak>n1`91-CdH9 zJ4_{xKbAz@xQz&FA_y=fD!}\n" + "" ] }, { @@ -373,7 
+373,7 @@ "\n", "Please also note that sequential model might not be used in this case since it only\n", "supports layers with single input and output, the extra input of initial state makes\n", - "it impossible to use here.\n" + "it impossible to use here." ] }, { @@ -396,7 +396,7 @@ "\n", "new_lstm_layer = layers.LSTM(64)\n", "new_output = new_lstm_layer(paragraph3, initial_state=existing_state)\n", - "\n" + "" ] }, { @@ -413,7 +413,7 @@ "have the context around the word, not only just the words that come before it.\n", "\n", "Keras provides an easy API for you to build such bidirectional RNNs: the\n", - "`keras.layers.Bidirectional` wrapper.\n" + "`keras.layers.Bidirectional` wrapper." ] }, { @@ -432,7 +432,7 @@ "model.add(layers.Bidirectional(layers.LSTM(32)))\n", "model.add(layers.Dense(10))\n", "\n", - "model.summary()\n" + "model.summary()" ] }, { @@ -449,7 +449,7 @@ "output and the backward layer output. If you need a different merging behavior, e.g.\n", "concatenation, change the `merge_mode` parameter in the `Bidirectional` wrapper\n", "constructor. For more details about `Bidirectional`, please check\n", - "[the API docs](https://keras.io/api/layers/recurrent_layers/bidirectional/).\n" + "[the API docs](https://keras.io/api/layers/recurrent_layers/bidirectional/)." ] }, { @@ -481,7 +481,7 @@ "\n", "For the detailed list of constraints, please see the documentation for the\n", "[LSTM](https://keras.io/api/layers/recurrent_layers/lstm/) and\n", - "[GRU](https://keras.io/api/layers/recurrent_layers/gru/) layers.\n" + "[GRU](https://keras.io/api/layers/recurrent_layers/gru/) layers." ] }, { @@ -495,7 +495,7 @@ "Let's build a simple LSTM model to demonstrate the performance difference.\n", "\n", "We'll use as input sequences the sequence of rows of MNIST digits (treating each row of\n", - "pixels as a timestep), and we'll predict the digit's label.\n" + "pixels as a timestep), and we'll predict the digit's label." 
] }, { @@ -535,7 +535,7 @@ " ]\n", " )\n", " return model\n", - "\n" + "" ] }, { @@ -544,7 +544,7 @@ "colab_type": "text" }, "source": [ - "Let's load the MNIST dataset:\n" + "Let's load the MNIST dataset:" ] }, { @@ -559,7 +559,7 @@ "\n", "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "x_train, x_test = x_train / 255.0, x_test / 255.0\n", - "sample, sample_label = x_train[0], y_train[0]\n" + "sample, sample_label = x_train[0], y_train[0]" ] }, { @@ -572,7 +572,7 @@ "\n", "We choose `sparse_categorical_crossentropy` as the loss function for the model. The\n", "output of the model has shape of `[batch_size, 10]`. The target for the model is a\n", - "integer vector, each of the integer is in the range of 0 to 9.\n" + "integer vector, each of the integer is in the range of 0 to 9." ] }, { @@ -594,7 +594,7 @@ "\n", "model.fit(\n", " x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1\n", - ")\n" + ")" ] }, { @@ -603,7 +603,7 @@ "colab_type": "text" }, "source": [ - "Now, let's compare to a model that does not use the CuDNN kernel:\n" + "Now, let's compare to a model that does not use the CuDNN kernel:" ] }, { @@ -623,7 +623,7 @@ ")\n", "noncudnn_model.fit(\n", " x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1\n", - ")\n" + ")" ] }, { @@ -641,7 +641,7 @@ "The model will run on CPU by default if no GPU is available.\n", "\n", "You simply don't have to worry about the hardware you're running on anymore. Isn't that\n", - "pretty cool?\n" + "pretty cool?" 
] }, { @@ -661,7 +661,7 @@ " print(\n", " \"Predicted result is: %s, target result is: %s\" % (result.numpy(), sample_label)\n", " )\n", - " plt.imshow(sample, cmap=plt.get_cmap(\"gray\"))\n" + " plt.imshow(sample, cmap=plt.get_cmap(\"gray\"))" ] }, { @@ -685,7 +685,7 @@ "`[batch, timestep, {\"location\": [x, y], \"pressure\": [force]}]`\n", "\n", "The following code provides an example of how to build a custom RNN cell that accepts\n", - "such structured inputs.\n" + "such structured inputs." ] }, { @@ -694,7 +694,7 @@ "colab_type": "text" }, "source": [ - "### Define a custom cell that support nested input/output\n" + "### Define a custom cell that support nested input/output" ] }, { @@ -704,7 +704,7 @@ }, "source": [ "See [Making new Layers & Models via subclassing](/guides/making_new_layers_and_models_via_subclassing/)\n", - "for details on writing your own layers.\n" + "for details on writing your own layers." ] }, { @@ -758,7 +758,7 @@ "\n", " def get_config(self):\n", " return {\"unit_1\": self.unit_1, \"unit_2\": unit_2, \"unit_3\": self.unit_3}\n", - "\n" + "" ] }, { @@ -770,7 +770,7 @@ "### Build a RNN model with nested input/output\n", "\n", "Let's build a Keras model that uses a `keras.layers.RNN` layer and the custom cell\n", - "we just defined.\n" + "we just defined." ] }, { @@ -802,7 +802,7 @@ "\n", "model = keras.models.Model([input_1, input_2], outputs)\n", "\n", - "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"accuracy\"])\n" + "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"accuracy\"])" ] }, { @@ -814,7 +814,7 @@ "### Train the model with randomly generated data\n", "\n", "Since there isn't a good candidate dataset for this model, we use random Numpy data for\n", - "demonstration.\n" + "demonstration." 
] }, { @@ -832,7 +832,7 @@ "input_data = [input_1_data, input_2_data]\n", "target_data = [target_1_data, target_2_data]\n", "\n", - "model.fit(input_data, target_data, batch_size=batch_size)\n" + "model.fit(input_data, target_data, batch_size=batch_size)" ] }, { @@ -846,7 +846,7 @@ "will handle the sequence iteration for you. It's an incredibly powerful way to quickly\n", "prototype new kinds of RNNs (e.g. a LSTM variant).\n", "\n", - "For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/RNN/).\n" + "For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/rnn/)." ] } ], diff --git a/guides/ipynb/writing_a_training_loop_from_scratch.ipynb b/guides/ipynb/writing_a_training_loop_from_scratch.ipynb index 6d96e1cc06..0d6d62bb35 100644 --- a/guides/ipynb/writing_a_training_loop_from_scratch.ipynb +++ b/guides/ipynb/writing_a_training_loop_from_scratch.ipynb @@ -34,8 +34,7 @@ "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", - "import numpy as np\n", - "" + "import numpy as np" ] }, { @@ -74,8 +73,7 @@ "instance, you can use these gradients to update these variables (which you can\n", "retrieve using `model.trainable_weights`).\n", "\n", - "Let's consider a simple MNIST model:\n", - "" + "Let's consider a simple MNIST model:" ] }, { @@ -90,8 +88,7 @@ "x1 = layers.Dense(64, activation=\"relu\")(inputs)\n", "x2 = layers.Dense(64, activation=\"relu\")(x1)\n", "outputs = layers.Dense(10, name=\"predictions\")(x2)\n", - "model = keras.Model(inputs=inputs, outputs=outputs)\n", - "" + "model = keras.Model(inputs=inputs, outputs=outputs)" ] }, { @@ -124,8 +121,7 @@ "x_train = np.reshape(x_train, (-1, 784))\n", "x_test = np.reshape(x_train, (-1, 784))\n", "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", - "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)\n", - "" + "train_dataset = 
train_dataset.shuffle(buffer_size=1024).batch(batch_size)" ] }, { @@ -188,8 +184,7 @@ " \"Training loss (for one batch) at step %d: %.4f\"\n", " % (step, float(loss_value))\n", " )\n", - " print(\"Seen so far: %s samples\" % ((step + 1) * 64))\n", - "" + " print(\"Seen so far: %s samples\" % ((step + 1) * 64))" ] }, { @@ -251,8 +246,7 @@ "x_train = x_train[:-10000]\n", "y_train = y_train[:-10000]\n", "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", - "val_dataset = val_dataset.batch(64)\n", - "" + "val_dataset = val_dataset.batch(64)" ] }, { @@ -313,8 +307,7 @@ " val_acc = val_acc_metric.result()\n", " val_acc_metric.reset_states()\n", " print(\"Validation acc: %.4f\" % (float(val_acc),))\n", - " print(\"Time taken: %.2fs\" % (time.time() - start_time))\n", - "" + " print(\"Time taken: %.2fs\" % (time.time() - start_time))" ] }, { @@ -357,7 +350,6 @@ " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", " train_acc_metric.update_state(y, logits)\n", " return loss_value\n", - "\n", "" ] }, @@ -383,7 +375,6 @@ "def test_step(x, y):\n", " val_logits = model(x, training=False)\n", " val_acc_metric.update_state(y, val_logits)\n", - "\n", "" ] }, @@ -437,8 +428,7 @@ " val_acc = val_acc_metric.result()\n", " val_acc_metric.reset_states()\n", " print(\"Validation acc: %.4f\" % (float(val_acc),))\n", - " print(\"Time taken: %.2fs\" % (time.time() - start_time))\n", - "" + " print(\"Time taken: %.2fs\" % (time.time() - start_time))" ] }, { @@ -466,8 +456,7 @@ "If you want to be using these loss components, you should sum them\n", "and add them to the main loss in your training step.\n", "\n", - "Consider this layer, that creates an activity regularization loss:\n", - "" + "Consider this layer, that creates an activity regularization loss:" ] }, { @@ -483,7 +472,6 @@ " def call(self, inputs):\n", " self.add_loss(1e-2 * tf.reduce_sum(inputs))\n", " return inputs\n", - "\n", "" ] }, @@ -511,8 +499,7 @@ "x = layers.Dense(64, 
activation=\"relu\")(x)\n", "outputs = layers.Dense(10, name=\"predictions\")(x)\n", "\n", - "model = keras.Model(inputs=inputs, outputs=outputs)\n", - "" + "model = keras.Model(inputs=inputs, outputs=outputs)" ] }, { @@ -544,7 +531,6 @@ " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", " train_acc_metric.update_state(y, logits)\n", " return loss_value\n", - "\n", "" ] }, @@ -622,8 +608,7 @@ " ],\n", " name=\"discriminator\",\n", ")\n", - "discriminator.summary()\n", - "" + "discriminator.summary()" ] }, { @@ -661,8 +646,7 @@ " layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n", " ],\n", " name=\"generator\",\n", - ")\n", - "" + ")" ] }, { @@ -727,7 +711,6 @@ " grads = tape.gradient(g_loss, generator.trainable_weights)\n", " g_optimizer.apply_gradients(zip(grads, generator.trainable_weights))\n", " return d_loss, g_loss, generated_images\n", - "\n", "" ] }, @@ -787,8 +770,7 @@ " # To limit execution time we stop after 10 steps.\n", " # Remove the lines below to actually train the model!\n", " if step > 10:\n", - " break\n", - "" + " break" ] }, { diff --git a/guides/ipynb/writing_your_own_callbacks.ipynb b/guides/ipynb/writing_your_own_callbacks.ipynb index 3350e4d054..beaff073f4 100644 --- a/guides/ipynb/writing_your_own_callbacks.ipynb +++ b/guides/ipynb/writing_your_own_callbacks.ipynb @@ -50,8 +50,7 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow import keras\n", - "" + "from tensorflow import keras" ] }, { @@ -145,7 +144,6 @@ " metrics=[\"mean_absolute_error\"],\n", " )\n", " return model\n", - "\n", "" ] }, @@ -175,8 +173,7 @@ "x_train = x_train[:1000]\n", "y_train = y_train[:1000]\n", "x_test = x_test[:1000]\n", - "y_test = y_test[:1000]\n", - "" + "y_test = y_test[:1000]" ] }, { @@ -259,7 +256,6 @@ " def on_predict_batch_end(self, batch, logs=None):\n", " keys = list(logs.keys())\n", " print(\"...Predicting: end of batch {}; got log keys: {}\".format(batch, keys))\n", - "\n", "" ] }, @@ 
-295,8 +291,7 @@ " x_test, y_test, batch_size=128, verbose=0, callbacks=[CustomCallback()]\n", ")\n", "\n", - "res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()])\n", - "" + "res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()])" ] }, { @@ -351,8 +346,7 @@ " batch_size=128,\n", " verbose=0,\n", " callbacks=[LossAndErrorPrintingCallback()],\n", - ")\n", - "" + ")" ] }, { @@ -469,8 +463,7 @@ " epochs=30,\n", " verbose=0,\n", " callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()],\n", - ")\n", - "" + ")" ] }, { @@ -552,8 +545,7 @@ " LossAndErrorPrintingCallback(),\n", " CustomLearningRateScheduler(lr_schedule),\n", " ],\n", - ")\n", - "" + ")" ] }, { diff --git a/guides/md/customizing_what_happens_in_fit.md b/guides/md/customizing_what_happens_in_fit.md index 0cb485f03f..0c9e7fb78b 100644 --- a/guides/md/customizing_what_happens_in_fit.md +++ b/guides/md/customizing_what_happens_in_fit.md @@ -40,16 +40,14 @@ models, or subclassed models. Let's see how that works. - --- ## Setup - +Requires TensorFlow 2.2 or later. ```python import tensorflow as tf from tensorflow import keras - ``` --- @@ -79,7 +77,6 @@ of the metrics that were passed in `compile()`, and we query results from `self.metrics` at the end to retrieve their current value. - ```python class CustomModel(keras.Model): @@ -104,13 +101,11 @@ class CustomModel(keras.Model): # Return a dict mapping metric names to current value return {m.name: m.result() for m in self.metrics} - ``` Let's try this out: - ```python import numpy as np @@ -124,19 +119,18 @@ model.compile(optimizer="adam", loss="mse", metrics=["mae"]) x = np.random.random((1000, 32)) y = np.random.random((1000, 1)) model.fit(x, y, epochs=3) - ```
``` Epoch 1/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.3701 - mae: 0.4972 +32/32 [==============================] - 0s 539us/step - loss: 0.2468 - mae: 0.3989 Epoch 2/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.2283 - mae: 0.3842 +32/32 [==============================] - 0s 417us/step - loss: 0.2288 - mae: 0.3810 Epoch 3/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.2193 - mae: 0.3759 +32/32 [==============================] - 0s 394us/step - loss: 0.2171 - mae: 0.3715 - + ```
@@ -148,7 +142,6 @@ everything *manually* in `train_step`. Likewise for metrics. Here's a lower-leve example, that only uses `compile()` to configure the optimizer: - ```python mae_metric = keras.metrics.MeanAbsoluteError(name="mae") loss_tracker = keras.metrics.Mean(name="loss") @@ -188,19 +181,18 @@ model.compile(optimizer="adam") x = np.random.random((1000, 32)) y = np.random.random((1000, 1)) model.fit(x, y, epochs=3) - ```
``` Epoch 1/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.3244 - mae: 0.4531 +32/32 [==============================] - 0s 551us/step - loss: 0.3190 - mae: 0.4600 Epoch 2/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.2864 - mae: 0.4263 +32/32 [==============================] - 0s 484us/step - loss: 0.2653 - mae: 0.4113 Epoch 3/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.2715 - mae: 0.4145 +32/32 [==============================] - 0s 403us/step - loss: 0.2458 - mae: 0.3959 - + ```
@@ -217,7 +209,6 @@ it manually if you don't rely on `compile()` for losses & metrics) - That's it. That's the list. - ```python class CustomModel(keras.Model): @@ -267,19 +258,18 @@ x = np.random.random((1000, 32)) y = np.random.random((1000, 1)) sw = np.random.random((1000, 1)) model.fit(x, y, sample_weight=sw, epochs=3) - ```
``` Epoch 1/3 -32/32 [==============================] - 0s 2ms/step - loss: 1.0058 - mae: 1.3402 +32/32 [==============================] - 0s 570us/step - loss: 0.8900 - mae: 1.2322 Epoch 2/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.4708 - mae: 0.8719 +32/32 [==============================] - 0s 532us/step - loss: 0.4081 - mae: 0.7768 Epoch 3/3 -32/32 [==============================] - 0s 2ms/step - loss: 0.2220 - mae: 0.5591 +32/32 [==============================] - 0s 495us/step - loss: 0.2023 - mae: 0.5147 - + ```
@@ -290,7 +280,6 @@ What if you want to do the same for calls to `model.evaluate()`? Then you would override `test_step` in exactly the same way. Here's what it looks like: - ```python class CustomModel(keras.Model): @@ -318,14 +307,13 @@ model.compile(loss="mse", metrics=["mae"]) x = np.random.random((1000, 32)) y = np.random.random((1000, 1)) model.evaluate(x, y) - ```
``` -32/32 [==============================] - 0s 1ms/step - loss: 0.8495 - mae: 0.8096 +32/32 [==============================] - 0s 502us/step - loss: 1.4667 - mae: 1.1263 -[0.849469780921936, 0.8096422553062439] +[1.4666608572006226, 1.1262973546981812] ```
@@ -344,7 +332,6 @@ Let's consider: - ```python from tensorflow.keras import layers @@ -379,14 +366,12 @@ generator = keras.Sequential( ], name="generator", ) - ``` Here's a feature-complete GAN class, overriding `compile()` to use its own signature, and implementing the entire GAN algorithm in 17 lines in `train_step`: - ```python class GAN(keras.Model): @@ -446,13 +431,11 @@ class GAN(keras.Model): self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights)) return {"d_loss": d_loss, "g_loss": g_loss} - ``` Let's test-drive it: - ```python # Prepare the dataset. We use both the training & test MNIST digits. batch_size = 64 @@ -473,18 +456,14 @@ gan.compile( # To limit execution time, we only train on 100 batches. You can train on # the entire dataset. You will need about 20 epochs to get nice results. gan.fit(dataset.take(100), epochs=1) - ```
``` -Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz -11493376/11490434 [==============================] - 0s 0us/step -100/100 [==============================] - 1s 11ms/step - d_loss: 0.4090 - g_loss: 0.8741 +100/100 [==============================] - 53s 533ms/step - d_loss: 0.4849 - g_loss: 0.8301 - + ```
The idea behind deep learning are simple, so why should their implementation be painful? - diff --git a/guides/md/functional_api.md b/guides/md/functional_api.md index bb7f29fd4b..e6e89087bf 100644 --- a/guides/md/functional_api.md +++ b/guides/md/functional_api.md @@ -19,7 +19,6 @@ import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers - ``` --- @@ -56,7 +55,6 @@ To build this model using the functional API, start by creating an input node: ```python inputs = keras.Input(shape=(784,)) - ``` The shape of the data is set as a 784-dimensional vector. @@ -69,7 +67,6 @@ you would use: ```python # Just for demonstration purposes. img_inputs = keras.Input(shape=(32, 32, 3)) - ``` The `inputs` that is returned contains information about the shape and `dtype` @@ -79,7 +76,6 @@ Here's the shape: ```python inputs.shape - ``` @@ -96,7 +92,6 @@ Here's the dtype: ```python inputs.dtype - ``` @@ -115,7 +110,6 @@ object: ```python dense = layers.Dense(64, activation="relu") x = dense(inputs) - ``` The "layer call" action is like drawing an arrow from "inputs" to this layer @@ -128,7 +122,6 @@ Let's add a few more layers to the graph of layers: ```python x = layers.Dense(64, activation="relu")(x) outputs = layers.Dense(10)(x) - ``` At this point, you can create a `Model` by specifying its inputs and outputs @@ -137,7 +130,6 @@ in the graph of layers: ```python model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") - ``` Let's check out what the model summary looks like: @@ -145,7 +137,6 @@ Let's check out what the model summary looks like: ```python model.summary() - ```
@@ -174,7 +165,6 @@ You can also plot the model as a graph: ```python keras.utils.plot_model(model, "my_first_model.png") - ``` @@ -190,7 +180,6 @@ in the plotted graph: ```python keras.utils.plot_model(model, "my_first_model_with_shape_info.png", show_shapes=True) - ``` @@ -234,18 +223,17 @@ history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split= test_scores = model.evaluate(x_test, y_test, verbose=2) print("Test loss:", test_scores[0]) print("Test accuracy:", test_scores[1]) - ```
``` Epoch 1/2 -750/750 [==============================] - 1s 1ms/step - loss: 0.3486 - accuracy: 0.9025 - val_loss: 0.2256 - val_accuracy: 0.9326 +750/750 [==============================] - 1s 1ms/step - loss: 0.3528 - accuracy: 0.9005 - val_loss: 0.1883 - val_accuracy: 0.9457 Epoch 2/2 -750/750 [==============================] - 1s 1ms/step - loss: 0.1718 - accuracy: 0.9496 - val_loss: 0.1468 - val_accuracy: 0.9576 -313/313 - 0s - loss: 0.1388 - accuracy: 0.9602 -Test loss: 0.13882307708263397 -Test accuracy: 0.9602000117301941 +750/750 [==============================] - 1s 1ms/step - loss: 0.1684 - accuracy: 0.9505 - val_loss: 0.1385 - val_accuracy: 0.9597 +313/313 - 0s - loss: 0.1361 - accuracy: 0.9605 +Test loss: 0.13611680269241333 +Test accuracy: 0.9605000019073486 ```
@@ -272,7 +260,6 @@ model.save("path_to_my_model") del model # Recreate the exact same model purely from the file: model = keras.models.load_model("path_to_my_model") - ``` For details, read the model [serialization & saving]( @@ -311,7 +298,6 @@ decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x) autoencoder = keras.Model(encoder_input, decoder_output, name="autoencoder") autoencoder.summary() - ```
@@ -422,7 +408,6 @@ encoded_img = encoder(autoencoder_input) decoded_img = decoder(encoded_img) autoencoder = keras.Model(autoencoder_input, decoded_img, name="autoencoder") autoencoder.summary() - ```
@@ -477,9 +462,9 @@ Layer (type) Output Shape Param # ================================================================= img (InputLayer) [(None, 28, 28, 1)] 0 _________________________________________________________________ -encoder (Model) (None, 16) 18672 +encoder (Functional) (None, 16) 18672 _________________________________________________________________ -decoder (Model) (None, 28, 28, 1) 9569 +decoder (Functional) (None, 28, 28, 1) 9569 ================================================================= Total params: 28,241 Trainable params: 28,241 @@ -513,7 +498,6 @@ y2 = model2(inputs) y3 = model3(inputs) outputs = layers.average([y1, y2, y3]) ensemble_model = keras.Model(inputs=inputs, outputs=outputs) - ``` --- @@ -577,7 +561,6 @@ model = keras.Model( inputs=[title_input, body_input, tags_input], outputs=[priority_pred, department_pred], ) - ``` Now plot the model: @@ -585,7 +568,6 @@ Now plot the model: ```python keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True) - ``` @@ -609,7 +591,6 @@ model.compile( ], loss_weights=[1.0, 0.2], ) - ``` Since the output layers have different names, you could also specify @@ -625,7 +606,6 @@ model.compile( }, loss_weights=[1.0, 0.2], ) - ``` Train the model by passing lists of NumPy arrays of inputs and targets: @@ -647,17 +627,16 @@ model.fit( epochs=2, batch_size=32, ) - ```
``` Epoch 1/2 -40/40 [==============================] - 1s 26ms/step - loss: 1.2709 - priority_loss: 0.7003 - department_loss: 2.8529 +40/40 [==============================] - 1s 27ms/step - loss: 1.3097 - priority_loss: 0.6958 - department_loss: 3.0697 Epoch 2/2 -40/40 [==============================] - 1s 27ms/step - loss: 1.2632 - priority_loss: 0.6977 - department_loss: 2.8274 +40/40 [==============================] - 1s 27ms/step - loss: 1.2982 - priority_loss: 0.6946 - department_loss: 3.0178 - + ```
@@ -701,7 +680,6 @@ outputs = layers.Dense(10)(x) model = keras.Model(inputs, outputs, name="toy_resnet") model.summary() - ```
@@ -754,7 +732,6 @@ Plot the model: ```python keras.utils.plot_model(model, "mini_resnet.png", show_shapes=True) - ``` @@ -783,14 +760,13 @@ model.compile( # We restrict the data to the first 1000 samples so as to limit execution time # on Colab. Try to train on the entire dataset until convergence! model.fit(x_train[:1000], y_train[:1000], batch_size=64, epochs=1, validation_split=0.2) - ```
``` -13/13 [==============================] - 1s 79ms/step - loss: 2.4461 - acc: 0.0962 - val_loss: 2.2925 - val_acc: 0.1450 +13/13 [==============================] - 1s 96ms/step - loss: 2.3007 - acc: 0.0950 - val_loss: 2.2903 - val_acc: 0.1150 - + ```
@@ -825,7 +801,6 @@ text_input_b = keras.Input(shape=(None,), dtype="int32") # Reuse the same layer to encode both inputs encoded_input_a = shared_embedding(text_input_a) encoded_input_b = shared_embedding(text_input_b) - ``` --- @@ -844,7 +819,6 @@ Let's look at an example. This is a VGG19 model with weights pretrained on Image ```python vgg19 = tf.keras.applications.VGG19() - ``` And these are the intermediate activations of the model, @@ -853,7 +827,6 @@ obtained by querying the graph data structure: ```python features_list = [layer.output for layer in vgg19.layers] - ``` Use these features to create a new feature-extraction model that returns @@ -865,7 +838,6 @@ feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) img = np.random.random((1, 224, 224, 3)).astype("float32") extracted_features = feat_extraction_model(img) - ``` This comes in handy for tasks like @@ -920,7 +892,6 @@ inputs = keras.Input((4,)) outputs = CustomDense(10)(inputs) model = keras.Model(inputs, outputs) - ``` For serialization support in your custom layer, define a `get_config` @@ -958,7 +929,6 @@ model = keras.Model(inputs, outputs) config = model.get_config() new_model = keras.Model.from_config(config, custom_objects={"CustomDense": CustomDense}) - ``` Optionally, implement the classmethod `from_config(cls, config)` which is used @@ -1072,7 +1042,6 @@ This is true for most deep learning architectures, but not all -- for example, recursive networks or Tree RNNs do not follow this assumption and cannot be implemented in the functional API. - --- ## Mix-and-match API styles @@ -1123,7 +1092,6 @@ class CustomRNN(layers.Layer): rnn_model = CustomRNN() _ = rnn_model(tf.zeros((1, timesteps, input_dim))) - ```
@@ -1192,5 +1160,4 @@ model = keras.Model(inputs, outputs) rnn_model = CustomRNN() _ = rnn_model(tf.zeros((1, 10, 5))) - ``` diff --git a/guides/md/making_new_layers_and_models_via_subclassing.md b/guides/md/making_new_layers_and_models_via_subclassing.md index d0d1bf586f..c02eea1d19 100644 --- a/guides/md/making_new_layers_and_models_via_subclassing.md +++ b/guides/md/making_new_layers_and_models_via_subclassing.md @@ -17,7 +17,6 @@ ```python import tensorflow as tf from tensorflow import keras - ``` --- @@ -48,7 +47,6 @@ class Linear(keras.layers.Layer): def call(self, inputs): return tf.matmul(inputs, self.w) + self.b - ``` You would use a layer by calling it on some tensor input(s), much like a Python @@ -60,14 +58,13 @@ x = tf.ones((2, 2)) linear_layer = Linear(4, 2) y = linear_layer(x) print(y) - ```
``` tf.Tensor( -[[0.04719363 0.01185325 0.08139521 0.03705199] - [0.04719363 0.01185325 0.08139521 0.03705199]], shape=(2, 4), dtype=float32) +[[ 0.01013444 -0.01070027 -0.01888977 0.05208318] + [ 0.01013444 -0.01070027 -0.01888977 0.05208318]], shape=(2, 4), dtype=float32) ```
@@ -77,7 +74,6 @@ being set as layer attributes: ```python assert linear_layer.weights == [linear_layer.w, linear_layer.b] - ``` Note you also have access to a quicker shortcut for adding weight to a layer: @@ -102,14 +98,13 @@ x = tf.ones((2, 2)) linear_layer = Linear(4, 2) y = linear_layer(x) print(y) - ```
``` tf.Tensor( -[[ 0.09742574 -0.05855173 -0.09288181 -0.06019699] - [ 0.09742574 -0.05855173 -0.09288181 -0.06019699]], shape=(2, 4), dtype=float32) +[[-0.01331179 -0.00605625 -0.01042787 0.17160884] + [-0.01331179 -0.00605625 -0.01042787 0.17160884]], shape=(2, 4), dtype=float32) ```
@@ -141,7 +136,6 @@ y = my_sum(x) print(y.numpy()) y = my_sum(x) print(y.numpy()) - ```
@@ -160,7 +154,6 @@ print("non-trainable weights:", len(my_sum.non_trainable_weights)) # It's not included in the trainable weights: print("trainable_weights:", my_sum.trainable_weights) - ```
@@ -191,7 +184,6 @@ class Linear(keras.layers.Layer): def call(self, inputs): return tf.matmul(inputs, self.w) + self.b - ``` In many cases, you may not know in advance the size of your inputs, and you @@ -222,7 +214,6 @@ class Linear(keras.layers.Layer): def call(self, inputs): return tf.matmul(inputs, self.w) + self.b - ``` The `__call__()` method of your layer will automatically run build the first time @@ -235,7 +226,6 @@ linear_layer = Linear(32) # The layer's weights are created dynamically the first time the layer is called y = linear_layer(x) - ``` --- @@ -273,7 +263,6 @@ mlp = MLPBlock() y = mlp(tf.ones(shape=(3, 64))) # The first call to the `mlp` will create the weights print("weights:", len(mlp.weights)) print("trainable weights:", len(mlp.trainable_weights)) - ```
@@ -302,7 +291,6 @@ class ActivityRegularizationLayer(keras.layers.Layer): self.add_loss(self.rate * tf.reduce_sum(inputs)) return inputs - ``` These losses (including those created by any inner layer) can be retrieved via @@ -331,7 +319,6 @@ assert len(layer.losses) == 1 # We created one loss value # `layer.losses` gets reset at the start of each __call__ _ = layer(tf.zeros(1, 1)) assert len(layer.losses) == 1 # This is the loss created during the call above - ``` In addition, the `loss` property also contains regularization losses created @@ -357,12 +344,11 @@ _ = layer(tf.zeros((1, 1))) # This is `1e-3 * sum(layer.dense.kernel ** 2)`, # created by the `kernel_regularizer` above. print(layer.losses) - ```
``` -[] +[] ```
@@ -411,15 +397,14 @@ model.fit(np.random.random((2, 3)), np.random.random((2, 3))) # call during the forward pass! model.compile(optimizer="adam") model.fit(np.random.random((2, 3)), np.random.random((2, 3))) - ```
``` -1/1 [==============================] - 0s 658us/step - loss: 0.1063 -1/1 [==============================] - 0s 873us/step - loss: 0.0202 +1/1 [==============================] - 0s 1ms/step - loss: 0.1555 +1/1 [==============================] - 0s 927us/step - loss: 0.0336 - + ```
@@ -457,7 +442,6 @@ class LogisticEndpoint(keras.layers.Layer): # Return the inference-time prediction tensor (for `.predict()`). return tf.nn.softmax(logits) - ``` Metrics tracked in this way are accessible via `layer.metrics`: @@ -472,12 +456,11 @@ y = layer(targets, logits) print("layer.metrics:", layer.metrics) print("current accuracy value:", float(layer.metrics[0].result())) - ```
``` -layer.metrics: [] +layer.metrics: [] current accuracy value: 1.0 ``` @@ -499,14 +482,13 @@ data = { "targets": np.random.random((3, 10)), } model.fit(data) - ```
``` -1/1 [==============================] - 0s 806us/step - loss: 1.0874 - binary_accuracy: 0.0000e+00 +1/1 [==============================] - 0s 999us/step - loss: 1.0366 - binary_accuracy: 0.0000e+00 - + ```
@@ -547,7 +529,6 @@ layer = Linear(64) config = layer.get_config() print(config) new_layer = Linear.from_config(config) - ```
@@ -592,7 +573,6 @@ layer = Linear(64) config = layer.get_config() print(config) new_layer = Linear.from_config(config) - ```
@@ -638,7 +618,6 @@ class CustomDropout(keras.layers.Layer): return tf.nn.dropout(inputs, rate=self.rate) return inputs - ``` --- @@ -810,7 +789,6 @@ class VariationalAutoEncoder(keras.Model): self.add_loss(kl_loss) return reconstructed - ``` Let's write a simple training loop on MNIST: @@ -852,27 +830,26 @@ for epoch in range(epochs): if step % 100 == 0: print("step %d: mean loss = %.4f" % (step, loss_metric.result())) - ```
``` Start of epoch 0 -step 0: mean loss = 0.3500 -step 100: mean loss = 0.1260 -step 200: mean loss = 0.0993 +step 0: mean loss = 0.3577 +step 100: mean loss = 0.1258 +step 200: mean loss = 0.0994 step 300: mean loss = 0.0893 step 400: mean loss = 0.0843 -step 500: mean loss = 0.0810 +step 500: mean loss = 0.0809 step 600: mean loss = 0.0788 step 700: mean loss = 0.0772 step 800: mean loss = 0.0760 step 900: mean loss = 0.0750 Start of epoch 1 step 0: mean loss = 0.0747 -step 100: mean loss = 0.0741 -step 200: mean loss = 0.0736 -step 300: mean loss = 0.0731 +step 100: mean loss = 0.0740 +step 200: mean loss = 0.0735 +step 300: mean loss = 0.0730 step 400: mean loss = 0.0727 step 500: mean loss = 0.0723 step 600: mean loss = 0.0720 @@ -893,17 +870,16 @@ optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3) vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError()) vae.fit(x_train, x_train, epochs=2, batch_size=64) - ```
``` Epoch 1/2 -938/938 [==============================] - 1s 1ms/step - loss: 0.0746 +938/938 [==============================] - 1s 1ms/step - loss: 0.0745 Epoch 2/2 938/938 [==============================] - 1s 1ms/step - loss: 0.0676 - + ```
@@ -950,19 +926,18 @@ vae.add_loss(kl_loss) optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3) vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError()) vae.fit(x_train, x_train, epochs=3, batch_size=64) - ```
``` Epoch 1/3 -938/938 [==============================] - 1s 1ms/step - loss: 0.0749 +938/938 [==============================] - 1s 1ms/step - loss: 0.0747 Epoch 2/3 938/938 [==============================] - 1s 1ms/step - loss: 0.0676 Epoch 3/3 -938/938 [==============================] - 1s 1ms/step - loss: 0.0675 +938/938 [==============================] - 1s 1ms/step - loss: 0.0676 - + ```
diff --git a/guides/md/sequential_model.md b/guides/md/sequential_model.md index 7a9c0748f5..c43a4a4425 100644 --- a/guides/md/sequential_model.md +++ b/guides/md/sequential_model.md @@ -14,12 +14,10 @@ ## Setup - ```python import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers - ``` --- @@ -31,7 +29,6 @@ where each layer has **exactly one input tensor and one output tensor**. Schematically, the following `Sequential` model: - ```python # Define Sequential model with 3 layers model = keras.Sequential( @@ -44,13 +41,11 @@ model = keras.Sequential( # Call model on a test input x = tf.ones((3, 3)) y = model(x) - ``` is equivalent to this function: - ```python # Create 3 layers layer1 = layers.Dense(2, activation="relu", name="layer1") @@ -60,7 +55,6 @@ layer3 = layers.Dense(4, name="layer3") # Call layers on a test input x = tf.ones((3, 3)) y = layer3(layer2(layer1(x))) - ``` A Sequential model is **not appropriate** when: @@ -71,7 +65,6 @@ A Sequential model is **not appropriate** when: - You want non-linear topology (e.g. a residual connection, a multi-branch model) - --- ## Creating a Sequential model @@ -79,7 +72,6 @@ You can create a Sequential model by passing a list of layers to the Sequential constructor: - ```python model = keras.Sequential( [ @@ -88,16 +80,13 @@ model = keras.Sequential( layers.Dense(4), ] ) - ``` Its layers are accessible via the `layers` attribute: - ```python model.layers - ``` @@ -105,33 +94,29 @@ model.layers
``` -[, - , - ] +[, + , + ] ```
You can also create a Sequential model incrementally via the `add()` method: - ```python model = keras.Sequential() model.add(layers.Dense(2, activation="relu")) model.add(layers.Dense(3, activation="relu")) model.add(layers.Dense(4)) - ``` Note that there's also a corresponding `pop()` method to remove layers: a Sequential model behaves very much like a list of layers. - ```python model.pop() print(len(model.layers)) # 2 - ```
@@ -145,13 +130,11 @@ any layer or model in Keras. This is useful to annotate TensorBoard graphs with semantically meaningful names. - ```python model = keras.Sequential(name="my_sequential") model.add(layers.Dense(2, activation="relu", name="layer1")) model.add(layers.Dense(3, activation="relu", name="layer2")) model.add(layers.Dense(4, name="layer3")) - ``` --- @@ -162,11 +145,9 @@ in order to be able to create their weights. So when you create a layer like this, initially, it has no weights: - ```python layer = layers.Dense(3) layer.weights # Empty - ``` @@ -182,13 +163,11 @@ It creates its weights the first time it is called on an input, since the shape of the weights depends on the shape of the inputs: - ```python # Call layer on a test input x = tf.ones((1, 4)) y = layer(x) layer.weights # Now it has weights, of shape (4, 3) and (3,) - ``` @@ -197,10 +176,10 @@ layer.weights # Now it has weights, of shape (4, 3) and (3,)
``` [, + array([[ 0.47175038, 0.0916599 , -0.7113838 ], + [ 0.4508165 , 0.80212307, 0.54930305], + [ 0.47127366, 0.77359426, 0.6605067 ], + [ 0.28070033, 0.01403308, -0.62135905]], dtype=float32)>, ] ``` @@ -212,7 +191,6 @@ Sequential model without an input shape, it isn't "built": it has no weights when the model first sees some input data: - ```python model = keras.Sequential( [ @@ -232,7 +210,6 @@ model = keras.Sequential( x = tf.ones((1, 4)) y = model(x) print("Number of weights after calling the model:", len(model.weights)) # 6 - ```
@@ -245,10 +222,8 @@ Once a model is "built", you can call its `summary()` method to display its contents: - ```python model.summary() - ```
@@ -276,14 +251,12 @@ output shape. In this case, you should start your model by passing an `Input` object to your model, so that it knows its input shape from the start: - ```python model = keras.Sequential() model.add(keras.Input(shape=(4,))) model.add(layers.Dense(2, activation="relu")) model.summary() - ```
@@ -305,10 +278,8 @@ Note that the `Input` object is not displayed as part of `model.layers`, since it isn't a layer: - ```python model.layers - ``` @@ -316,7 +287,7 @@ model.layers
``` -[] +[] ```
@@ -324,13 +295,11 @@ A simple alternative is to just pass an `input_shape` argument to your first layer: - ```python model = keras.Sequential() model.add(layers.Dense(2, activation="relu", input_shape=(4,))) model.summary() - ```
@@ -354,7 +323,6 @@ before seeing any data) and always have a defined output shape. In general, it's a recommended best practice to always specify the input shape of a Sequential model in advance if you know what it is. - --- ## A common debugging workflow: `add()` + `summary()` @@ -364,7 +332,6 @@ enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is downsampling image feature maps: - ```python model = keras.Sequential() model.add(keras.Input(shape=(250, 250, 3))) # 250x250 RGB images @@ -393,7 +360,6 @@ model.add(layers.GlobalMaxPooling2D()) # Finally, we add a classification layer. model.add(layers.Dense(10)) - ```
@@ -444,7 +410,6 @@ _________________________________________________________________ Very practical, right? - --- ## What to do once you have a model @@ -458,7 +423,6 @@ Once your model architecture is ready, you will want to: - Speed up model training by leveraging multiple GPUs. See our [guide to multi-GPU and distributed training](distributed_training). - --- ## Feature extraction with a Sequential model @@ -470,7 +434,6 @@ creating a model that extracts the outputs of all intermediate layers in a Sequential model: - ```python initial_model = keras.Sequential( [ @@ -488,13 +451,11 @@ feature_extractor = keras.Model( # Call feature extractor on test input. x = tf.ones((1, 250, 250, 3)) features = feature_extractor(x) - ``` Here's a similar example that only extract features from one layer: - ```python initial_model = keras.Sequential( [ @@ -511,7 +472,6 @@ feature_extractor = keras.Model( # Call feature extractor on test input. x = tf.ones((1, 250, 250, 3)) features = feature_extractor(x) - ``` --- @@ -576,7 +536,6 @@ model.fit(...) If you do transfer learning, you will probably find yourself frequently using these two patterns. - That's about all you need to know about Sequential models! To find out more about building models in Keras, see: @@ -584,4 +543,3 @@ To find out more about building models in Keras, see: - [Guide to the Functional API](/guides/functional_api/) - [Guide to making new Layers & Models via subclassing]( /guides/making_new_layers_and_models_via_subclassing/) - diff --git a/guides/md/serialization_and_saving.md b/guides/md/serialization_and_saving.md index d84911f42b..5c482bdffc 100644 --- a/guides/md/serialization_and_saving.md +++ b/guides/md/serialization_and_saving.md @@ -33,7 +33,6 @@ or to only selectively save some of them: Let's take a look at each of these options: when would you use one or the other? How do they work? 
- --- ## The short answer to saving & loading @@ -55,17 +54,14 @@ model = keras.models.load_model('path/to/location') Now, let's look at the details. - --- ## Setup - ```python import numpy as np import tensorflow as tf from tensorflow import keras - ``` --- @@ -93,13 +89,11 @@ You can switch to the H5 format by: - Passing `format='h5'` to `save()`. - Passing a filename that ends in `.h5` or `.keras` to `save()`. - ### SavedModel format **Example:** - ```python def get_model(): @@ -132,19 +126,26 @@ np.testing.assert_allclose( # The reconstructed model is already compiled and has retained the optimizer # state, so training can resume: reconstructed_model.fit(test_input, test_target) - ```
``` -4/4 [==============================] - 0s 978us/step - loss: 0.2587 -WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:105: Network.state_updates (from tensorflow.python.keras.engine.network) is deprecated and will be removed in a future version. +4/4 [==============================] - 0s 670us/step - loss: 1.3573 + +WARNING: Logging before flag parsing goes to stderr. +W0611 15:19:08.423139 4624426432 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:467: set_learning_phase (from tensorflow.python.keras.backend) is deprecated and will be removed after 2020-10-11. +Instructions for updating: +Simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model. +W0611 15:19:08.460778 4624426432 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:105: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. Instructions for updating: This property should not be used in TensorFlow 2.0, as updates are applied automatically. -INFO:tensorflow:Assets written to: my_model/assets -4/4 [==============================] - 0s 998us/step - loss: 0.2538 +W0611 15:19:08.463594 4624426432 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:105: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +4/4 [==============================] - 0s 705us/step - loss: 1.2203 - + ```
@@ -154,15 +155,13 @@ Calling `model.save('my_model')` creates a folder named `my_model`, containing the following: - ```python !ls my_model - ```
``` -assets saved_model.pb variables +assets saved_model.pb variables ```
@@ -195,7 +194,6 @@ Below is an example of what happens when loading custom layers from he SavedModel format **without** overwriting the config methods. - ```python class CustomModel(keras.Model): @@ -225,22 +223,20 @@ np.testing.assert_allclose(loaded(input_arr), outputs) print("Original model:", model) print("Loaded model:", loaded) - ```
``` -INFO:tensorflow:Assets written to: my_model/assets -WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually. -Original model: <__main__.CustomModel object at 0x7f92d80e9910> -Loaded model: +W0611 15:19:09.533702 4624426432 load.py:128] No training configuration found in save file, so the model was *not* compiled. Compile it manually. + +Original model: <__main__.CustomModel object at 0x14b574b90> +Loaded model: ```
As seen in the example above, the loader dynamically creates a new model class that acts like the original model. - ### Keras H5 format Keras also supports saving a single HDF5 file containing the model's architecture, @@ -250,7 +246,6 @@ It is a light-weight alternative to SavedModel. **Example:** - ```python model = get_model() @@ -273,15 +268,14 @@ np.testing.assert_allclose( # The reconstructed model is already compiled and has retained the optimizer # state, so training can resume: reconstructed_model.fit(test_input, test_target) - ```
``` -4/4 [==============================] - 0s 911us/step - loss: 0.8166 -4/4 [==============================] - 0s 958us/step - loss: 0.7211 +4/4 [==============================] - 0s 650us/step - loss: 0.3729 +4/4 [==============================] - 0s 599us/step - loss: 0.3210 - + ```
@@ -303,7 +297,6 @@ to the Python classes/functions of these objects in order to reconstruct the mod See [Custom objects](save_and_serialize.ipynb#custom-objects). - --- ## Saving the architecture @@ -315,7 +308,6 @@ and no compilation information. *Note this only applies to models defined using the functional or Sequential apis not subclassed models. - ### Configuration of a Sequential model or Functional API model These types of models are explicit graphs of layers: their configuration @@ -326,7 +318,6 @@ is always available in a structured form. - `get_config()` and `from_config()` - `tf.keras.models.model_to_json()` and `tf.keras.models.model_from_json()` - #### `get_config()` and `from_config()` Calling `config = model.get_config()` will return a Python dict containing @@ -339,36 +330,30 @@ The same workflow also works for any serializable layer. **Layer example:** - ```python layer = keras.layers.Dense(3, activation="relu") layer_config = layer.get_config() new_layer = keras.layers.Dense.from_config(layer_config) - ``` **Sequential model example:** - ```python model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)]) config = model.get_config() new_model = keras.Sequential.from_config(config) - ``` **Functional model example:** - ```python inputs = keras.Input((32,)) outputs = keras.layers.Dense(1)(inputs) model = keras.Model(inputs, outputs) config = model.get_config() new_model = keras.Model.from_config(config) - ``` #### `to_json()` and `tf.keras.models.model_from_json()` @@ -380,12 +365,10 @@ It is also specific to models, it isn't meant for layers. **Example:** - ```python model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)]) json_config = model.to_json() new_model = keras.models.model_from_json(json_config) - ``` ### Custom objects @@ -415,21 +398,13 @@ do so, you won't need to provide any `custom_objects`. 
You can do so like this: - ```python model.save("my_model") tensorflow_graph = tf.saved_model.load("my_model") x = np.random.uniform(size=(4, 32)).astype(np.float32) predicted = tensorflow_graph(x).numpy() - ``` -
-``` -INFO:tensorflow:Assets written to: my_model/assets - -``` -
Note that this method has several drawbacks: * For traceability reasons, you should always have access to the custom objects that were used. You wouldn't want to put in production a model @@ -444,7 +419,6 @@ loading the model with `tf.keras.models.load_model()`. You can find out more in the [page about `tf.saved_model.load`](https://www.tensorflow.org/api_docs/python/tf/saved_model/load) - #### Defining the config methods Specifications: @@ -458,7 +432,6 @@ The default implementation returns `cls(**config)`. **Example:** - ```python class CustomLayer(keras.layers.Layer): @@ -488,7 +461,6 @@ serialized_layer = keras.layers.serialize(layer) new_layer = keras.layers.deserialize( serialized_layer, custom_objects={"CustomLayer": CustomLayer} ) - ``` #### Registering the custom object @@ -511,11 +483,9 @@ in section above "Defining the config methods") 2. `tf.keras.utils.custom_object_scope` or `tf.keras.utils.CustomObjectScope` 3. `tf.keras.utils.register_keras_serializable` - #### Custom layer and function example - ```python class CustomLayer(keras.layers.Layer): @@ -559,7 +529,6 @@ config = model.get_config() custom_objects = {"CustomLayer": CustomLayer, "custom_activation": custom_activation} with keras.utils.custom_object_scope(custom_objects): new_model = keras.Model.from_config(config) - ``` ### In-memory model cloning @@ -571,11 +540,9 @@ This is equivalent to getting the config then recreating the model from its conf **Example:** - ```python with keras.utils.custom_object_scope(custom_objects): new_model = keras.models.clone_model(model) - ``` --- @@ -589,7 +556,6 @@ restart training, so you don't need the compilation information or optimizer sta reusing the state of a prior model, so you don't need the compilation information of the prior model. - ### APIs for in-memory weight transfer Weights can be copied between different objects by using `get_weights` @@ -605,7 +571,6 @@ Examples below. 
***Transfering weights from one layer to another, in memory*** - ```python def create_layer(): @@ -619,14 +584,12 @@ layer_2 = create_layer() # Copy weights from layer 2 to layer 1 layer_2.set_weights(layer_1.get_weights()) - ``` ***Transfering weights from one model to another model with a compatible architecture, in memory*** - ```python # Create a simple functional model inputs = keras.Input(shape=(784,), name="digits") @@ -664,7 +627,6 @@ subclassed_model.set_weights(functional_model.get_weights()) assert len(functional_model.weights) == len(subclassed_model.weights) for a, b in zip(functional_model.weights, subclassed_model.weights): np.testing.assert_allclose(a.numpy(), b.numpy()) - ``` ***The case of stateless layers*** @@ -674,7 +636,6 @@ models can have compatible architectures even if there are extra/missing stateless layers. - ```python inputs = keras.Input(shape=(784,), name="digits") x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs) @@ -694,7 +655,6 @@ functional_model_with_dropout = keras.Model( ) functional_model_with_dropout.set_weights(functional_model.get_weights()) - ``` ### APIs for saving weights to disk & loading them back @@ -716,13 +676,11 @@ checkpoint unless `save_format` is set. There is also an option of retrieving weights as in-memory numpy arrays. Each API has their pros and cons which are detailed below . - ### TF Checkpoint format **Example:** - ```python # Runnable example sequential_model = keras.Sequential( @@ -740,7 +698,6 @@ load_status = sequential_model.load_weights("ckpt") # restored from the checkpoint. See `tf.train.Checkpoint.restore` for other # methods in the Status object. load_status.assert_consumed() - ``` @@ -748,7 +705,7 @@ load_status.assert_consumed()
``` - + ```
@@ -767,7 +724,6 @@ not the name of the variable**. Consider the `CustomLayer` in the example below. The variable `CustomLayer.var` is saved with `"var"` as part of key, not `"var_a"`. - ```python class CustomLayer(keras.layers.Layer): @@ -781,7 +737,6 @@ layer_ckpt = tf.train.Checkpoint(layer=layer).save("custom_layer") ckpt_reader = tf.train.load_checkpoint(layer_ckpt) ckpt_reader.get_variable_to_dtype_map() - ``` @@ -790,8 +745,8 @@ ckpt_reader.get_variable_to_dtype_map()
``` {'save_counter/.ATTRIBUTES/VARIABLE_VALUE': tf.int64, - '_CHECKPOINTABLE_OBJECT_GRAPH': tf.string, - 'layer/var/.ATTRIBUTES/VARIABLE_VALUE': tf.int32} + 'layer/var/.ATTRIBUTES/VARIABLE_VALUE': tf.int32, + '_CHECKPOINTABLE_OBJECT_GRAPH': tf.string} ```
@@ -803,7 +758,6 @@ they are able to share the same checkpoint. **Example:** - ```python inputs = keras.Input(shape=(784,), name="digits") x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs) @@ -858,7 +812,6 @@ pretrained_model.load_weights("pretrained_ckpt") # but will *not* work as expected. If you inspect the weights, you'll see that # none of the weights will have loaded. `pretrained_model.load_weights()` is the # correct method to call. - ```
@@ -903,7 +856,7 @@ Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= -pretrained (Model) (None, 64) 54400 +pretrained (Functional) (None, 64) 54400 _________________________________________________________________ predictions (Dense) (None, 5) 325 ================================================================= @@ -912,7 +865,7 @@ Trainable params: 54,725 Non-trainable params: 0 _________________________________________________________________ - + ```
@@ -921,7 +874,6 @@ switch between Sequential and Functional, or Functional and subclassed, etc., then always rebuild the pre-trained model and load the pre-trained weights to that model. - The next question is, how can weights be saved and loaded to different models if the model architectures are quite different? The solution is to use `tf.train.Checkpoint` to save and restore the exact layers/variables. @@ -929,7 +881,6 @@ The solution is to use `tf.train.Checkpoint` to save and restore the exact layer **Example:** - ```python # Create a subclassed model that essentially uses functional_model's first # and last layers. @@ -961,16 +912,15 @@ _ = model(tf.ones((1, 784))) tf.train.Checkpoint( dense=model.first_dense, kernel=model.kernel, bias=model.bias ).restore(ckpt_path).assert_consumed() - ```
``` -WARNING:tensorflow:From :15: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +W0611 15:19:10.748379 4624426432 deprecation.py:323] From :15: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.add_weight` method instead. - + ```
@@ -985,7 +935,6 @@ statuses as saved in the checkpoint. **Example:** - ```python # Runnable example sequential_model = keras.Sequential( @@ -998,14 +947,12 @@ sequential_model = keras.Sequential( ) sequential_model.save_weights("weights.h5") sequential_model.load_weights("weights.h5") - ``` Note that changing `layer.trainable` may result in a different `layer.weights` ordering when the model contains nested layers. - ```python class NestedDenseLayer(keras.layers.Layer): @@ -1028,7 +975,6 @@ nested_model.get_layer("nested").dense_1.trainable = False variable_names_2 = [v.name for v in nested_model.weights] print("\nvariables: {}".format(variable_names_2)) print("variable ordering changed:", variable_names != variable_names_2) - ```
@@ -1059,7 +1005,6 @@ the desired weights/layers into a new model. **Example:** - ```python def create_functional_model(): @@ -1082,7 +1027,6 @@ extracted_layers = pretrained_model.layers[:-1] extracted_layers.append(keras.layers.Dense(5, name="dense_3")) model = keras.Sequential(extracted_layers) model.summary() - ```
diff --git a/guides/md/training_with_built_in_methods.md b/guides/md/training_with_built_in_methods.md index 453d4a8c63..8aa47e5ee7 100644 --- a/guides/md/training_with_built_in_methods.md +++ b/guides/md/training_with_built_in_methods.md @@ -14,12 +14,10 @@ ## Setup - ```python import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers - ``` --- @@ -45,7 +43,6 @@ scratch via model subclassing. This guide doesn't cover distributed training. For distributed training, see our [guide to multi-gpu & distributed training](/guides/distributed_training/). - --- ## API overview: a first end-to-end example @@ -58,7 +55,6 @@ Let's consider the following model (here, we build in with the Functional API, b could be a Sequential model or a subclassed model as well): - ```python inputs = keras.Input(shape=(784,), name="digits") x = layers.Dense(64, activation="relu", name="dense_1")(inputs) @@ -66,7 +62,6 @@ x = layers.Dense(64, activation="relu", name="dense_2")(x) outputs = layers.Dense(10, activation="softmax", name="predictions")(x) model = keras.Model(inputs=inputs, outputs=outputs) - ``` Here's what the typical end-to-end workflow looks like, consisting of: @@ -78,7 +73,6 @@ Here's what the typical end-to-end workflow looks like, consisting of: We'll use MNIST data for this example. 
- ```python (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() @@ -94,13 +88,11 @@ x_val = x_train[-10000:] y_val = y_train[-10000:] x_train = x_train[:-10000] y_train = y_train[:-10000] - ``` We specify the training configuration (optimizer, loss, metrics): - ```python model.compile( optimizer=keras.optimizers.RMSprop(), # Optimizer @@ -109,7 +101,6 @@ model.compile( # List of metrics to monitor metrics=[keras.metrics.SparseCategoricalAccuracy()], ) - ``` We call `fit()`, which will train the model by slicing the data into "batches" of size @@ -117,7 +108,6 @@ We call `fit()`, which will train the model by slicing the data into "batches" o "epochs". - ```python print("Fit model on training data") history = model.fit( @@ -130,16 +120,15 @@ history = model.fit( # at the end of each epoch validation_data=(x_val, y_val), ) - ```
``` Fit model on training data Epoch 1/2 -782/782 [==============================] - 1s 1ms/step - loss: 0.3433 - sparse_categorical_accuracy: 0.9013 - val_loss: 0.2094 - val_sparse_categorical_accuracy: 0.9370 +782/782 [==============================] - 1s 955us/step - loss: 0.3362 - sparse_categorical_accuracy: 0.9036 - val_loss: 0.1712 - val_sparse_categorical_accuracy: 0.9511 Epoch 2/2 -782/782 [==============================] - 1s 1ms/step - loss: 0.1594 - sparse_categorical_accuracy: 0.9520 - val_loss: 0.1372 - val_sparse_categorical_accuracy: 0.9586 +782/782 [==============================] - 1s 864us/step - loss: 0.1575 - sparse_categorical_accuracy: 0.9523 - val_loss: 0.1293 - val_sparse_categorical_accuracy: 0.9632 ```
@@ -147,10 +136,8 @@ The returned "history" object holds a record of the loss values and metric value during training: - ```python history.history - ``` @@ -158,17 +145,16 @@ history.history
``` -{'loss': [0.34325557947158813, 0.15936172008514404], - 'sparse_categorical_accuracy': [0.9013199806213379, 0.9520000219345093], - 'val_loss': [0.2094312459230423, 0.13722778856754303], - 'val_sparse_categorical_accuracy': [0.9369999766349792, 0.9585999846458435]} +{'loss': [0.33624476194381714, 0.1574954241514206], + 'sparse_categorical_accuracy': [0.9035999774932861, 0.9523000121116638], + 'val_loss': [0.17115569114685059, 0.12931881844997406], + 'val_sparse_categorical_accuracy': [0.9510999917984009, 0.9631999731063843]} ```
We evaluate the model on the test data via `evaluate()`: - ```python # Evaluate the model on the test data using `evaluate` print("Evaluate on test data") @@ -180,14 +166,13 @@ print("test loss, test acc:", results) print("Generate predictions for 3 samples") predictions = model.predict(x_test[:3]) print("predictions shape:", predictions.shape) - ```
``` Evaluate on test data -79/79 [==============================] - 0s 780us/step - loss: 0.1357 - sparse_categorical_accuracy: 0.9581 -test loss, test acc: [0.13572055101394653, 0.9581000208854675] +79/79 [==============================] - 0s 734us/step - loss: 0.1304 - sparse_categorical_accuracy: 0.9611 +test loss, test acc: [0.1304282695055008, 0.9610999822616577] Generate predictions for 3 samples predictions shape: (3, 10) @@ -195,7 +180,6 @@ predictions shape: (3, 10)
Now, let's review each piece of this workflow in detail. - --- ## The `compile()` method: specifying a loss, metrics, and an optimizer @@ -205,14 +189,12 @@ optionally, some metrics to monitor. You pass these to the model as arguments to the `compile()` method: - ```python model.compile( optimizer=keras.optimizers.RMSprop(learning_rate=1e-3), loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[keras.metrics.SparseCategoricalAccuracy()], ) - ``` The `metrics` argument should be a list -- your model can have any number of metrics. @@ -226,21 +208,18 @@ Note that if you're satisfied with the default settings, in many cases the optim loss, and metrics can be specified via string identifiers as a shortcut: - ```python model.compile( optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["sparse_categorical_accuracy"], ) - ``` For later reuse, let's put our model definition and compile step in functions; we will call them several times across different examples in this guide. - ```python def get_uncompiled_model(): @@ -261,7 +240,6 @@ def get_compiled_model(): ) return model - ``` ### Many built-in optimizers, losses, and metrics are available @@ -290,7 +268,6 @@ Metrics: - `Recall()` - etc. - ### Custom losses There are two ways to provide custom losses with Keras. The first example creates a @@ -299,7 +276,6 @@ function that computes the mean squared error between the real data and the predictions: - ```python def custom_mean_squared_error(y_true, y_pred): @@ -312,14 +288,13 @@ model.compile(optimizer=keras.optimizers.Adam(), loss=custom_mean_squared_error) # We need to one-hot encode the labels to use MSE y_train_one_hot = tf.one_hot(y_train, depth=10) model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1) - ```
``` -782/782 [==============================] - 1s 735us/step - loss: 0.0155 +782/782 [==============================] - 1s 864us/step - loss: 0.0163 - + ```
@@ -339,7 +314,6 @@ reduce overfitting (we won't know if it works until we try!). Here's how you would do it: - ```python class CustomMSE(keras.losses.Loss): @@ -358,14 +332,13 @@ model.compile(optimizer=keras.optimizers.Adam(), loss=CustomMSE()) y_train_one_hot = tf.one_hot(y_train, depth=10) model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1) - ```
``` -782/782 [==============================] - 1s 769us/step - loss: 0.0390 +782/782 [==============================] - 1s 719us/step - loss: 0.0381 - + ```
@@ -389,7 +362,6 @@ Here's a simple example showing how to implement a `CategoricalTruePositives` me that counts how many samples were correctly classified as belonging to a given class: - ```python class CategoricalTruePositives(keras.metrics.Metric): @@ -421,19 +393,18 @@ model.compile( metrics=[CategoricalTruePositives()], ) model.fit(x_train, y_train, batch_size=64, epochs=3) - ```
``` Epoch 1/3 -782/782 [==============================] - 1s 792us/step - loss: 0.3466 - categorical_true_positives: 45080.0000 +782/782 [==============================] - 1s 693us/step - loss: 0.3570 - categorical_true_positives: 44933.0000 Epoch 2/3 -782/782 [==============================] - 1s 788us/step - loss: 0.1646 - categorical_true_positives: 47577.0000 +782/782 [==============================] - 1s 682us/step - loss: 0.1645 - categorical_true_positives: 47592.0000 Epoch 3/3 -782/782 [==============================] - 1s 794us/step - loss: 0.1203 - categorical_true_positives: 48168.0000 +782/782 [==============================] - 1s 805us/step - loss: 0.1214 - categorical_true_positives: 48162.0000 - + ```
@@ -451,7 +422,6 @@ regularization (note that activity regularization is built-in in all Keras layer this layer is just for the sake of providing a concrete example): - ```python class ActivityRegularizationLayer(layers.Layer): @@ -478,21 +448,19 @@ model.compile( # The displayed loss will be much higher than before # due to the regularization component. model.fit(x_train, y_train, batch_size=64, epochs=1) - ```
``` -782/782 [==============================] - 1s 840us/step - loss: 2.4625 +782/782 [==============================] - 1s 684us/step - loss: 2.5670 - + ```
You can do the same for logging metric values, using `add_metric()`: - ```python class MetricLoggingLayer(layers.Layer): @@ -522,14 +490,13 @@ model.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), ) model.fit(x_train, y_train, batch_size=64, epochs=1) - ```
``` -782/782 [==============================] - 1s 1ms/step - loss: 0.3294 - std_of_activation: 0.9851 +782/782 [==============================] - 1s 680us/step - loss: 0.3427 - std_of_activation: 0.9531 - + ```
@@ -540,7 +507,6 @@ or `model.add_metric(metric_tensor, name, aggregation)`. Here's a simple example: - ```python inputs = keras.Input(shape=(784,), name="digits") x1 = layers.Dense(64, activation="relu", name="dense_1")(inputs) @@ -557,14 +523,13 @@ model.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), ) model.fit(x_train, y_train, batch_size=64, epochs=1) - ```
``` -782/782 [==============================] - 1s 978us/step - loss: 2.4453 - std_of_activation: 0.0017 +782/782 [==============================] - 1s 719us/step - loss: 2.4683 - std_of_activation: 0.0018 - + ```
@@ -576,7 +541,6 @@ targets & logits, and it tracks a crossentropy loss via `add_loss()`. It also tracks classification accuracy via `add_metric()`. - ```python class LogisticEndpoint(keras.layers.Layer): @@ -599,14 +563,12 @@ class LogisticEndpoint(keras.layers.Layer): # Return the inference-time prediction tensor (for `.predict()`). return tf.nn.softmax(logits) - ``` You can use it in a model with two inputs (input data & targets), compiled without a `loss` argument, like this: - ```python import numpy as np @@ -623,21 +585,19 @@ data = { "targets": np.random.random((3, 10)), } model.fit(data) - ```
``` -1/1 [==============================] - 0s 834us/step - loss: 1.1664 - binary_accuracy: 0.0000e+00 +1/1 [==============================] - 0s 1ms/step - loss: 1.0048 - binary_accuracy: 0.0000e+00 - + ```
For more information about training multi-input models, see the section **Passing data to multi-input, multi-output models**. - ### Automatically setting apart a validation holdout set In the first end-to-end example you saw, we used the `validation_data` argument to pass @@ -657,18 +617,16 @@ received by the fit call, before any shuffling. Note that you can only use `validation_split` when training with NumPy data. - ```python model = get_compiled_model() model.fit(x_train, y_train, batch_size=64, validation_split=0.2, epochs=1) - ```
``` -625/625 [==============================] - 1s 1ms/step - loss: 0.3691 - sparse_categorical_accuracy: 0.8949 - val_loss: 0.2339 - val_sparse_categorical_accuracy: 0.9290 +625/625 [==============================] - 1s 939us/step - loss: 0.3692 - sparse_categorical_accuracy: 0.8970 - val_loss: 0.2397 - val_sparse_categorical_accuracy: 0.9279 - + ```
@@ -692,7 +650,6 @@ You can pass a `Dataset` instance directly to the methods `fit()`, `evaluate()`, `predict()`: - ```python model = get_compiled_model() @@ -714,22 +671,21 @@ model.fit(train_dataset, epochs=3) print("Evaluate") result = model.evaluate(test_dataset) dict(zip(model.metrics_names, result)) - ```
``` Epoch 1/3 -782/782 [==============================] - 1s 1ms/step - loss: 0.3425 - sparse_categorical_accuracy: 0.9035 +782/782 [==============================] - 1s 862us/step - loss: 0.3228 - sparse_categorical_accuracy: 0.9087 Epoch 2/3 -782/782 [==============================] - 1s 1ms/step - loss: 0.1631 - sparse_categorical_accuracy: 0.9518 +782/782 [==============================] - 1s 802us/step - loss: 0.1554 - sparse_categorical_accuracy: 0.9540 Epoch 3/3 -782/782 [==============================] - 1s 1ms/step - loss: 0.1198 - sparse_categorical_accuracy: 0.9646 +782/782 [==============================] - 1s 823us/step - loss: 0.1143 - sparse_categorical_accuracy: 0.9659 Evaluate -157/157 [==============================] - 0s 707us/step - loss: 0.1248 - sparse_categorical_accuracy: 0.9636 +157/157 [==============================] - 0s 621us/step - loss: 0.1254 - sparse_categorical_accuracy: 0.9621 -{'loss': 0.12476286292076111, - 'sparse_categorical_accuracy': 0.9635999798774719} +{'loss': 0.12538520991802216, + 'sparse_categorical_accuracy': 0.9621000289916992} ```
@@ -745,7 +701,6 @@ drawing the next batches. The dataset will eventually run out of data (unless it infinitely-looping dataset). - ```python model = get_compiled_model() @@ -755,19 +710,18 @@ train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64) # Only use the 100 batches per epoch (that's 64 * 100 samples) model.fit(train_dataset, epochs=3, steps_per_epoch=100) - ```
``` Epoch 1/3 -100/100 [==============================] - 0s 995us/step - loss: 0.7954 - sparse_categorical_accuracy: 0.7883 +100/100 [==============================] - 0s 1ms/step - loss: 0.7964 - sparse_categorical_accuracy: 0.7916 Epoch 2/3 -100/100 [==============================] - 0s 981us/step - loss: 0.3694 - sparse_categorical_accuracy: 0.8923 +100/100 [==============================] - 0s 880us/step - loss: 0.3805 - sparse_categorical_accuracy: 0.8898 Epoch 3/3 -100/100 [==============================] - 0s 1ms/step - loss: 0.3265 - sparse_categorical_accuracy: 0.9056 +100/100 [==============================] - 0s 791us/step - loss: 0.3209 - sparse_categorical_accuracy: 0.9009 - + ```
@@ -776,7 +730,6 @@ Epoch 3/3 You can pass a `Dataset` instance as the `validation_data` argument in `fit()`: - ```python model = get_compiled_model() @@ -789,14 +742,13 @@ val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) val_dataset = val_dataset.batch(64) model.fit(train_dataset, epochs=1, validation_data=val_dataset) - ```
``` -782/782 [==============================] - 1s 1ms/step - loss: 0.3530 - sparse_categorical_accuracy: 0.9017 - val_loss: 0.1993 - val_sparse_categorical_accuracy: 0.9423 +782/782 [==============================] - 1s 1ms/step - loss: 0.3507 - sparse_categorical_accuracy: 0.9001 - val_loss: 0.2190 - val_sparse_categorical_accuracy: 0.9351 - + ```
@@ -809,7 +761,6 @@ steps the model should run with the validation dataset before interrupting valid and moving on to the next epoch: - ```python model = get_compiled_model() @@ -829,14 +780,13 @@ model.fit( validation_data=val_dataset, validation_steps=10, ) - ```
``` -782/782 [==============================] - 1s 1ms/step - loss: 0.3406 - sparse_categorical_accuracy: 0.9045 - val_loss: 0.2985 - val_sparse_categorical_accuracy: 0.9172 +782/782 [==============================] - 1s 955us/step - loss: 0.3359 - sparse_categorical_accuracy: 0.9043 - val_loss: 0.2954 - val_sparse_categorical_accuracy: 0.9203 - + ```
@@ -848,7 +798,6 @@ not supported when training from `Dataset` objects, since this features requires ability to index the samples of the datasets, which is not possible in general with the `Dataset` API. - --- ## Other input formats supported @@ -914,33 +863,31 @@ sequence = CIFAR10Sequence(filenames, labels, batch_size) model.fit(sequence, epochs=10) ``` - --- ## Using sample weighting and class weighting -Besides input data and target data, it is possible to pass sample weights or class -weights to a model when using fit: +With the default settings the weight of a sample is decided by its frequency +in the dataset. There are two methods to weight the data, independent of +sample frequency: + +* Class weights +* Sample weights -- When training from NumPy data: via the `sample_weight` and `class_weight` arguments. -- When training from `Dataset` objects: by having the `Dataset` return a tuple -`(input_batch, target_batch, sample_weight_batch)`. +### Class weights -A "sample weights" array is an array of numbers that specify how much weight each -sample in a batch should have in computing the total loss. It is commonly used in -imbalanced classification problems (the idea being to give more weight to rarely-seen -classes). When the weights used are ones and zeros, the array can be used as a mask -for the loss function (entirely discarding the contribution of certain samples to the -total loss). +This is set by passing a dictionary to the `class_weight` argument to +`Model.fit()`. This dictionary maps class indices to the weight that should +be used for samples belonging to this class. -A "class weights" dict is a more specific instance of the same concept: it maps class -indices to the sample weight that should be used for samples belonging to this class. -For instance, if class "0" is twice less represented than class "1" in your data, you -could use `class_weight={0: 1., 1: 0.5}`. 
+This can be used to balance classes without resampling, or to train a
+model that gives more importance to a particular class.
-Here's a NumPy example where we use class weights or sample weights to give more
-importance to the correct classification of class #5 (which is the digit "5" in the
-MNIST dataset).
+For instance, if class "0" is half as represented as class "1" in your data,
+you could use `Model.fit(..., class_weight={0: 1., 1: 0.5})`.
+Here's a NumPy example where we use class weights or sample weights to
+give more importance to the correct classification of class #5 (which
+is the digit "5" in the MNIST dataset).


```python
@@ -964,20 +911,35 @@ class_weight = {
print("Fit with class weight")
model = get_compiled_model()
model.fit(x_train, y_train, class_weight=class_weight, batch_size=64, epochs=1)
-
```
``` Fit with class weight -782/782 [==============================] - 1s 809us/step - loss: 0.3820 - sparse_categorical_accuracy: 0.8995 +782/782 [==============================] - 1s 756us/step - loss: 0.3582 - sparse_categorical_accuracy: 0.9052 - + ```
-Here's the same example using `sample_weight` instead: +### Sample weights +For fine grained control, or if you are not building a classifier, +you can use "sample weights". + +- When training from NumPy data: Pass the `sample_weight` + argument to `Model.fit()`. +- When training from `tf.data` or any other sort of iterator: + Yield `(input_batch, label_batch, sample_weight_batch)` tuples. + +A "sample weights" array is an array of numbers that specify how much weight +each sample in a batch should have in computing the total loss. It is commonly +used in imbalanced classification problems (the idea being to give more weight +to rarely-seen classes). + +When the weights used are ones and zeros, the array can be used as a *mask* for +the loss function (entirely discarding the contribution of certain samples to +the total loss). ```python @@ -987,22 +949,20 @@ sample_weight[y_train == 5] = 2.0 print("Fit with sample weight") model = get_compiled_model() model.fit(x_train, y_train, sample_weight=sample_weight, batch_size=64, epochs=1) - ```
``` Fit with sample weight -782/782 [==============================] - 1s 891us/step - loss: 0.3727 - sparse_categorical_accuracy: 0.9028 +782/782 [==============================] - 1s 736us/step - loss: 0.3750 - sparse_categorical_accuracy: 0.9025 - + ```
Here's a matching `Dataset` example: - ```python sample_weight = np.ones(shape=(len(y_train),)) sample_weight[y_train == 5] = 2.0 @@ -1016,14 +976,13 @@ train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64) model = get_compiled_model() model.fit(train_dataset, epochs=1) - ```
``` -782/782 [==============================] - 1s 1ms/step - loss: 0.3674 - sparse_categorical_accuracy: 0.9047 +782/782 [==============================] - 1s 911us/step - loss: 0.3679 - sparse_categorical_accuracy: 0.9045 - + ```
@@ -1041,7 +1000,6 @@ combination of these inputs: a "score" (of shape `(1,)`) and a probability distribution over five classes (of shape `(5,)`). - ```python image_input = keras.Input(shape=(32, 32, 3), name="img_input") timeseries_input = keras.Input(shape=(None, 10), name="ts_input") @@ -1060,23 +1018,20 @@ class_output = layers.Dense(5, activation="softmax", name="class_output")(x) model = keras.Model( inputs=[image_input, timeseries_input], outputs=[score_output, class_output] ) - ``` Let's plot this model, so you can clearly see what we're doing here (note that the shapes shown in the plot are batch shapes, rather than per-sample shapes). - ```python keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True) - ``` -![png](/img/guides/training_with_built_in_methods/training_with_built_in_methods_62_0.png) +![png](/img/guides/training_with_built_in_methods/training_with_built_in_methods_64_0.png) @@ -1084,13 +1039,11 @@ At compilation time, we can specify different losses to different outputs, by pa the loss functions as a list: - ```python model.compile( optimizer=keras.optimizers.RMSprop(1e-3), loss=[keras.losses.MeanSquaredError(), keras.losses.CategoricalCrossentropy()], ) - ``` If we only passed a single loss function to the model, the same loss function would be @@ -1099,7 +1052,6 @@ applied to every output (which is not appropriate here). Likewise for metrics: - ```python model.compile( optimizer=keras.optimizers.RMSprop(1e-3), @@ -1112,14 +1064,12 @@ model.compile( [keras.metrics.CategoricalAccuracy()], ], ) - ``` Since we gave names to our output layers, we could also specify per-output losses and metrics via a dict: - ```python model.compile( optimizer=keras.optimizers.RMSprop(1e-3), @@ -1135,7 +1085,6 @@ model.compile( "class_output": [keras.metrics.CategoricalAccuracy()], }, ) - ``` We recommend the use of explicit names and dicts if you have more than 2 outputs. 
@@ -1145,7 +1094,6 @@ instance, one might wish to privilege the "score" loss in our example, by giving the importance of the class loss), using the `loss_weights` argument: - ```python model.compile( optimizer=keras.optimizers.RMSprop(1e-3), @@ -1162,14 +1110,12 @@ model.compile( }, loss_weights={"score_output": 2.0, "class_output": 1.0}, ) - ``` You could also chose not to compute a loss for certain outputs, if these outputs meant for prediction but not for training: - ```python # List loss version model.compile( @@ -1182,7 +1128,6 @@ model.compile( optimizer=keras.optimizers.RMSprop(1e-3), loss={"class_output": keras.losses.CategoricalCrossentropy()}, ) - ``` Passing data to a multi-input or multi-output model in fit works in a similar way as @@ -1191,7 +1136,6 @@ specifying a loss function in compile: you can pass **lists of NumPy arrays** (w names to NumPy arrays**. - ```python model.compile( optimizer=keras.optimizers.RMSprop(1e-3), @@ -1214,15 +1158,14 @@ model.fit( batch_size=32, epochs=1, ) - ```
``` -4/4 [==============================] - 0s 4ms/step - loss: 7.6784 - score_output_loss: 3.1199 - class_output_loss: 4.5585 -4/4 [==============================] - 0s 3ms/step - loss: 6.4875 - score_output_loss: 2.0329 - class_output_loss: 4.4547 +4/4 [==============================] - 0s 4ms/step - loss: 5.1358 - score_output_loss: 0.1805 - class_output_loss: 4.9552 +4/4 [==============================] - 0s 6ms/step - loss: 4.8821 - score_output_loss: 0.1509 - class_output_loss: 4.7312 - + ```
@@ -1230,7 +1173,6 @@ Here's the `Dataset` use case: similarly as what we did for NumPy arrays, the `D should return a tuple of dicts. - ```python train_dataset = tf.data.Dataset.from_tensor_slices( ( @@ -1241,14 +1183,13 @@ train_dataset = tf.data.Dataset.from_tensor_slices( train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64) model.fit(train_dataset, epochs=1) - ```
``` -2/2 [==============================] - 0s 4ms/step - loss: 5.9390 - score_output_loss: 1.5347 - class_output_loss: 4.4042 +2/2 [==============================] - 0s 4ms/step - loss: 4.7814 - score_output_loss: 0.1373 - class_output_loss: 4.6440 - + ```
@@ -1272,7 +1213,6 @@ performance threshold is exceeded Callbacks can be passed as a list to your call to `fit()`: - ```python model = get_compiled_model() @@ -1295,28 +1235,27 @@ model.fit( callbacks=callbacks, validation_split=0.2, ) - ```
``` Epoch 1/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.3623 - sparse_categorical_accuracy: 0.8980 - val_loss: 0.2259 - val_sparse_categorical_accuracy: 0.9310 +625/625 [==============================] - 1s 906us/step - loss: 0.3761 - sparse_categorical_accuracy: 0.8926 - val_loss: 0.2249 - val_sparse_categorical_accuracy: 0.9312 Epoch 2/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.1667 - sparse_categorical_accuracy: 0.9504 - val_loss: 0.1822 - val_sparse_categorical_accuracy: 0.9442 +625/625 [==============================] - 1s 836us/step - loss: 0.1689 - sparse_categorical_accuracy: 0.9505 - val_loss: 0.1720 - val_sparse_categorical_accuracy: 0.9492 Epoch 3/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.1212 - sparse_categorical_accuracy: 0.9640 - val_loss: 0.1572 - val_sparse_categorical_accuracy: 0.9534 +625/625 [==============================] - 1s 839us/step - loss: 0.1198 - sparse_categorical_accuracy: 0.9632 - val_loss: 0.1499 - val_sparse_categorical_accuracy: 0.9564 Epoch 4/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.0959 - sparse_categorical_accuracy: 0.9711 - val_loss: 0.1514 - val_sparse_categorical_accuracy: 0.9546 +625/625 [==============================] - 1s 861us/step - loss: 0.0943 - sparse_categorical_accuracy: 0.9714 - val_loss: 0.1433 - val_sparse_categorical_accuracy: 0.9580 Epoch 5/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.0798 - sparse_categorical_accuracy: 0.9761 - val_loss: 0.1389 - val_sparse_categorical_accuracy: 0.9616 +625/625 [==============================] - 1s 843us/step - loss: 0.0772 - sparse_categorical_accuracy: 0.9768 - val_loss: 0.1367 - val_sparse_categorical_accuracy: 0.9618 Epoch 6/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.0684 - sparse_categorical_accuracy: 0.9798 - val_loss: 0.1408 - val_sparse_categorical_accuracy: 0.9615 +625/625 [==============================] - 1s 
875us/step - loss: 0.0634 - sparse_categorical_accuracy: 0.9802 - val_loss: 0.1348 - val_sparse_categorical_accuracy: 0.9627 Epoch 7/20 -625/625 [==============================] - 1s 1ms/step - loss: 0.0583 - sparse_categorical_accuracy: 0.9829 - val_loss: 0.1502 - val_sparse_categorical_accuracy: 0.9605 +625/625 [==============================] - 1s 890us/step - loss: 0.0538 - sparse_categorical_accuracy: 0.9832 - val_loss: 0.1437 - val_sparse_categorical_accuracy: 0.9610 Epoch 00007: early stopping - + ```
@@ -1345,7 +1284,6 @@ Make sure to read the Here's a simple example saving a list of per-batch loss values during training: - ```python class LossHistory(keras.callbacks.Callback): @@ -1355,7 +1293,6 @@ class LossHistory(keras.callbacks.Callback): def on_batch_end(self, batch, logs): self.per_batch_losses.append(logs.get("loss")) - ``` --- @@ -1367,7 +1304,6 @@ checkpoints of your model at frequent intervals. The easiest way to achieve this is with the `ModelCheckpoint` callback: - ```python model = get_compiled_model() @@ -1387,28 +1323,39 @@ callbacks = [ model.fit( x_train, y_train, epochs=2, batch_size=64, callbacks=callbacks, validation_split=0.2 ) - ```
``` Epoch 1/2 -598/625 [===========================>..] - ETA: 0s - loss: 0.3742 - sparse_categorical_accuracy: 0.8939 +617/625 [============================>.] - ETA: 0s - loss: 0.3742 - sparse_categorical_accuracy: 0.8925 +WARNING: Logging before flag parsing goes to stderr. +W0611 15:21:22.880445 4702766528 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:467: set_learning_phase (from tensorflow.python.keras.backend) is deprecated and will be removed after 2020-10-11. +Instructions for updating: +Simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model. ```
``` -Epoch 00001: val_loss improved from inf to 0.23939, saving model to mymodel_1 -625/625 [==============================] - 1s 2ms/step - loss: 0.3677 - sparse_categorical_accuracy: 0.8956 - val_loss: 0.2394 - val_sparse_categorical_accuracy: 0.9288 +Epoch 00001: val_loss improved from inf to 0.22021, saving model to mymodel_1 + +W0611 15:21:22.963418 4702766528 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:105: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. +W0611 15:21:22.965559 4702766528 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:105: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +625/625 [==============================] - 1s 2ms/step - loss: 0.3716 - sparse_categorical_accuracy: 0.8933 - val_loss: 0.2202 - val_sparse_categorical_accuracy: 0.9342 Epoch 2/2 -597/625 [===========================>..] - ETA: 0s - loss: 0.1711 - sparse_categorical_accuracy: 0.9497 -Epoch 00002: val_loss improved from 0.23939 to 0.17413, saving model to mymodel_2 -625/625 [==============================] - 1s 2ms/step - loss: 0.1707 - sparse_categorical_accuracy: 0.9499 - val_loss: 0.1741 - val_sparse_categorical_accuracy: 0.9488 +614/625 [============================>.] 
- ETA: 0s - loss: 0.1715 - sparse_categorical_accuracy: 0.9489 +Epoch 00002: val_loss improved from 0.22021 to 0.18237, saving model to mymodel_2 +625/625 [==============================] - 1s 1ms/step - loss: 0.1717 - sparse_categorical_accuracy: 0.9488 - val_loss: 0.1824 - val_sparse_categorical_accuracy: 0.9443 - + ```
@@ -1417,7 +1364,6 @@ the ability to restart training from the last saved state of the model in case t gets randomly interrupted. Here's a basic example: - ```python import os @@ -1448,15 +1394,14 @@ callbacks = [ ) ] model.fit(x_train, y_train, epochs=1, callbacks=callbacks) - ```
``` Creating a new model -1563/1563 [==============================] - 8s 5ms/step - loss: 0.2950 - sparse_categorical_accuracy: 0.9151 +1563/1563 [==============================] - 6s 4ms/step - loss: 0.3006 - sparse_categorical_accuracy: 0.9117 - + ```
@@ -1465,7 +1410,6 @@ You call also write your own callback for saving and restoring models. For a complete guide on serialization and saving, see the [guide to saving and serializing Models](/guides/serialization_and_saving/). - --- ## Using learning rate schedules @@ -1482,7 +1426,6 @@ You can easily use a static learning rate decay schedule by passing a schedule o as the `learning_rate` argument in your optimizer: - ```python initial_learning_rate = 0.1 lr_schedule = keras.optimizers.schedules.ExponentialDecay( @@ -1490,7 +1433,6 @@ lr_schedule = keras.optimizers.schedules.ExponentialDecay( ) optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule) - ``` Several built-in schedules are available: `ExponentialDecay`, `PiecewiseConstantDecay`, @@ -1506,7 +1448,6 @@ However, callbacks do have access to all metrics, including validation metrics! thus achieve this pattern by using a callback that modifies the current learning rate on the optimizer. In fact, this is even built-in as the `ReduceLROnPlateau` callback. - --- ## Visualizing loss and metrics during training @@ -1526,7 +1467,6 @@ from the command line: tensorboard --logdir=/full_path_to_your_logs ``` - ### Using the TensorBoard callback The easiest way to use TensorBoard with a Keras model and the fit method is the @@ -1536,7 +1476,6 @@ In the simplest case, just specify where you want the callback to write logs, an you're good to go: - ```python keras.callbacks.TensorBoard( log_dir="/full_path_to_your_logs", @@ -1544,7 +1483,6 @@ keras.callbacks.TensorBoard( embeddings_freq=0, # How often to log embedding visualizations update_freq="epoch", ) # How often to write logs (default: once per epoch) - ``` @@ -1552,10 +1490,9 @@ keras.callbacks.TensorBoard(
``` - + ```
For more information, see the [documentation for the `TensorBoard` callback](/api/callbacks/tensorboard/). - diff --git a/guides/md/understanding_masking_and_padding.md b/guides/md/understanding_masking_and_padding.md index ab11db5e4b..a5586c0041 100644 --- a/guides/md/understanding_masking_and_padding.md +++ b/guides/md/understanding_masking_and_padding.md @@ -19,7 +19,6 @@ import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers - ``` --- @@ -88,7 +87,6 @@ padded_inputs = tf.keras.preprocessing.sequence.pad_sequences( ) print(padded_inputs) - ```
@@ -135,7 +133,6 @@ unmasked_embedding = tf.cast( masked_embedding = masking_layer(unmasked_embedding) print(masked_embedding._keras_mask) - ```
@@ -171,7 +168,6 @@ receive a mask, which means it will ignore padded values: model = keras.Sequential( [layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True), layers.LSTM(32),] ) - ``` This is also the case for the following Functional API model: @@ -183,7 +179,6 @@ x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs) outputs = layers.LSTM(32)(x) model = keras.Model(inputs, outputs) - ``` --- @@ -199,7 +194,6 @@ Thus, you can pass the output of the `compute_mask()` method of a mask-producing to the `__call__` method of a mask-consuming layer, like this: - ```python class MyLayer(layers.Layer): @@ -222,7 +216,6 @@ layer = MyLayer() x = np.random.random((32, 10)) * 100 x = x.astype("int32") layer(x) - ``` @@ -231,19 +224,19 @@ layer(x)
``` + [-5.7978416e-04, -1.8325391e-03, -2.0467002e-04, ..., + -3.9534271e-03, -2.2688047e-04, 1.2577593e-03], + [ 2.4689233e-03, -3.6403039e-04, 7.7487719e-05, ..., + 1.0208538e-03, 2.3937733e-03, -4.4873711e-03], + [ 2.6551904e-03, -1.8738948e-03, -1.9827935e-04, ..., + -3.3328766e-03, 1.0988748e-06, 1.4491909e-04]], dtype=float32)> ```
@@ -284,7 +277,6 @@ class TemporalSplit(keras.layers.Layer): first_half, second_half = TemporalSplit()(masked_embedding) print(first_half._keras_mask) print(second_half._keras_mask) - ```
@@ -337,15 +329,14 @@ y = layer(x) mask = layer.compute_mask(x) print(mask) - ```
``` tf.Tensor( -[[ True True True True True True True False True True] +[[ True True True True True True True True True True] [ True True True True True True True True True True] - [ True True True False True False True True False True]], shape=(3, 10), dtype=bool) + [ True True True True True True True True True True]], shape=(3, 10), dtype=bool) ```
@@ -366,7 +357,6 @@ to be able to propagate the current input mask, you should set `self.supports_ma Here's an example of a layer that is whitelisted for mask propagation: - ```python class MyActivation(keras.layers.Layer): @@ -378,7 +368,6 @@ class MyActivation(keras.layers.Layer): def call(self, inputs): return tf.nn.relu(inputs) - ``` You can now use this custom layer in-between a mask-generating layer (like `Embedding`) @@ -394,7 +383,6 @@ print("Mask found:", x._keras_mask) outputs = layers.LSTM(32)(x) # Will receive the mask model = keras.Model(inputs, outputs) - ```
@@ -434,7 +422,6 @@ outputs = TemporalSoftmax()(x) model = keras.Model(inputs, outputs) y = model(np.random.randint(0, 10, size=(32, 100)), np.random.random((32, 100, 1))) - ``` --- @@ -454,4 +441,3 @@ automatically. manually. - You can easily write layers that modify the current mask, that generate a new mask, or that consume the mask associated with the inputs. - diff --git a/guides/md/working_with_rnns.md b/guides/md/working_with_rnns.md index f6ce5d88d9..f1184b88a6 100644 --- a/guides/md/working_with_rnns.md +++ b/guides/md/working_with_rnns.md @@ -31,24 +31,20 @@ part of the `for` loop) with custom behavior, and use it with the generic `keras.layers.RNN` layer (the `for` loop itself). This allows you to quickly prototype different research ideas in a flexible way with minimal code. - --- ## Setup - ```python import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers - ``` --- ## Built-in RNN layers: a simple example - There are three built-in RNN layers in Keras: 1. `keras.layers.SimpleRNN`, a fully-connected RNN where the output from previous @@ -68,7 +64,6 @@ embeds each integer into a 64-dimensional vector, then processes the sequence of vectors using a `LSTM` layer. - ```python model = keras.Sequential() # Add an Embedding layer expecting input vocab of size 1000, and @@ -82,7 +77,6 @@ model.add(layers.LSTM(128)) model.add(layers.Dense(10)) model.summary() - ```
@@ -115,7 +109,6 @@ CPU), via the `unroll` argument For more information, see the [RNN API documentation](https://keras.io/api/layers/recurrent_layers/). - --- ## Outputs and states @@ -129,7 +122,6 @@ per timestep per sample), if you set `return_sequences=True`. The shape of this is `(batch_size, timesteps, units)`. - ```python model = keras.Sequential() model.add(layers.Embedding(input_dim=1000, output_dim=64)) @@ -143,7 +135,6 @@ model.add(layers.SimpleRNN(128)) model.add(layers.Dense(10)) model.summary() - ```
@@ -184,7 +175,6 @@ Note that the shape of the state needs to match the unit size of the layer, like example below. - ```python encoder_vocab = 1000 decoder_vocab = 2000 @@ -213,7 +203,6 @@ output = layers.Dense(10)(decoder_output) model = keras.Model([encoder_input, decoder_input], output) model.summary() - ```
@@ -273,7 +262,6 @@ layer. The cell abstraction, together with the generic `keras.layers.RNN` class, make it very easy to implement custom RNN architectures for your research. - --- ## Cross-batch statefulness @@ -323,7 +311,6 @@ number of samples (batch size). E.g. if a batch contains `[sequence_A_from_t0_to Here is a complete example: - ```python paragraph1 = np.random.random((20, 10, 50)).astype(np.float32) paragraph2 = np.random.random((20, 10, 50)).astype(np.float32) @@ -338,13 +325,11 @@ output = lstm_layer(paragraph3) # If no initial_state was provided, zero-states will be used by default. lstm_layer.reset_states() - ``` ### RNN State Reuse - The recorded states of the RNN layer are not included in the `layer.weights()`. If you would like to reuse the state from a RNN layer, you can retrieve the states value by `layer.states` and use it as the @@ -356,7 +341,6 @@ supports layers with single input and output, the extra input of initial state m it impossible to use here. - ```python paragraph1 = np.random.random((20, 10, 50)).astype(np.float32) paragraph2 = np.random.random((20, 10, 50)).astype(np.float32) @@ -371,7 +355,6 @@ existing_state = lstm_layer.states new_lstm_layer = layers.LSTM(64) new_output = new_lstm_layer(paragraph3, initial_state=existing_state) - ``` --- @@ -386,7 +369,6 @@ Keras provides an easy API for you to build such bidirectional RNNs: the `keras.layers.Bidirectional` wrapper. - ```python model = keras.Sequential() @@ -397,7 +379,6 @@ model.add(layers.Bidirectional(layers.LSTM(32))) model.add(layers.Dense(10)) model.summary() - ```
@@ -429,7 +410,6 @@ concatenation, change the `merge_mode` parameter in the `Bidirectional` wrapper constructor. For more details about `Bidirectional`, please check [the API docs](https://keras.io/api/layers/recurrent_layers/bidirectional/). - --- ## Performance optimization and CuDNN kernels @@ -456,7 +436,6 @@ For the detailed list of constraints, please see the documentation for the [LSTM](https://keras.io/api/layers/recurrent_layers/lstm/) and [GRU](https://keras.io/api/layers/recurrent_layers/gru/) layers. - ### Using CuDNN kernels when available Let's build a simple LSTM model to demonstrate the performance difference. @@ -465,7 +444,6 @@ We'll use as input sequences the sequence of rows of MNIST digits (treating each pixels as a timestep), and we'll predict the digit's label. - ```python batch_size = 64 # Each MNIST image batch is a tensor of shape (batch_size, 28, 28). @@ -497,20 +475,17 @@ def build_model(allow_cudnn_kernel=True): ) return model - ``` Let's load the MNIST dataset: - ```python mnist = keras.datasets.mnist (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train, x_test = x_train / 255.0, x_test / 255.0 sample, sample_label = x_train[0], y_train[0] - ``` Let's create a model instance and train it. @@ -520,7 +495,6 @@ output of the model has shape of `[batch_size, 10]`. The target for the model is integer vector, each of the integer is in the range of 0 to 9. - ```python model = build_model(allow_cudnn_kernel=True) @@ -534,21 +508,19 @@ model.compile( model.fit( x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1 ) - ```
``` -938/938 [==============================] - 10s 10ms/step - loss: 0.9154 - accuracy: 0.7059 - val_loss: 0.5218 - val_accuracy: 0.8350 +938/938 [==============================] - 10s 11ms/step - loss: 0.9792 - accuracy: 0.6869 - val_loss: 0.5196 - val_accuracy: 0.8427 - + ```
Now, let's compare to a model that does not use the CuDNN kernel: - ```python noncudnn_model = build_model(allow_cudnn_kernel=False) noncudnn_model.set_weights(model.get_weights()) @@ -560,14 +532,13 @@ noncudnn_model.compile( noncudnn_model.fit( x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1 ) - ```
``` -938/938 [==============================] - 10s 11ms/step - loss: 0.4161 - accuracy: 0.8717 - val_loss: 0.4139 - val_accuracy: 0.8614 +938/938 [==============================] - 10s 11ms/step - loss: 0.3900 - accuracy: 0.8843 - val_loss: 0.4477 - val_accuracy: 0.8426 - + ```
@@ -583,7 +554,6 @@ You simply don't have to worry about the hardware you're running on anymore. Isn pretty cool? - ```python import matplotlib.pyplot as plt @@ -595,7 +565,6 @@ with tf.device("CPU:0"): "Predicted result is: %s, target result is: %s" % (result.numpy(), sample_label) ) plt.imshow(sample, cmap=plt.get_cmap("gray")) - ```
@@ -625,15 +594,12 @@ representation could be: The following code provides an example of how to build a custom RNN cell that accepts such structured inputs. - ### Define a custom cell that support nested input/output - See [Making new Layers & Models via subclassing](/guides/making_new_layers_and_models_via_subclassing/) for details on writing your own layers. - ```python class NestedCell(keras.layers.Layer): @@ -679,7 +645,6 @@ class NestedCell(keras.layers.Layer): def get_config(self): return {"unit_1": self.unit_1, "unit_2": unit_2, "unit_3": self.unit_3} - ``` ### Build a RNN model with nested input/output @@ -688,7 +653,6 @@ Let's build a Keras model that uses a `keras.layers.RNN` layer and the custom ce we just defined. - ```python unit_1 = 10 unit_2 = 20 @@ -712,7 +676,6 @@ outputs = rnn((input_1, input_2)) model = keras.models.Model([input_1, input_2], outputs) model.compile(optimizer="adam", loss="mse", metrics=["accuracy"]) - ``` ### Train the model with randomly generated data @@ -721,7 +684,6 @@ Since there isn't a good candidate dataset for this model, we use random Numpy d demonstration. - ```python input_1_data = np.random.random((batch_size * num_batches, timestep, i1)) input_2_data = np.random.random((batch_size * num_batches, timestep, i2, i3)) @@ -731,14 +693,13 @@ input_data = [input_1_data, input_2_data] target_data = [target_1_data, target_2_data] model.fit(input_data, target_data, batch_size=batch_size) - ```
``` -10/10 [==============================] - 2s 212ms/step - loss: 0.7551 - rnn_1_loss: 0.2712 - rnn_1_1_loss: 0.4839 - rnn_1_accuracy: 0.0922 - rnn_1_1_accuracy: 0.0319 +10/10 [==============================] - 2s 222ms/step - loss: 0.7225 - rnn_1_loss: 0.2545 - rnn_1_1_loss: 0.4679 - rnn_1_accuracy: 0.1094 - rnn_1_1_accuracy: 0.0349 - + ```
@@ -747,5 +708,4 @@ logic for individual step within the sequence, and the `keras.layers.RNN` layer will handle the sequence iteration for you. It's an incredibly powerful way to quickly prototype new kinds of RNNs (e.g. a LSTM variant). -For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/RNN/). - +For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/rnn/). diff --git a/guides/md/writing_a_training_loop_from_scratch.md b/guides/md/writing_a_training_loop_from_scratch.md index c61b485806..2efb0a1c24 100644 --- a/guides/md/writing_a_training_loop_from_scratch.md +++ b/guides/md/writing_a_training_loop_from_scratch.md @@ -19,7 +19,6 @@ import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers import numpy as np - ``` --- @@ -50,14 +49,12 @@ retrieve using `model.trainable_weights`). Let's consider a simple MNIST model: - ```python inputs = keras.Input(shape=(784,), name="digits") x1 = layers.Dense(64, activation="relu")(inputs) x2 = layers.Dense(64, activation="relu")(x1) outputs = layers.Dense(10, name="predictions")(x2) model = keras.Model(inputs=inputs, outputs=outputs) - ``` Let's train it using mini-batch gradient with a custom training loop. @@ -78,7 +75,6 @@ x_train = np.reshape(x_train, (-1, 784)) x_test = np.reshape(x_train, (-1, 784)) train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size) - ``` Here's our training loop: @@ -129,22 +125,21 @@ for epoch in range(epochs): % (step, float(loss_value)) ) print("Seen so far: %s samples" % ((step + 1) * 64)) - ```
``` Start of epoch 0 -Training loss (for one batch) at step 0: 111.6209 +Training loss (for one batch) at step 0: 138.9553 Seen so far: 64 samples -Training loss (for one batch) at step 200: 2.0270 +Training loss (for one batch) at step 200: 2.0124 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.6650 +Training loss (for one batch) at step 400: 0.6247 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 1.4232 +Training loss (for one batch) at step 600: 0.9244 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.8876 +Training loss (for one batch) at step 800: 0.4198 Seen so far: 51264 samples ```
@@ -152,15 +147,15 @@ Seen so far: 51264 samples
``` Start of epoch 1 -Training loss (for one batch) at step 0: 1.1270 +Training loss (for one batch) at step 0: 0.6736 Seen so far: 64 samples -Training loss (for one batch) at step 200: 0.5749 +Training loss (for one batch) at step 200: 0.6869 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.9260 +Training loss (for one batch) at step 400: 0.5578 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 0.6680 +Training loss (for one batch) at step 600: 0.3697 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.7342 +Training loss (for one batch) at step 800: 0.0953 Seen so far: 51264 samples ``` @@ -213,7 +208,6 @@ x_train = x_train[:-10000] y_train = y_train[:-10000] val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) val_dataset = val_dataset.batch(64) - ``` Here's our training & evaluation loop: @@ -262,45 +256,44 @@ for epoch in range(epochs): val_acc_metric.reset_states() print("Validation acc: %.4f" % (float(val_acc),)) print("Time taken: %.2fs" % (time.time() - start_time)) - ```
``` Start of epoch 0 -Training loss (for one batch) at step 0: 98.7654 +Training loss (for one batch) at step 0: 103.4554 Seen so far: 64 samples -Training loss (for one batch) at step 200: 1.6912 +Training loss (for one batch) at step 200: 1.5734 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.8003 +Training loss (for one batch) at step 400: 0.7797 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 0.5667 +Training loss (for one batch) at step 600: 1.2821 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.3703 +Training loss (for one batch) at step 800: 0.3632 Seen so far: 51264 samples -Training acc over epoch: 0.7895 -Validation acc: 0.8839 -Time taken: 4.00s +Training acc over epoch: 0.7958 +Validation acc: 0.8843 +Time taken: 3.78s ```
``` Start of epoch 1 -Training loss (for one batch) at step 0: 0.4217 +Training loss (for one batch) at step 0: 0.7340 Seen so far: 64 samples -Training loss (for one batch) at step 200: 0.6494 +Training loss (for one batch) at step 200: 0.5991 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.3163 +Training loss (for one batch) at step 400: 0.8521 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 0.5749 +Training loss (for one batch) at step 600: 0.6446 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.7743 +Training loss (for one batch) at step 800: 0.5393 Seen so far: 51264 samples -Training acc over epoch: 0.8796 -Validation acc: 0.9109 -Time taken: 4.12s +Training acc over epoch: 0.8817 +Validation acc: 0.9163 +Time taken: 4.31s ```
@@ -333,7 +326,6 @@ def train_step(x, y): train_acc_metric.update_state(y, logits) return loss_value - ``` Let's do the same with the evaluation step: @@ -346,7 +338,6 @@ def test_step(x, y): val_logits = model(x, training=False) val_acc_metric.update_state(y, val_logits) - ``` Now, let's re-run our training loop with this compiled training step: @@ -387,44 +378,43 @@ for epoch in range(epochs): val_acc_metric.reset_states() print("Validation acc: %.4f" % (float(val_acc),)) print("Time taken: %.2fs" % (time.time() - start_time)) - ```
``` Start of epoch 0 -Training loss (for one batch) at step 0: 0.4031 +Training loss (for one batch) at step 0: 0.2797 Seen so far: 64 samples -Training loss (for one batch) at step 200: 0.5389 +Training loss (for one batch) at step 200: 0.5493 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.2882 +Training loss (for one batch) at step 400: 0.3036 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 0.1869 +Training loss (for one batch) at step 600: 0.4908 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.2242 +Training loss (for one batch) at step 800: 0.3206 Seen so far: 51264 samples -Training acc over epoch: 0.9021 -Validation acc: 0.9245 -Time taken: 0.94s +Training acc over epoch: 0.9060 +Validation acc: 0.9191 +Time taken: 1.00s ```
``` Start of epoch 1 -Training loss (for one batch) at step 0: 0.5663 +Training loss (for one batch) at step 0: 0.3756 Seen so far: 64 samples -Training loss (for one batch) at step 200: 0.4330 +Training loss (for one batch) at step 200: 0.2454 Seen so far: 12864 samples -Training loss (for one batch) at step 400: 0.2152 +Training loss (for one batch) at step 400: 0.4296 Seen so far: 25664 samples -Training loss (for one batch) at step 600: 0.3243 +Training loss (for one batch) at step 600: 0.2993 Seen so far: 38464 samples -Training loss (for one batch) at step 800: 0.2464 +Training loss (for one batch) at step 800: 0.6099 Seen so far: 51264 samples -Training acc over epoch: 0.9143 -Validation acc: 0.9327 +Training acc over epoch: 0.9174 +Validation acc: 0.9326 Time taken: 0.67s ``` @@ -445,7 +435,6 @@ and add them to the main loss in your training step. Consider this layer, that creates an activity regularization loss: - ```python class ActivityRegularizationLayer(layers.Layer): @@ -453,7 +442,6 @@ class ActivityRegularizationLayer(layers.Layer): self.add_loss(1e-2 * tf.reduce_sum(inputs)) return inputs - ``` Let's build a really simple model that uses it: @@ -468,7 +456,6 @@ x = layers.Dense(64, activation="relu")(x) outputs = layers.Dense(10, name="predictions")(x) model = keras.Model(inputs=inputs, outputs=outputs) - ``` Here's what our training step should look like now: @@ -488,7 +475,6 @@ def train_step(x, y): train_acc_metric.update_state(y, logits) return loss_value - ``` --- @@ -548,7 +534,6 @@ discriminator = keras.Sequential( name="discriminator", ) discriminator.summary() - ```
@@ -599,7 +584,6 @@ generator = keras.Sequential( ], name="generator", ) - ``` Here's the key bit: the training loop. As you can see it is quite straightforward. The @@ -652,7 +636,6 @@ def train_step(real_images): g_optimizer.apply_gradients(zip(grads, generator.trainable_weights)) return d_loss, g_loss, generated_images - ``` Let's train our GAN, by repeatedly calling `train_step` on batches of images. @@ -699,15 +682,14 @@ for epoch in range(epochs): # Remove the lines below to actually train the model! if step > 10: break - ```
``` Start epoch 0 -discriminator loss at step 0: 0.68 -adversarial loss at step 0: 0.69 +discriminator loss at step 0: 0.71 +adversarial loss at step 0: 0.73 ```
diff --git a/guides/md/writing_your_own_callbacks.md b/guides/md/writing_your_own_callbacks.md index 857382db39..da038f6505 100644 --- a/guides/md/writing_your_own_callbacks.md +++ b/guides/md/writing_your_own_callbacks.md @@ -28,7 +28,6 @@ started. ```python import tensorflow as tf from tensorflow import keras - ``` --- @@ -99,7 +98,6 @@ def get_model(): ) return model - ``` Then, load the MNIST data for training and testing from Keras datasets API: @@ -116,7 +114,6 @@ x_train = x_train[:1000] y_train = y_train[:1000] x_test = x_test[:1000] y_test = y_test[:1000] - ``` Now, define a simple custom callback that logs: @@ -187,7 +184,6 @@ class CustomCallback(keras.callbacks.Callback): keys = list(logs.keys()) print("...Predicting: end of batch {}; got log keys: {}".format(batch, keys)) - ``` Let's try it out: @@ -210,7 +206,6 @@ res = model.evaluate( ) res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()]) - ```
@@ -316,37 +311,36 @@ res = model.evaluate( verbose=0, callbacks=[LossAndErrorPrintingCallback()], ) - ```
``` -For batch 0, loss is 32.34. -For batch 1, loss is 465.33. -For batch 2, loss is 319.39. -For batch 3, loss is 241.91. -For batch 4, loss is 195.22. -For batch 5, loss is 163.79. -For batch 6, loss is 141.30. -For batch 7, loss is 127.17. -The average loss for epoch 0 is 127.17 and mean absolute error is 6.13. -For batch 0, loss is 4.59. -For batch 1, loss is 4.53. -For batch 2, loss is 4.59. -For batch 3, loss is 4.80. -For batch 4, loss is 4.83. -For batch 5, loss is 4.78. -For batch 6, loss is 4.73. -For batch 7, loss is 4.65. -The average loss for epoch 1 is 4.65 and mean absolute error is 1.73. -For batch 0, loss is 6.36. -For batch 1, loss is 5.55. -For batch 2, loss is 5.64. -For batch 3, loss is 5.69. -For batch 4, loss is 5.93. -For batch 5, loss is 5.83. -For batch 6, loss is 5.78. -For batch 7, loss is 5.71. +For batch 0, loss is 25.20. +For batch 1, loss is 456.04. +For batch 2, loss is 310.18. +For batch 3, loss is 235.03. +For batch 4, loss is 189.57. +For batch 5, loss is 158.96. +For batch 6, loss is 137.16. +For batch 7, loss is 123.40. +The average loss for epoch 0 is 123.40 and mean absolute error is 5.83. +For batch 0, loss is 5.40. +For batch 1, loss is 5.18. +For batch 2, loss is 5.06. +For batch 3, loss is 4.79. +For batch 4, loss is 4.56. +For batch 5, loss is 4.42. +For batch 6, loss is 4.57. +For batch 7, loss is 4.70. +The average loss for epoch 1 is 4.70 and mean absolute error is 1.75. +For batch 0, loss is 7.93. +For batch 1, loss is 7.96. +For batch 2, loss is 7.80. +For batch 3, loss is 7.69. +For batch 4, loss is 7.79. +For batch 5, loss is 8.02. +For batch 6, loss is 8.00. +For batch 7, loss is 7.93. ```
@@ -440,39 +434,38 @@ model.fit( verbose=0, callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()], ) - ```
``` -For batch 0, loss is 27.16. -For batch 1, loss is 479.73. -For batch 2, loss is 328.31. -For batch 3, loss is 248.68. -For batch 4, loss is 200.24. -The average loss for epoch 0 is 200.24 and mean absolute error is 8.40. -For batch 0, loss is 6.74. -For batch 1, loss is 6.80. -For batch 2, loss is 6.37. -For batch 3, loss is 6.15. -For batch 4, loss is 5.94. -The average loss for epoch 1 is 5.94 and mean absolute error is 1.98. -For batch 0, loss is 5.79. -For batch 1, loss is 5.56. -For batch 2, loss is 5.40. -For batch 3, loss is 5.04. -For batch 4, loss is 4.81. -The average loss for epoch 2 is 4.81 and mean absolute error is 1.73. -For batch 0, loss is 5.60. -For batch 1, loss is 9.10. -For batch 2, loss is 11.11. -For batch 3, loss is 16.32. -For batch 4, loss is 23.31. -The average loss for epoch 3 is 23.31 and mean absolute error is 3.98. +For batch 0, loss is 29.84. +For batch 1, loss is 472.08. +For batch 2, loss is 324.16. +For batch 3, loss is 245.23. +For batch 4, loss is 197.54. +The average loss for epoch 0 is 197.54 and mean absolute error is 8.41. +For batch 0, loss is 6.23. +For batch 1, loss is 5.45. +For batch 2, loss is 5.24. +For batch 3, loss is 5.58. +For batch 4, loss is 5.60. +The average loss for epoch 1 is 5.60 and mean absolute error is 1.91. +For batch 0, loss is 4.79. +For batch 1, loss is 4.79. +For batch 2, loss is 4.83. +For batch 3, loss is 4.95. +For batch 4, loss is 5.17. +The average loss for epoch 2 is 5.17 and mean absolute error is 1.87. +For batch 0, loss is 6.40. +For batch 1, loss is 7.65. +For batch 2, loss is 9.47. +For batch 3, loss is 10.95. +For batch 4, loss is 12.86. +The average loss for epoch 3 is 12.86 and mean absolute error is 3.04. Restoring model weights from the end of the best epoch. Epoch 00004: early stopping - + ```
@@ -543,189 +536,188 @@ model.fit( CustomLearningRateScheduler(lr_schedule), ], ) - ```
``` Epoch 00000: Learning rate is 0.1000. -For batch 0, loss is 20.39. -For batch 1, loss is 467.92. -For batch 2, loss is 320.53. -For batch 3, loss is 242.66. -For batch 4, loss is 195.60. -The average loss for epoch 0 is 195.60 and mean absolute error is 8.23. +For batch 0, loss is 29.01. +For batch 1, loss is 407.35. +For batch 2, loss is 280.47. +For batch 3, loss is 213.56. +For batch 4, loss is 172.89. +The average loss for epoch 0 is 172.89 and mean absolute error is 8.08. ```
``` Epoch 00001: Learning rate is 0.1000. -For batch 0, loss is 6.62. -For batch 1, loss is 6.02. -For batch 2, loss is 6.85. -For batch 3, loss is 6.46. -For batch 4, loss is 6.33. -The average loss for epoch 1 is 6.33 and mean absolute error is 2.03. +For batch 0, loss is 7.80. +For batch 1, loss is 7.24. +For batch 2, loss is 6.51. +For batch 3, loss is 6.33. +For batch 4, loss is 5.78. +The average loss for epoch 1 is 5.78 and mean absolute error is 1.95. ```
``` Epoch 00002: Learning rate is 0.1000. -For batch 0, loss is 5.28. -For batch 1, loss is 5.29. -For batch 2, loss is 5.30. -For batch 3, loss is 4.84. -For batch 4, loss is 4.59. -The average loss for epoch 2 is 4.59 and mean absolute error is 1.73. +For batch 0, loss is 5.36. +For batch 1, loss is 5.65. +For batch 2, loss is 5.87. +For batch 3, loss is 6.36. +For batch 4, loss is 7.20. +The average loss for epoch 2 is 7.20 and mean absolute error is 2.14. ```
``` Epoch 00003: Learning rate is 0.0500. -For batch 0, loss is 5.20. -For batch 1, loss is 5.33. -For batch 2, loss is 4.70. -For batch 3, loss is 4.43. -For batch 4, loss is 4.29. -The average loss for epoch 3 is 4.29 and mean absolute error is 1.67. +For batch 0, loss is 22.85. +For batch 1, loss is 12.93. +For batch 2, loss is 9.43. +For batch 3, loss is 7.59. +For batch 4, loss is 7.07. +The average loss for epoch 3 is 7.07 and mean absolute error is 2.06. ```
``` Epoch 00004: Learning rate is 0.0500. -For batch 0, loss is 4.97. -For batch 1, loss is 4.64. -For batch 2, loss is 4.30. -For batch 3, loss is 3.95. -For batch 4, loss is 3.96. -The average loss for epoch 4 is 3.96 and mean absolute error is 1.57. +For batch 0, loss is 4.12. +For batch 1, loss is 3.77. +For batch 2, loss is 3.66. +For batch 3, loss is 4.14. +For batch 4, loss is 3.85. +The average loss for epoch 4 is 3.85 and mean absolute error is 1.54. ```
``` Epoch 00005: Learning rate is 0.0500. -For batch 0, loss is 3.18. -For batch 1, loss is 3.04. -For batch 2, loss is 3.31. -For batch 3, loss is 3.88. -For batch 4, loss is 3.83. -The average loss for epoch 5 is 3.83 and mean absolute error is 1.56. +For batch 0, loss is 3.56. +For batch 1, loss is 3.97. +For batch 2, loss is 4.39. +For batch 3, loss is 5.21. +For batch 4, loss is 5.80. +The average loss for epoch 5 is 5.80 and mean absolute error is 1.89. ```
``` Epoch 00006: Learning rate is 0.0100. -For batch 0, loss is 4.45. -For batch 1, loss is 4.82. -For batch 2, loss is 4.13. -For batch 3, loss is 3.70. -For batch 4, loss is 3.44. -The average loss for epoch 6 is 3.44 and mean absolute error is 1.47. +For batch 0, loss is 8.79. +For batch 1, loss is 8.06. +For batch 2, loss is 6.58. +For batch 3, loss is 5.60. +For batch 4, loss is 5.05. +The average loss for epoch 6 is 5.05 and mean absolute error is 1.80. ```
``` Epoch 00007: Learning rate is 0.0100. -For batch 0, loss is 2.93. -For batch 1, loss is 3.21. -For batch 2, loss is 3.49. -For batch 3, loss is 3.49. -For batch 4, loss is 3.60. -The average loss for epoch 7 is 3.60 and mean absolute error is 1.50. +For batch 0, loss is 3.49. +For batch 1, loss is 3.68. +For batch 2, loss is 3.90. +For batch 3, loss is 3.65. +For batch 4, loss is 3.81. +The average loss for epoch 7 is 3.81 and mean absolute error is 1.52. ```
``` Epoch 00008: Learning rate is 0.0100. -For batch 0, loss is 3.94. -For batch 1, loss is 3.30. -For batch 2, loss is 3.45. -For batch 3, loss is 3.68. -For batch 4, loss is 3.45. -The average loss for epoch 8 is 3.45 and mean absolute error is 1.44. +For batch 0, loss is 2.75. +For batch 1, loss is 2.51. +For batch 2, loss is 2.79. +For batch 3, loss is 2.87. +For batch 4, loss is 3.07. +The average loss for epoch 8 is 3.07 and mean absolute error is 1.39. ```
``` Epoch 00009: Learning rate is 0.0050. -For batch 0, loss is 4.47. -For batch 1, loss is 3.74. -For batch 2, loss is 3.70. -For batch 3, loss is 3.89. -For batch 4, loss is 3.64. -The average loss for epoch 9 is 3.64 and mean absolute error is 1.47. +For batch 0, loss is 3.10. +For batch 1, loss is 3.53. +For batch 2, loss is 3.39. +For batch 3, loss is 3.40. +For batch 4, loss is 3.44. +The average loss for epoch 9 is 3.44 and mean absolute error is 1.48. ```
``` Epoch 00010: Learning rate is 0.0050. -For batch 0, loss is 4.53. -For batch 1, loss is 4.10. -For batch 2, loss is 3.81. -For batch 3, loss is 3.74. -For batch 4, loss is 3.83. -The average loss for epoch 10 is 3.83 and mean absolute error is 1.50. +For batch 0, loss is 2.75. +For batch 1, loss is 3.01. +For batch 2, loss is 3.14. +For batch 3, loss is 3.21. +For batch 4, loss is 3.21. +The average loss for epoch 10 is 3.21 and mean absolute error is 1.40. ```
``` Epoch 00011: Learning rate is 0.0050. -For batch 0, loss is 2.82. -For batch 1, loss is 3.02. -For batch 2, loss is 3.16. -For batch 3, loss is 3.04. -For batch 4, loss is 2.90. -The average loss for epoch 11 is 2.90 and mean absolute error is 1.34. +For batch 0, loss is 3.47. +For batch 1, loss is 3.10. +For batch 2, loss is 3.71. +For batch 3, loss is 3.66. +For batch 4, loss is 3.51. +The average loss for epoch 11 is 3.51 and mean absolute error is 1.44. ```
``` Epoch 00012: Learning rate is 0.0010. -For batch 0, loss is 3.30. -For batch 1, loss is 3.17. -For batch 2, loss is 2.87. -For batch 3, loss is 2.87. -For batch 4, loss is 2.96. -The average loss for epoch 12 is 2.96 and mean absolute error is 1.33. +For batch 0, loss is 3.29. +For batch 1, loss is 3.31. +For batch 2, loss is 3.14. +For batch 3, loss is 3.01. +For batch 4, loss is 3.04. +The average loss for epoch 12 is 3.04 and mean absolute error is 1.35. ```
``` Epoch 00013: Learning rate is 0.0010. -For batch 0, loss is 2.84. -For batch 1, loss is 3.25. -For batch 2, loss is 3.20. -For batch 3, loss is 3.11. +For batch 0, loss is 2.66. +For batch 1, loss is 3.16. +For batch 2, loss is 3.16. +For batch 3, loss is 2.99. For batch 4, loss is 3.07. -The average loss for epoch 13 is 3.07 and mean absolute error is 1.37. +The average loss for epoch 13 is 3.07 and mean absolute error is 1.40. ```
``` Epoch 00014: Learning rate is 0.0010. -For batch 0, loss is 2.69. -For batch 1, loss is 2.70. -For batch 2, loss is 2.89. -For batch 3, loss is 2.69. -For batch 4, loss is 2.97. -The average loss for epoch 14 is 2.97 and mean absolute error is 1.33. - - +For batch 0, loss is 2.53. +For batch 1, loss is 3.05. +For batch 2, loss is 3.04. +For batch 3, loss is 3.36. +For batch 4, loss is 3.31. +The average loss for epoch 14 is 3.31 and mean absolute error is 1.39. + + ```
diff --git a/guides/training_with_built_in_methods.py b/guides/training_with_built_in_methods.py index a6b4f2e50d..7107801630 100644 --- a/guides/training_with_built_in_methods.py +++ b/guides/training_with_built_in_methods.py @@ -700,7 +700,7 @@ def __getitem__(self, idx): """ ## Using sample weighting and class weighting -With the default settings the weight of a sample is decided by its frequency +With the default settings the weight of a sample is decided by its frequency in the dataset. There are two methods to weight the data, independent of sample frequency: @@ -713,12 +713,12 @@ def __getitem__(self, idx): This is set by passing a dictionary to the `class_weight` argument to `Model.fit()`. This dictionary maps class indices to the weight that should -be used for samples belonging to this class. +be used for samples belonging to this class. This can be used to balance classes without resampling, or to train a model that has a gives more importance to a particular class. -For instance, if class "0" is half as represented as class "1" in your data, +For instance, if class "0" is half as represented as class "1" in your data, you could use `Model.fit(..., class_weight={0: 1., 1: 0.5})`. """ @@ -751,8 +751,9 @@ def __getitem__(self, idx): """ ### Sample weights + For fine grained control, or if you are not building a classifier, -you can use "sample weights". +you can use "sample weights". - When training from NumPy data: Pass the `sample_weight` argument to `Model.fit()`. diff --git a/guides/working_with_rnns.py b/guides/working_with_rnns.py index 8b35301f03..2cafbcd894 100644 --- a/guides/working_with_rnns.py +++ b/guides/working_with_rnns.py @@ -584,5 +584,5 @@ def get_config(self): will handle the sequence iteration for you. It's an incredibly powerful way to quickly prototype new kinds of RNNs (e.g. a LSTM variant). -For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/RNN/). 
+For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/rnn/). """ diff --git a/scripts/autogen.py b/scripts/autogen.py index 9ad7d9b2eb..4168a491eb 100644 --- a/scripts/autogen.py +++ b/scripts/autogen.py @@ -32,6 +32,7 @@ from master import MASTER import tutobooks +import generate_tf_guides EXAMPLES_GH_LOCATION = "keras-team/keras-io/blob/master/examples/" @@ -887,7 +888,7 @@ def get_working_dir(arg): ) cmd = sys.argv[1] - if cmd not in {"make", "serve", "add_example", "add_guide"}: + if cmd not in {"make", "serve", "add_example", "add_guide", "generate_tf_guides"}: raise ValueError("Must specify command `make`, `serve`, or `add_example`.") if cmd in {"add_example", "add_guide"}: if not len(sys.argv) in (3, 4): @@ -906,7 +907,10 @@ def get_working_dir(arg): working_dir=get_working_dir(sys.argv[3]) if len(sys.argv) == 4 else None, ) elif cmd == "add_guide": + tutobooks.MAX_LOC = 500 keras_io.add_guide( sys.argv[2], working_dir=get_working_dir(sys.argv[3]) if len(sys.argv) == 4 else None, ) + elif cmd == "generate_tf_guides": + generate_tf_guides.generate_tf_guides() diff --git a/scripts/generate_tf_guides.py b/scripts/generate_tf_guides.py new file mode 100644 index 0000000000..02cbb1378f --- /dev/null +++ b/scripts/generate_tf_guides.py @@ -0,0 +1,242 @@ +from pathlib import Path +import copy +import json +import re + +CONFIG = [ + { + "title": "The Functional API", + "source_name": "functional_api", + "target_name": "functional", + }, + { + "title": "Training & evaluation with the built-in methods", + "source_name": "training_with_built_in_methods", + "target_name": "train_and_evaluate", + }, + { + "title": "Making new Layers & Models via subclassing", + "source_name": "making_new_layers_and_models_via_subclassing", + "target_name": "custom_layers_and_models", + }, + { + "title": "Recurrent Neural Networks (RNN) with Keras", + "source_name": "working_with_rnns", + "target_name": "rnn", + }, + { + "title": "Masking and padding 
with Keras", + "source_name": "understanding_masking_and_padding", + "target_name": "masking_and_padding", + }, + { + "title": "Save and load Keras models", + "source_name": "serialization_and_saving", + "target_name": "save_and_serialize", + }, + { + "title": "Writing your own callbacks", + "source_name": "writing_your_own_callbacks", + "target_name": "custom_callback", + }, + { + "title": "Writing a training loop from scratch", + "source_name": "writing_a_training_loop_from_scratch", + "target_name": "writing_a_training_loop_from_scratch", + }, + { + "title": "Transfer learning & fine-tuning", + "source_name": "transfer_learning", + "target_name": "transfer_learning", + }, + { + "title": "The Sequential model", + "source_name": "sequential_model", + "target_name": "sequential_model", + }, + { + "title": "Customizing what happens in `fit()`", + "source_name": "customizing_what_happens_in_fit", + "target_name": "customizing_what_happens_in_fit", + }, +] + + +TF_BUTTONS_TEMPLATE = { + "cell_type": "markdown", + "metadata": {"colab_type": "text",}, + "source": [ + '\n', + " \n", + " \n", + " \n", + " \n", + "
\n", + ' View on TensorFlow.org\n', + " \n", + ' Run in Google Colab\n', + " \n", + ' View source on GitHub\n', + " \n", + ' Download notebook\n', + "
", + ], +} + + +TF_IPYNB_CELLS_TEMPLATE = [ + { + "cell_type": "markdown", + "metadata": {"colab_type": "text",}, + "source": ["##### Copyright 2020 The TensorFlow Authors."], + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": {"cellView": "form", "colab": {}, "colab_type": "code",}, + "outputs": [], + "source": [ + '#@title Licensed under the Apache License, Version 2.0 (the "License");\n', + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + '# distributed under the License is distributed on an "AS IS" BASIS,\n', + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.", + ], + }, + # Then: title + # Then: buttons +] + +TF_IPYNB_BASE = { + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "", # FILL ME + "private_outputs": True, + "provenance": [], + "toc_visible": True, + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3", + }, + }, + "nbformat": 4, + "nbformat_minor": 0, +} + + +def generate_single_tf_guide(source_dir, target_dir, title, source_name, target_name): + f = open(Path(source_dir) / (source_name + ".ipynb")) + original_ipynb = json.loads(f.read()) + f.close() + + # Skip first title cell + cells = original_ipynb["cells"][1:] + # Strip Keras tags + for cell in cells: + if cell["cell_type"] == "markdown": + new_lines = [] + lines = cell["source"] + num_lines = len(lines) + for i in range(num_lines - 1): + if lines[i].startswith('
') and lines[ + i + 1 + ].startswith("```"): + continue + elif lines[i].startswith("
") and lines[i - 1].startswith("```"): + continue + else: + new_lines.append(lines[i]) + if len(lines) >= 2 and not ( + lines[-1].startswith("
") and lines[-2].startswith("```") + ): + new_lines.append(lines[-1]) + if len(lines) < 2: + new_lines.append(lines[-1]) + cell["source"] = new_lines + + # Add header cells + header_cells = copy.deepcopy(TF_IPYNB_CELLS_TEMPLATE) + # Add title cell + header_cells.append( + { + "cell_type": "markdown", + "metadata": {"colab_type": "text"}, + "source": ["# " + title], + } + ) + buttons = copy.deepcopy(TF_BUTTONS_TEMPLATE) + for i in range(len(buttons["source"])): + buttons["source"][i] = buttons["source"][i].replace("TARGET_NAME", target_name) + buttons["source"][i] = buttons["source"][i].replace("SOURCE_NAME", source_name) + header_cells.append(buttons) + cells = header_cells + cells + + notebook = {} + for key in TF_IPYNB_BASE.keys(): + notebook[key] = TF_IPYNB_BASE[key] + notebook["metadata"]["colab"]["name"] = target_name + notebook["cells"] = cells + + f = open(Path(target_dir) / (target_name + ".ipynb"), "w") + json_st = json.dumps(notebook, indent=1, sort_keys=True) + + # Apply link conversion + json_st = json_st.replace( + "(/api/callbacks/", + "(https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/", + ) + json_st = json_st.replace( + "keras.io/api/layers/recurrent_layers/rnn/", + "https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN/", + ) + json_st = json_st.replace( + "https://keras.io/api/layers/recurrent_layers/gru/", + "https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU/", + ) + json_st = json_st.replace( + "https://keras.io/api/layers/recurrent_layers/lstm/", + "https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM/", + ) + json_st = json_st.replace( + "https://keras.io/api/layers/recurrent_layers/bidirectional/", + "https://www.tensorflow.org/api_docs/python/tf/keras/layers/Bidirectional/", + ) + json_st = json_st.replace( + "https://keras.io/api/callbacks/", + "https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/", + ) + for entry in CONFIG: + src = entry["source_name"] + dst = entry["target_name"] + 
json_st = re.sub( + r"(?is)]\((\s*)/guides/" + src, + "](https://www.tensorflow.org/guide/keras/" + dst, + json_st, + ) + json_st = re.sub( + r"(?is)(\s+)/guides/" + src, + "https://www.tensorflow.org/guide/keras/" + dst, + json_st, + ) + f.write(json_st) + f.close() + + +def generate_tf_guides(): + for entry in CONFIG: + generate_single_tf_guide( + source_dir="../guides/ipynb/", + target_dir="../tf/", + title=entry["title"], + source_name=entry["source_name"], + target_name=entry["target_name"], + ) diff --git a/scripts/layers_master.py b/scripts/layers_master.py index b4e2a3bb68..95702efda4 100644 --- a/scripts/layers_master.py +++ b/scripts/layers_master.py @@ -261,6 +261,11 @@ 'title': 'ConvLSTM2D layer', 'generate': ['tensorflow.keras.layers.ConvLSTM2D'] }, + { + 'path': 'rnn', + 'title': 'Base RNN layer', + 'generate': ['tensorflow.keras.layers.RNN'] + }, ] }, { diff --git a/scripts/tutobooks.py b/scripts/tutobooks.py index 06cfecbde6..5c73cbf76a 100644 --- a/scripts/tutobooks.py +++ b/scripts/tutobooks.py @@ -151,6 +151,8 @@ def py_to_nb(py_path, nb_path, fill_outputs=True): # Drop last newline char if source and not source[-1].strip(): source = source[:-1] + if source: + source[-1] = source[-1].rstrip() if tag == "shell": source = ["!" + l for l in source] cell_type = "code" diff --git a/tf/custom_callback.ipynb b/tf/custom_callback.ipynb new file mode 100644 index 0000000000..c2b5e07c52 --- /dev/null +++ b/tf/custom_callback.ipynb @@ -0,0 +1,630 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Writing your own callbacks" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Introduction\n", + "\n", + "A callback is a powerful tool to customize the behavior of a Keras model during\n", + "training, evaluation, or inference. Examples include `tf.keras.callbacks.TensorBoard`\n", + "to visualize training progress and results with TensorBoard, or\n", + "`tf.keras.callbacks.ModelCheckpoint` to periodically save your model during training.\n", + "\n", + "In this guide, you will learn what a Keras callback is, what it can do, and how you can\n", + "build your own. We provide a few demos of simple callback applications to get you\n", + "started." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Keras callbacks overview\n", + "\n", + "All callbacks subclass the `keras.callbacks.Callback` class, and\n", + "override a set of methods called at various stages of training, testing, and\n", + "predicting. 
Callbacks are useful to get a view on internal states and statistics of\n", + "the model during training.\n", + "\n", + "You can pass a list of callbacks (as the keyword argument `callbacks`) to the following\n", + "model methods:\n", + "\n", + "- `keras.Model.fit()`\n", + "- `keras.Model.evaluate()`\n", + "- `keras.Model.predict()`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## An overview of callback methods\n", + "\n", + "### Global methods\n", + "\n", + "#### `on_(train|test|predict)_begin(self, logs=None)`\n", + "\n", + "Called at the beginning of `fit`/`evaluate`/`predict`.\n", + "\n", + "#### `on_(train|test|predict)_end(self, logs=None)`\n", + "\n", + "Called at the end of `fit`/`evaluate`/`predict`.\n", + "\n", + "### Batch-level methods for training/testing/predicting\n", + "\n", + "#### `on_(train|test|predict)_batch_begin(self, batch, logs=None)`\n", + "\n", + "Called right before processing a batch during training/testing/predicting.\n", + "\n", + "#### `on_(train|test|predict)_batch_end(self, batch, logs=None)`\n", + "\n", + "Called at the end of training/testing/predicting a batch. Within this method, `logs` is\n", + "a dict containing the metrics results.\n", + "\n", + "### Epoch-level methods (training only)\n", + "\n", + "#### `on_epoch_begin(self, epoch, logs=None)`\n", + "\n", + "Called at the beginning of an epoch during training.\n", + "\n", + "#### `on_epoch_end(self, epoch, logs=None)`\n", + "\n", + "Called at the end of an epoch during training." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## A basic example\n", + "\n", + "Let's take a look at a concrete example. 
To get started, let's import tensorflow and\n", + "define a simple Sequential Keras model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Define the Keras model to add callbacks to\n", + "def get_model():\n", + " model = keras.Sequential()\n", + " model.add(keras.layers.Dense(1, input_dim=784))\n", + " model.compile(\n", + " optimizer=keras.optimizers.RMSprop(learning_rate=0.1),\n", + " loss=\"mean_squared_error\",\n", + " metrics=[\"mean_absolute_error\"],\n", + " )\n", + " return model\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Then, load the MNIST data for training and testing from Keras datasets API:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Load example MNIST data and pre-process it\n", + "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n", + "x_train = x_train.reshape(-1, 784).astype(\"float32\") / 255.0\n", + "x_test = x_test.reshape(-1, 784).astype(\"float32\") / 255.0\n", + "\n", + "# Limit the data to 1000 samples\n", + "x_train = x_train[:1000]\n", + "y_train = y_train[:1000]\n", + "x_test = x_test[:1000]\n", + "y_test = y_test[:1000]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Now, define a simple custom callback that logs:\n", + "\n", + "- When `fit`/`evaluate`/`predict` starts & ends\n", + "- When each epoch starts & ends\n", + "- When each training batch starts & ends\n", + "- When each evaluation (test) batch starts & ends\n", + "- When each inference (prediction) batch starts & ends" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomCallback(keras.callbacks.Callback):\n", + " def on_train_begin(self, 
logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Starting training; got log keys: {}\".format(keys))\n", + "\n", + " def on_train_end(self, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Stop training; got log keys: {}\".format(keys))\n", + "\n", + " def on_epoch_begin(self, epoch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Start epoch {} of training; got log keys: {}\".format(epoch, keys))\n", + "\n", + " def on_epoch_end(self, epoch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"End epoch {} of training; got log keys: {}\".format(epoch, keys))\n", + "\n", + " def on_test_begin(self, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Start testing; got log keys: {}\".format(keys))\n", + "\n", + " def on_test_end(self, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Stop testing; got log keys: {}\".format(keys))\n", + "\n", + " def on_predict_begin(self, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Start predicting; got log keys: {}\".format(keys))\n", + "\n", + " def on_predict_end(self, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"Stop predicting; got log keys: {}\".format(keys))\n", + "\n", + " def on_train_batch_begin(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Training: start of batch {}; got log keys: {}\".format(batch, keys))\n", + "\n", + " def on_train_batch_end(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Training: end of batch {}; got log keys: {}\".format(batch, keys))\n", + "\n", + " def on_test_batch_begin(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Evaluating: start of batch {}; got log keys: {}\".format(batch, keys))\n", + "\n", + " def on_test_batch_end(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Evaluating: end of batch {}; got log keys: {}\".format(batch, keys))\n", + "\n", + " def 
on_predict_batch_begin(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Predicting: start of batch {}; got log keys: {}\".format(batch, keys))\n", + "\n", + " def on_predict_batch_end(self, batch, logs=None):\n", + " keys = list(logs.keys())\n", + " print(\"...Predicting: end of batch {}; got log keys: {}\".format(batch, keys))\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's try it out:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_model()\n", + "model.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=128,\n", + " epochs=1,\n", + " verbose=0,\n", + " validation_split=0.5,\n", + " callbacks=[CustomCallback()],\n", + ")\n", + "\n", + "res = model.evaluate(\n", + " x_test, y_test, batch_size=128, verbose=0, callbacks=[CustomCallback()]\n", + ")\n", + "\n", + "res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Usage of `logs` dict\n", + "The `logs` dict contains the loss value, and all the metrics at the end of a batch or\n", + "epoch. Example includes the loss and mean absolute error." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class LossAndErrorPrintingCallback(keras.callbacks.Callback):\n", + " def on_train_batch_end(self, batch, logs=None):\n", + " print(\"For batch {}, loss is {:7.2f}.\".format(batch, logs[\"loss\"]))\n", + "\n", + " def on_test_batch_end(self, batch, logs=None):\n", + " print(\"For batch {}, loss is {:7.2f}.\".format(batch, logs[\"loss\"]))\n", + "\n", + " def on_epoch_end(self, epoch, logs=None):\n", + " print(\n", + " \"The average loss for epoch {} is {:7.2f} \"\n", + " \"and mean absolute error is {:7.2f}.\".format(\n", + " epoch, logs[\"loss\"], logs[\"mean_absolute_error\"]\n", + " )\n", + " )\n", + "\n", + "\n", + "model = get_model()\n", + "model.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=128,\n", + " epochs=2,\n", + " verbose=0,\n", + " callbacks=[LossAndErrorPrintingCallback()],\n", + ")\n", + "\n", + "res = model.evaluate(\n", + " x_test,\n", + " y_test,\n", + " batch_size=128,\n", + " verbose=0,\n", + " callbacks=[LossAndErrorPrintingCallback()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Usage of `self.model` attribute\n", + "\n", + "In addition to receiving log information when one of their methods is called,\n", + "callbacks have access to the model associated with the current round of\n", + "training/evaluation/inference: `self.model`.\n", + "\n", + "Here are of few of the things you can do with `self.model` in a callback:\n", + "\n", + "- Set `self.model.stop_training = True` to immediately interrupt training.\n", + "- Mutate hyperparameters of the optimizer (available as `self.model.optimizer`),\n", + "such as `self.model.optimizer.learning_rate`.\n", + "- Save the model at period intervals.\n", + "- Record the output of `model.predict()` on a few test samples at the end of each\n", + "epoch, to use as a sanity check 
during training.\n", + "- Extract visualizations of intermediate features at the end of each epoch, to monitor\n", + "what the model is learning over time.\n", + "- etc.\n", + "\n", + "Let's see this in action in a couple of examples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Examples of Keras callback applications" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Early stopping at minimum loss\n", + "\n", + "This first example shows the creation of a `Callback` that stops training when the\n", + "minimum of loss has been reached, by setting the attribute `self.model.stop_training`\n", + "(boolean). Optionally, you can provide an argument `patience` to specify how many\n", + "epochs we should wait before stopping after having reached a local minimum.\n", + "\n", + "`tf.keras.callbacks.EarlyStopping` provides a more complete and general implementation." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "class EarlyStoppingAtMinLoss(keras.callbacks.Callback):\n", + " \"\"\"Stop training when the loss is at its min, i.e. the loss stops decreasing.\n", + "\n", + " Arguments:\n", + " patience: Number of epochs to wait after min has been hit. 
After this\n", + " number of no improvement, training stops.\n", + " \"\"\"\n", + "\n", + " def __init__(self, patience=0):\n", + " super(EarlyStoppingAtMinLoss, self).__init__()\n", + " self.patience = patience\n", + " # best_weights to store the weights at which the minimum loss occurs.\n", + " self.best_weights = None\n", + "\n", + " def on_train_begin(self, logs=None):\n", + " # The number of epoch it has waited when loss is no longer minimum.\n", + " self.wait = 0\n", + " # The epoch the training stops at.\n", + " self.stopped_epoch = 0\n", + " # Initialize the best as infinity.\n", + " self.best = np.Inf\n", + "\n", + " def on_epoch_end(self, epoch, logs=None):\n", + " current = logs.get(\"loss\")\n", + " if np.less(current, self.best):\n", + " self.best = current\n", + " self.wait = 0\n", + " # Record the best weights if current results is better (less).\n", + " self.best_weights = self.model.get_weights()\n", + " else:\n", + " self.wait += 1\n", + " if self.wait >= self.patience:\n", + " self.stopped_epoch = epoch\n", + " self.model.stop_training = True\n", + " print(\"Restoring model weights from the end of the best epoch.\")\n", + " self.model.set_weights(self.best_weights)\n", + "\n", + " def on_train_end(self, logs=None):\n", + " if self.stopped_epoch > 0:\n", + " print(\"Epoch %05d: early stopping\" % (self.stopped_epoch + 1))\n", + "\n", + "\n", + "model = get_model()\n", + "model.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=64,\n", + " steps_per_epoch=5,\n", + " epochs=30,\n", + " verbose=0,\n", + " callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Learning rate scheduling\n", + "\n", + "In this example, we show how a custom Callback can be used to dynamically change the\n", + "learning rate of the optimizer during the course of training.\n", + "\n", + "See `callbacks.LearningRateScheduler` for a more 
general implementation." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomLearningRateScheduler(keras.callbacks.Callback):\n", + " \"\"\"Learning rate scheduler which sets the learning rate according to schedule.\n", + "\n", + " Arguments:\n", + " schedule: a function that takes an epoch index\n", + " (integer, indexed from 0) and current learning rate\n", + " as inputs and returns a new learning rate as output (float).\n", + " \"\"\"\n", + "\n", + " def __init__(self, schedule):\n", + " super(CustomLearningRateScheduler, self).__init__()\n", + " self.schedule = schedule\n", + "\n", + " def on_epoch_begin(self, epoch, logs=None):\n", + " if not hasattr(self.model.optimizer, \"lr\"):\n", + " raise ValueError('Optimizer must have a \"lr\" attribute.')\n", + " # Get the current learning rate from model's optimizer.\n", + " lr = float(tf.keras.backend.get_value(self.model.optimizer.learning_rate))\n", + " # Call schedule function to get the scheduled learning rate.\n", + " scheduled_lr = self.schedule(epoch, lr)\n", + " # Set the value back to the optimizer before this epoch starts\n", + " tf.keras.backend.set_value(self.model.optimizer.lr, scheduled_lr)\n", + " print(\"\\nEpoch %05d: Learning rate is %6.4f.\" % (epoch, scheduled_lr))\n", + "\n", + "\n", + "LR_SCHEDULE = [\n", + " # (epoch to start, learning rate) tuples\n", + " (3, 0.05),\n", + " (6, 0.01),\n", + " (9, 0.005),\n", + " (12, 0.001),\n", + "]\n", + "\n", + "\n", + "def lr_schedule(epoch, lr):\n", + " \"\"\"Helper function to retrieve the scheduled learning rate based on epoch.\"\"\"\n", + " if epoch < LR_SCHEDULE[0][0] or epoch > LR_SCHEDULE[-1][0]:\n", + " return lr\n", + " for i in range(len(LR_SCHEDULE)):\n", + " if epoch == LR_SCHEDULE[i][0]:\n", + " return LR_SCHEDULE[i][1]\n", + " return lr\n", + "\n", + "\n", + "model = get_model()\n", + "model.fit(\n", + " x_train,\n", + " y_train,\n", +
" batch_size=64,\n", + " steps_per_epoch=5,\n", + " epochs=15,\n", + " verbose=0,\n", + " callbacks=[\n", + " LossAndErrorPrintingCallback(),\n", + " CustomLearningRateScheduler(lr_schedule),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Built-in Keras callbacks\n", + "Be sure to check out the existing Keras callbacks by\n", + "reading the [API docs](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/).\n", + "Applications include logging to CSV, saving\n", + "the model, visualizing metrics in TensorBoard, and a lot more!" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "custom_callback", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/custom_layers_and_models.ipynb b/tf/custom_layers_and_models.ipynb new file mode 100644 index 0000000000..b15ab6e2c8 --- /dev/null +++ b/tf/custom_layers_and_models.ipynb @@ -0,0 +1,1239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Making new Layers & Models via subclassing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The `Layer` class: the combination of state (weights) and some computation\n", + "\n", + "One of the central abstractions in Keras is the `Layer` class. A layer\n", + "encapsulates both a state (the layer's \"weights\") and a transformation from\n", + "inputs to outputs (a \"call\", the layer's forward pass).\n", + "\n", + "Here's a densely-connected layer. It has a state: the variables `w` and `b`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32, input_dim=32):\n", + " super(Linear, self).__init__()\n", + " w_init = tf.random_normal_initializer()\n", + " self.w = tf.Variable(\n", + " initial_value=w_init(shape=(input_dim, units), dtype=\"float32\"),\n", + " trainable=True,\n", + " )\n", + " b_init = tf.zeros_initializer()\n", + " self.b = tf.Variable(\n", + " initial_value=b_init(shape=(units,), dtype=\"float32\"), trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You would use a layer by calling it on some tensor input(s), much like a Python\n", + "function."
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "x = tf.ones((2, 2))\n", + "linear_layer = Linear(4, 2)\n", + "y = linear_layer(x)\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that the weights `w` and `b` are automatically tracked by the layer upon\n", + "being set as layer attributes:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "assert linear_layer.weights == [linear_layer.w, linear_layer.b]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note you also have access to a quicker shortcut for adding weight to a layer:\n", + "the `add_weight()` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32, input_dim=32):\n", + " super(Linear, self).__init__()\n", + " self.w = self.add_weight(\n", + " shape=(input_dim, units), initializer=\"random_normal\", trainable=True\n", + " )\n", + " self.b = self.add_weight(shape=(units,), initializer=\"zeros\", trainable=True)\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + "\n", + "x = tf.ones((2, 2))\n", + "linear_layer = Linear(4, 2)\n", + "y = linear_layer(x)\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Layers can have non-trainable weights\n", + "\n", + "Besides trainable weights, you can add non-trainable weights to a layer as\n", + "well. 
Such weights are meant not to be taken into account during\n", + "backpropagation, when you are training the layer.\n", + "\n", + "Here's how to add and use a non-trainable weight:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class ComputeSum(keras.layers.Layer):\n", + " def __init__(self, input_dim):\n", + " super(ComputeSum, self).__init__()\n", + " self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)\n", + "\n", + " def call(self, inputs):\n", + " self.total.assign_add(tf.reduce_sum(inputs, axis=0))\n", + " return self.total\n", + "\n", + "\n", + "x = tf.ones((2, 2))\n", + "my_sum = ComputeSum(2)\n", + "y = my_sum(x)\n", + "print(y.numpy())\n", + "y = my_sum(x)\n", + "print(y.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "It's part of `layer.weights`, but it gets categorized as a non-trainable weight:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "print(\"weights:\", len(my_sum.weights))\n", + "print(\"non-trainable weights:\", len(my_sum.non_trainable_weights))\n", + "\n", + "# It's not included in the trainable weights:\n", + "print(\"trainable_weights:\", my_sum.trainable_weights)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Best practice: deferring weight creation until the shape of the inputs is known\n", + "\n", + "Our `Linear` layer above took an `input_dim `argument that was used to compute\n", + "the shape of the weights `w` and `b` in `__init__()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32, input_dim=32):\n", + " super(Linear, 
self).__init__()\n", + " self.w = self.add_weight(\n", + " shape=(input_dim, units), initializer=\"random_normal\", trainable=True\n", + " )\n", + " self.b = self.add_weight(shape=(units,), initializer=\"zeros\", trainable=True)\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "In many cases, you may not know in advance the size of your inputs, and you\n", + "would like to lazily create weights when that value becomes known, some time\n", + "after instantiating the layer.\n", + "\n", + "In the Keras API, we recommend creating layer weights in the `build(self,\n", + "inputs_shape)` method of your layer. Like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32):\n", + " super(Linear, self).__init__()\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The `__call__()` method of your layer will automatically run build the first time\n", + "it is called. 
You now have a layer that's lazy and thus easier to use:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# At instantiation, we don't know on what inputs this is going to get called\n", + "linear_layer = Linear(32)\n", + "\n", + "# The layer's weights are created dynamically the first time the layer is called\n", + "y = linear_layer(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Layers are recursively composable\n", + "\n", + "If you assign a Layer instance as attribute of another Layer, the outer layer\n", + "will start tracking the weights of the inner layer.\n", + "\n", + "We recommend creating such sublayers in the `__init__()` method (since the\n", + "sublayers will typically have a build method, they will be built when the\n", + "outer layer gets built)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Let's assume we are reusing the Linear class\n", + "# with a `build` method that we defined above.\n", + "\n", + "\n", + "class MLPBlock(keras.layers.Layer):\n", + " def __init__(self):\n", + " super(MLPBlock, self).__init__()\n", + " self.linear_1 = Linear(32)\n", + " self.linear_2 = Linear(32)\n", + " self.linear_3 = Linear(1)\n", + "\n", + " def call(self, inputs):\n", + " x = self.linear_1(inputs)\n", + " x = tf.nn.relu(x)\n", + " x = self.linear_2(x)\n", + " x = tf.nn.relu(x)\n", + " return self.linear_3(x)\n", + "\n", + "\n", + "mlp = MLPBlock()\n", + "y = mlp(tf.ones(shape=(3, 64))) # The first call to the `mlp` will create the weights\n", + "print(\"weights:\", len(mlp.weights))\n", + "print(\"trainable weights:\", len(mlp.trainable_weights))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The `add_loss()` method\n", + "\n", + "When writing the `call()` 
method of a layer, you can create loss tensors that\n", + "you will want to use later, when writing your training loop. This is doable by\n", + "calling `self.add_loss(value)`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# A layer that creates an activity regularization loss\n", + "class ActivityRegularizationLayer(keras.layers.Layer):\n", + " def __init__(self, rate=1e-2):\n", + " super(ActivityRegularizationLayer, self).__init__()\n", + " self.rate = rate\n", + "\n", + " def call(self, inputs):\n", + " self.add_loss(self.rate * tf.reduce_sum(inputs))\n", + " return inputs\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "These losses (including those created by any inner layer) can be retrieved via\n", + "`layer.losses`. This property is reset at the start of every `__call__()` to\n", + "the top-level layer, so that `layer.losses` always contains the loss values\n", + "created during the last forward pass." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class OuterLayer(keras.layers.Layer):\n", + " def __init__(self):\n", + " super(OuterLayer, self).__init__()\n", + " self.activity_reg = ActivityRegularizationLayer(1e-2)\n", + "\n", + " def call(self, inputs):\n", + " return self.activity_reg(inputs)\n", + "\n", + "\n", + "layer = OuterLayer()\n", + "assert len(layer.losses) == 0 # No losses yet since the layer has never been called\n", + "\n", + "_ = layer(tf.zeros(1, 1))\n", + "assert len(layer.losses) == 1 # We created one loss value\n", + "\n", + "# `layer.losses` gets reset at the start of each __call__\n", + "_ = layer(tf.zeros(1, 1))\n", + "assert len(layer.losses) == 1 # This is the loss created during the call above" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "In addition, the `loss` property also contains regularization losses created\n", + "for the weights of any inner layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class OuterLayerWithKernelRegularizer(keras.layers.Layer):\n", + " def __init__(self):\n", + " super(OuterLayerWithKernelRegularizer, self).__init__()\n", + " self.dense = keras.layers.Dense(\n", + " 32, kernel_regularizer=tf.keras.regularizers.l2(1e-3)\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return self.dense(inputs)\n", + "\n", + "\n", + "layer = OuterLayerWithKernelRegularizer()\n", + "_ = layer(tf.zeros((1, 1)))\n", + "\n", + "# This is `1e-3 * sum(layer.dense.kernel ** 2)`,\n", + "# created by the `kernel_regularizer` above.\n", + "print(layer.losses)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "These losses are meant to be taken into account when writing training loops,\n", + "like this:\n", + "\n", + 
"```python\n", + "# Instantiate an optimizer.\n", + "optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)\n", + "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", + "\n", + "# Iterate over the batches of a dataset.\n", + "for x_batch_train, y_batch_train in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " logits = layer(x_batch_train) # Logits for this minibatch\n", + " # Loss value for this minibatch\n", + " loss_value = loss_fn(y_batch_train, logits)\n", + " # Add extra losses created during this forward pass:\n", + " loss_value += sum(model.losses)\n", + "\n", + " grads = tape.gradient(loss_value, model.trainable_weights)\n", + " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For a detailed guide about writing training loops, see the\n", + "[guide to writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch/).\n", + "\n", + "These losses also work seamlessly with `fit()` (they get automatically summed\n", + "and added to the main loss, if any):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "inputs = keras.Input(shape=(3,))\n", + "outputs = ActivityRegularizationLayer()(inputs)\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "# If there is a loss passed in `compile`, the regularization\n", + "# losses get added to it\n", + "model.compile(optimizer=\"adam\", loss=\"mse\")\n", + "model.fit(np.random.random((2, 3)), np.random.random((2, 3)))\n", + "\n", + "# It's also possible not to pass any loss in `compile`,\n", + "# since the model already has a loss to minimize, via the `add_loss`\n", + "# call during the forward pass!\n", + "model.compile(optimizer=\"adam\")\n", + "model.fit(np.random.random((2, 3)),
np.random.random((2, 3)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The `add_metric()` method\n", + "\n", + "Similarly to `add_loss()`, layers also have an `add_metric()` method\n", + "for tracking the moving average of a quantity during training.\n", + "\n", + "Consider the following layer: a \"logistic endpoint\" layer.\n", + "It takes as inputs predictions & targets, it computes a loss which it tracks\n", + "via `add_loss()`, and it computes an accuracy scalar, which it tracks via\n", + "`add_metric()`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class LogisticEndpoint(keras.layers.Layer):\n", + " def __init__(self, name=None):\n", + " super(LogisticEndpoint, self).__init__(name=name)\n", + " self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", + " self.accuracy_fn = keras.metrics.BinaryAccuracy()\n", + "\n", + " def call(self, targets, logits, sample_weights=None):\n", + " # Compute the training-time loss value and add it\n", + " # to the layer using `self.add_loss()`.\n", + " loss = self.loss_fn(targets, logits, sample_weights)\n", + " self.add_loss(loss)\n", + "\n", + " # Log accuracy as a metric and add it\n", + " # to the layer using `self.add_metric()`.\n", + " acc = self.accuracy_fn(targets, logits, sample_weights)\n", + " self.add_metric(acc, name=\"accuracy\")\n", + "\n", + " # Return the inference-time prediction tensor (for `.predict()`).\n", + " return tf.nn.softmax(logits)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Metrics tracked in this way are accessible via `layer.metrics`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = LogisticEndpoint()\n", + "\n", + "targets = tf.ones((2, 2))\n", + "logits = 
tf.ones((2, 2))\n", + "y = layer(targets, logits)\n", + "\n", + "print(\"layer.metrics:\", layer.metrics)\n", + "print(\"current accuracy value:\", float(layer.metrics[0].result()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Just like for `add_loss()`, these metrics are tracked by `fit()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(3,), name=\"inputs\")\n", + "targets = keras.Input(shape=(10,), name=\"targets\")\n", + "logits = keras.layers.Dense(10)(inputs)\n", + "predictions = LogisticEndpoint(name=\"predictions\")(logits, targets)\n", + "\n", + "model = keras.Model(inputs=[inputs, targets], outputs=predictions)\n", + "model.compile(optimizer=\"adam\")\n", + "\n", + "data = {\n", + " \"inputs\": np.random.random((3, 3)),\n", + " \"targets\": np.random.random((3, 10)),\n", + "}\n", + "model.fit(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## You can optionally enable serialization on your layers\n", + "\n", + "If you need your custom layers to be serializable as part of a\n", + "[Functional model](https://www.tensorflow.org/guide/keras/functional/), you can optionally implement a `get_config()`\n", + "method:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32):\n", + " super(Linear, self).__init__()\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def 
call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + " def get_config(self):\n", + " return {\"units\": self.units}\n", + "\n", + "\n", + "# Now you can recreate the layer from its config:\n", + "layer = Linear(64)\n", + "config = layer.get_config()\n", + "print(config)\n", + "new_layer = Linear.from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that the `__init__()` method of the base `Layer` class takes some keyword\n", + "arguments, in particular a `name` and a `dtype`. It's good practice to pass\n", + "these arguments to the parent class in `__init__()` and to include them in the\n", + "layer config:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class Linear(keras.layers.Layer):\n", + " def __init__(self, units=32, **kwargs):\n", + " super(Linear, self).__init__(**kwargs)\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + " def get_config(self):\n", + " config = super(Linear, self).get_config()\n", + " config.update({\"units\": self.units})\n", + " return config\n", + "\n", + "\n", + "layer = Linear(64)\n", + "config = layer.get_config()\n", + "print(config)\n", + "new_layer = Linear.from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "If you need more flexibility when deserializing the layer from its config, you\n", + "can also override the `from_config()` class method. 
This is the base\n", + "implementation of `from_config()`:\n", + "\n", + "```python\n", + "def from_config(cls, config):\n", + " return cls(**config)\n", + "```\n", + "\n", + "To learn more about serialization and saving, see the complete\n", + "[guide to saving and serializing models](https://www.tensorflow.org/guide/keras/save_and_serialize/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Privileged `training` argument in the `call()` method\n", + "\n", + "Some layers, in particular the `BatchNormalization` layer and the `Dropout`\n", + "layer, have different behaviors during training and inference. For such\n", + "layers, it is standard practice to expose a `training` (boolean) argument in\n", + "the `call()` method.\n", + "\n", + "By exposing this argument in `call()`, you enable the built-in training and\n", + "evaluation loops (e.g. `fit()`) to correctly use the layer in training and\n", + "inference." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomDropout(keras.layers.Layer):\n", + " def __init__(self, rate, **kwargs):\n", + " super(CustomDropout, self).__init__(**kwargs)\n", + " self.rate = rate\n", + "\n", + " def call(self, inputs, training=None):\n", + " if training:\n", + " return tf.nn.dropout(inputs, rate=self.rate)\n", + " return inputs\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Privileged `mask` argument in the `call()` method\n", + "\n", + "The other privileged argument supported by `call()` is the `mask` argument.\n", + "\n", + "You will find it in all Keras RNN layers. 
A mask is a boolean tensor (one\n", + "boolean value per timestep in the input) used to skip certain input timesteps\n", + "when processing timeseries data.\n", + "\n", + "Keras will automatically pass the correct `mask` argument to `__call__()` for\n", + "layers that support it, when a mask is generated by a prior layer.\n", + "Mask-generating layers are the `Embedding`\n", + "layer configured with `mask_zero=True`, and the `Masking` layer.\n", + "\n", + "To learn more about masking and how to write masking-enabled layers, please\n", + "check out the guide\n", + "[\"understanding padding and masking\"](https://www.tensorflow.org/guide/keras/masking_and_padding/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The `Model` class\n", + "\n", + "In general, you will use the `Layer` class to define inner computation blocks,\n", + "and will use the `Model` class to define the outer model -- the object you\n", + "will train.\n", + "\n", + "For instance, in a ResNet50 model, you would have several ResNet blocks\n", + "subclassing `Layer`, and a single `Model` encompassing the entire ResNet50\n", + "network.\n", + "\n", + "The `Model` class has the same API as `Layer`, with the following differences:\n", + "\n", + "- It exposes built-in training, evaluation, and prediction loops\n", + "(`model.fit()`, `model.evaluate()`, `model.predict()`).\n", + "- It exposes the list of its inner layers, via the `model.layers` property.\n", + "- It exposes saving and serialization APIs (`save()`, `save_weights()`...)\n", + "\n", + "Effectively, the `Layer` class corresponds to what we refer to in the\n", + "literature as a \"layer\" (as in \"convolution layer\" or \"recurrent layer\") or as\n", + "a \"block\" (as in \"ResNet block\" or \"Inception block\").\n", + "\n", + "Meanwhile, the `Model` class corresponds to what is referred to in the\n", + "literature as a \"model\" (as in \"deep learning model\") or as a \"network\" (as 
in\n", + "\"deep neural network\").\n", + "\n", + "So if you're wondering, \"should I use the `Layer` class or the `Model` class?\",\n", + "ask yourself: will I need to call `fit()` on it? Will I need to call `save()`\n", + "on it? If so, go with `Model`. If not (either because your class is just a block\n", + "in a bigger system, or because you are writing training & saving code yourself),\n", + "use `Layer`.\n", + "\n", + "For instance, we could take our mini-resnet example above, and use it to build\n", + "a `Model` that we could train with `fit()`, and that we could save with\n", + "`save_weights()`:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "```python\n", + "class ResNet(tf.keras.Model):\n", + "\n", + " def __init__(self):\n", + " super(ResNet, self).__init__()\n", + " self.block_1 = ResNetBlock()\n", + " self.block_2 = ResNetBlock()\n", + " self.global_pool = layers.GlobalAveragePooling2D()\n", + " self.classifier = Dense(num_classes)\n", + "\n", + " def call(self, inputs):\n", + " x = self.block_1(inputs)\n", + " x = self.block_2(x)\n", + " x = self.global_pool(x)\n", + " return self.classifier(x)\n", + "\n", + "\n", + "resnet = ResNet()\n", + "dataset = ...\n", + "resnet.fit(dataset, epochs=10)\n", + "resnet.save(filepath)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Putting it all together: an end-to-end example\n", + "\n", + "Here's what you've learned so far:\n", + "\n", + "- A `Layer` encapsulate a state (created in `__init__()` or `build()`) and some\n", + "computation (defined in `call()`).\n", + "- Layers can be recursively nested to create new, bigger computation blocks.\n", + "- Layers can create and track losses (typically regularization losses) as well\n", + "as metrics, via `add_loss()` and `add_metric()`\n", + "- The outer container, the thing you want to train, is a `Model`. 
A `Model` is\n", + "just like a `Layer`, but with added training and serialization utilities.\n", + "\n", + "Let's put all of these things together into an end-to-end example: we're going\n", + "to implement a Variational AutoEncoder (VAE). We'll train it on MNIST digits.\n", + "\n", + "Our VAE will be a subclass of `Model`, built as a nested composition of layers\n", + "that subclass `Layer`. It will feature a regularization loss (KL divergence)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "from tensorflow.keras import layers\n", + "\n", + "\n", + "class Sampling(layers.Layer):\n", + " \"\"\"Uses (z_mean, z_log_var) to sample z, the vector encoding a digit.\"\"\"\n", + "\n", + " def call(self, inputs):\n", + " z_mean, z_log_var = inputs\n", + " batch = tf.shape(z_mean)[0]\n", + " dim = tf.shape(z_mean)[1]\n", + " epsilon = tf.keras.backend.random_normal(shape=(batch, dim))\n", + " return z_mean + tf.exp(0.5 * z_log_var) * epsilon\n", + "\n", + "\n", + "class Encoder(layers.Layer):\n", + " \"\"\"Maps MNIST digits to a triplet (z_mean, z_log_var, z).\"\"\"\n", + "\n", + " def __init__(self, latent_dim=32, intermediate_dim=64, name=\"encoder\", **kwargs):\n", + " super(Encoder, self).__init__(name=name, **kwargs)\n", + " self.dense_proj = layers.Dense(intermediate_dim, activation=\"relu\")\n", + " self.dense_mean = layers.Dense(latent_dim)\n", + " self.dense_log_var = layers.Dense(latent_dim)\n", + " self.sampling = Sampling()\n", + "\n", + " def call(self, inputs):\n", + " x = self.dense_proj(inputs)\n", + " z_mean = self.dense_mean(x)\n", + " z_log_var = self.dense_log_var(x)\n", + " z = self.sampling((z_mean, z_log_var))\n", + " return z_mean, z_log_var, z\n", + "\n", + "\n", + "class Decoder(layers.Layer):\n", + " \"\"\"Converts z, the encoded digit vector, back into a readable digit.\"\"\"\n", + "\n", + " def __init__(self, original_dim, intermediate_dim=64, 
name=\"decoder\", **kwargs):\n", + " super(Decoder, self).__init__(name=name, **kwargs)\n", + " self.dense_proj = layers.Dense(intermediate_dim, activation=\"relu\")\n", + " self.dense_output = layers.Dense(original_dim, activation=\"sigmoid\")\n", + "\n", + " def call(self, inputs):\n", + " x = self.dense_proj(inputs)\n", + " return self.dense_output(x)\n", + "\n", + "\n", + "class VariationalAutoEncoder(keras.Model):\n", + " \"\"\"Combines the encoder and decoder into an end-to-end model for training.\"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " original_dim,\n", + " intermediate_dim=64,\n", + " latent_dim=32,\n", + " name=\"autoencoder\",\n", + " **kwargs\n", + " ):\n", + " super(VariationalAutoEncoder, self).__init__(name=name, **kwargs)\n", + " self.original_dim = original_dim\n", + " self.encoder = Encoder(latent_dim=latent_dim, intermediate_dim=intermediate_dim)\n", + " self.decoder = Decoder(original_dim, intermediate_dim=intermediate_dim)\n", + "\n", + " def call(self, inputs):\n", + " z_mean, z_log_var, z = self.encoder(inputs)\n", + " reconstructed = self.decoder(z)\n", + " # Add KL divergence regularization loss.\n", + " kl_loss = -0.5 * tf.reduce_mean(\n", + " z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1\n", + " )\n", + " self.add_loss(kl_loss)\n", + " return reconstructed\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's write a simple training loop on MNIST:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "original_dim = 784\n", + "vae = VariationalAutoEncoder(original_dim, 64, 32)\n", + "\n", + "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)\n", + "mse_loss_fn = tf.keras.losses.MeanSquaredError()\n", + "\n", + "loss_metric = tf.keras.metrics.Mean()\n", + "\n", + "(x_train, _), _ = tf.keras.datasets.mnist.load_data()\n", + "x_train = x_train.reshape(60000, 
784).astype(\"float32\") / 255\n", + "\n", + "train_dataset = tf.data.Dataset.from_tensor_slices(x_train)\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "epochs = 2\n", + "\n", + "# Iterate over epochs.\n", + "for epoch in range(epochs):\n", + " print(\"Start of epoch %d\" % (epoch,))\n", + "\n", + " # Iterate over the batches of the dataset.\n", + " for step, x_batch_train in enumerate(train_dataset):\n", + " with tf.GradientTape() as tape:\n", + " reconstructed = vae(x_batch_train)\n", + " # Compute reconstruction loss\n", + " loss = mse_loss_fn(x_batch_train, reconstructed)\n", + " loss += sum(vae.losses) # Add KLD regularization loss\n", + "\n", + " grads = tape.gradient(loss, vae.trainable_weights)\n", + " optimizer.apply_gradients(zip(grads, vae.trainable_weights))\n", + "\n", + " loss_metric(loss)\n", + "\n", + " if step % 100 == 0:\n", + " print(\"step %d: mean loss = %.4f\" % (step, loss_metric.result()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that since the VAE is subclassing `Model`, it features built-in training\n", + "loops. So you could also have trained it like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "vae = VariationalAutoEncoder(784, 64, 32)\n", + "\n", + "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)\n", + "\n", + "vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())\n", + "vae.fit(x_train, x_train, epochs=2, batch_size=64)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Beyond object-oriented development: the Functional API\n", + "\n", + "Was this example too much object-oriented development for you? You can also\n", + "build models using the [Functional API](https://www.tensorflow.org/guide/keras/functional/). 
Importantly,\n", + "choosing one style or another does not prevent you from leveraging components\n", + "written in the other style: you can always mix-and-match.\n", + "\n", + "For instance, the Functional API example below reuses the same `Sampling` layer\n", + "we defined in the example above:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "original_dim = 784\n", + "intermediate_dim = 64\n", + "latent_dim = 32\n", + "\n", + "# Define encoder model.\n", + "original_inputs = tf.keras.Input(shape=(original_dim,), name=\"encoder_input\")\n", + "x = layers.Dense(intermediate_dim, activation=\"relu\")(original_inputs)\n", + "z_mean = layers.Dense(latent_dim, name=\"z_mean\")(x)\n", + "z_log_var = layers.Dense(latent_dim, name=\"z_log_var\")(x)\n", + "z = Sampling()((z_mean, z_log_var))\n", + "encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name=\"encoder\")\n", + "\n", + "# Define decoder model.\n", + "latent_inputs = tf.keras.Input(shape=(latent_dim,), name=\"z_sampling\")\n", + "x = layers.Dense(intermediate_dim, activation=\"relu\")(latent_inputs)\n", + "outputs = layers.Dense(original_dim, activation=\"sigmoid\")(x)\n", + "decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name=\"decoder\")\n", + "\n", + "# Define VAE model.\n", + "outputs = decoder(z)\n", + "vae = tf.keras.Model(inputs=original_inputs, outputs=outputs, name=\"vae\")\n", + "\n", + "# Add KL divergence regularization loss.\n", + "kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)\n", + "vae.add_loss(kl_loss)\n", + "\n", + "# Train.\n", + "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)\n", + "vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())\n", + "vae.fit(x_train, x_train, epochs=3, batch_size=64)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For more information, make 
sure to read the [Functional API guide](https://www.tensorflow.org/guide/keras/functional/)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "custom_layers_and_models", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/customizing_what_happens_in_fit.ipynb b/tf/customizing_what_happens_in_fit.ipynb new file mode 100644 index 0000000000..fa16e1dfc4 --- /dev/null +++ b/tf/customizing_what_happens_in_fit.ipynb @@ -0,0 +1,610 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Customizing what happens in `fit()`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "When you're doing supervised learning, you can use `fit()` and everything works\n", + "smoothly.\n", + "\n", + "When you need to write your own training loop from scratch, you can use the\n", + "`GradientTape` and take control of every little detail.\n", + "\n", + "But what if you need a custom training algorithm, but you still want to benefit from\n", + "the convenient features of `fit()`, such as callbacks, built-in distribution support,\n", + "or step fusing?\n", + "\n", + "A core principle of Keras is **progressive disclosure of complexity**. You should\n", + "always be able to get into lower-level workflows in a gradual way. You shouldn't fall\n", + "off a cliff if the high-level functionality doesn't exactly match your use case. You\n", + "should be able to gain more control over the small details while retaing a\n", + "commensurate amount of high-level convenience.\n", + "\n", + "When you need to customize what `fit()` does, you should **override the training step\n", + "function of the `Model` class**. This is the function that is called by `fit()` for\n", + "every batch of data. You will then be able to call `fit()` as usual -- and it will be\n", + "running your own learning algorithm.\n", + "\n", + "Note that this pattern does not prevent you from building models with the Functional\n", + "API. You can do this whether you're building `Sequential` models, Functional API\n", + "models, or subclassed models.\n", + "\n", + "Let's see how that works." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup\n", + "Requires TensorFlow 2.2 or later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## A first simple example\n", + "\n", + "Let's start from a simple example:\n", + "\n", + "- We create a new class that subclasses `keras.Model`.\n", + "- We just override the method `train_step(self, data)`.\n", + "- We return a dictionary mapping metric names (including the loss) to their current\n", + "value.\n", + "\n", + "The input argument `data` is what gets passed to fit as training data:\n", + "\n", + "- If you pass Numpy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple\n", + "`(x, y)`\n", + "- If you pass a `tf.data.Dataset`, by calling `fit(dataset, ...)`, then `data` will be\n", + "what gets yielded by `dataset` at each batch.\n", + "\n", + "In the body of the `train_step` method, we implement a regular training update,\n", + "similar to what you are already familiar with. Importantly, **we compute the loss via\n", + "`self.compiled_loss`**, which wraps the loss(es) function(s) that were passed to\n", + "`compile()`.\n", + "\n", + "Similarly, we call `self.compiled_metrics.update_state(y, y_pred)` to update the state\n", + "of the metrics that were passed in `compile()`, and we query results from\n", + "`self.metrics` at the end to retrieve their current value." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomModel(keras.Model):\n", + " def train_step(self, data):\n", + " # Unpack the data. 
Its structure depends on your model and\n", + " # on what you pass to `fit()`.\n", + " x, y = data\n", + "\n", + " with tf.GradientTape() as tape:\n", + " y_pred = self(x, training=True) # Forward pass\n", + " # Compute the loss value\n", + " # (the loss function is configured in `compile()`)\n", + " loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)\n", + "\n", + " # Compute gradients\n", + " trainable_vars = self.trainable_variables\n", + " gradients = tape.gradient(loss, trainable_vars)\n", + " # Update weights\n", + " self.optimizer.apply_gradients(zip(gradients, trainable_vars))\n", + " # Update metrics (includes the metric that tracks the loss)\n", + " self.compiled_metrics.update_state(y, y_pred)\n", + " # Return a dict mapping metric names to current value\n", + " return {m.name: m.result() for m in self.metrics}\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's try this out:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# Construct and compile an instance of CustomModel\n", + "inputs = keras.Input(shape=(32,))\n", + "outputs = keras.layers.Dense(1)(inputs)\n", + "model = CustomModel(inputs, outputs)\n", + "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", + "\n", + "# Just use `fit` as usual\n", + "x = np.random.random((1000, 32))\n", + "y = np.random.random((1000, 1))\n", + "model.fit(x, y, epochs=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Going lower-level\n", + "\n", + "Naturally, you could just skip passing a loss function in `compile()`, and instead do\n", + "everything *manually* in `train_step`. Likewise for metrics. 
Here's a lower-level\n", + "example, that only uses `compile()` to configure the optimizer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "mae_metric = keras.metrics.MeanAbsoluteError(name=\"mae\")\n", + "loss_tracker = keras.metrics.Mean(name=\"loss\")\n", + "\n", + "\n", + "class CustomModel(keras.Model):\n", + " def train_step(self, data):\n", + " x, y = data\n", + "\n", + " with tf.GradientTape() as tape:\n", + " y_pred = self(x, training=True) # Forward pass\n", + " # Compute our own loss\n", + " loss = keras.losses.mean_squared_error(y, y_pred)\n", + "\n", + " # Compute gradients\n", + " trainable_vars = self.trainable_variables\n", + " gradients = tape.gradient(loss, trainable_vars)\n", + "\n", + " # Update weights\n", + " self.optimizer.apply_gradients(zip(gradients, trainable_vars))\n", + "\n", + " # Compute our own metrics\n", + " loss_tracker.update_state(loss)\n", + " mae_metric.update_state(y, y_pred)\n", + " return {\"loss\": loss_tracker.result(), \"mae\": mae_metric.result()}\n", + "\n", + "\n", + "# Construct an instance of CustomModel\n", + "inputs = keras.Input(shape=(32,))\n", + "outputs = keras.layers.Dense(1)(inputs)\n", + "model = CustomModel(inputs, outputs)\n", + "\n", + "# We don't passs a loss or metrics here.\n", + "model.compile(optimizer=\"adam\")\n", + "\n", + "# Just use `fit` as usual -- you can use callbacks, etc.\n", + "x = np.random.random((1000, 32))\n", + "y = np.random.random((1000, 1))\n", + "model.fit(x, y, epochs=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Supporting `sample_weight` & `class_weight`\n", + "\n", + "You may have noticed that our first basic example didn't make any mention of sample\n", + "weighting. 
If you want to support the `fit()` arguments `sample_weight` and\n", + "`class_weight`, you'd simply do the following:\n", + "\n", + "- Unpack `sample_weight` from the `data` argument\n", + "- Pass it to `compiled_loss` & `compiled_metrics` (of course, you could also just apply\n", + "it manually if you don't rely on `compile()` for losses & metrics)\n", + "- That's it. That's the list." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomModel(keras.Model):\n", + " def train_step(self, data):\n", + " # Unpack the data. Its structure depends on your model and\n", + " # on what you pass to `fit()`.\n", + " if len(data) == 3:\n", + " x, y, sample_weight = data\n", + " else:\n", + " x, y = data\n", + "\n", + " with tf.GradientTape() as tape:\n", + " y_pred = self(x, training=True) # Forward pass\n", + " # Compute the loss value.\n", + " # The loss function is configured in `compile()`.\n", + " loss = self.compiled_loss(\n", + " y,\n", + " y_pred,\n", + " sample_weight=sample_weight,\n", + " regularization_losses=self.losses,\n", + " )\n", + "\n", + " # Compute gradients\n", + " trainable_vars = self.trainable_variables\n", + " gradients = tape.gradient(loss, trainable_vars)\n", + "\n", + " # Update weights\n", + " self.optimizer.apply_gradients(zip(gradients, trainable_vars))\n", + "\n", + " # Update the metrics.\n", + " # Metrics are configured in `compile()`.\n", + " self.compiled_metrics.update_state(y, y_pred, sample_weight=sample_weight)\n", + "\n", + " # Return a dict mapping metric names to current value.\n", + " # Note that it will include the loss (tracked in self.metrics).\n", + " return {m.name: m.result() for m in self.metrics}\n", + "\n", + "\n", + "# Construct and compile an instance of CustomModel\n", + "inputs = keras.Input(shape=(32,))\n", + "outputs = keras.layers.Dense(1)(inputs)\n", + "model = CustomModel(inputs, outputs)\n", + 
"model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\"])\n", + "\n", + "# You can now use sample_weight argument\n", + "x = np.random.random((1000, 32))\n", + "y = np.random.random((1000, 1))\n", + "sw = np.random.random((1000, 1))\n", + "model.fit(x, y, sample_weight=sw, epochs=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Providing your own evaluation step\n", + "\n", + "What if you want to do the same for calls to `model.evaluate()`? Then you would\n", + "override `test_step` in exactly the same way. Here's what it looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomModel(keras.Model):\n", + " def test_step(self, data):\n", + " # Unpack the data\n", + " x, y = data\n", + " # Compute predictions\n", + " y_pred = self(x, training=False)\n", + " # Updates the metrics tracking the loss\n", + " self.compiled_loss(y, y_pred, regularization_losses=self.losses)\n", + " # Update the metrics.\n", + " self.compiled_metrics.update_state(y, y_pred)\n", + " # Return a dict mapping metric names to current value.\n", + " # Note that it will include the loss (tracked in self.metrics).\n", + " return {m.name: m.result() for m in self.metrics}\n", + "\n", + "\n", + "# Construct an instance of CustomModel\n", + "inputs = keras.Input(shape=(32,))\n", + "outputs = keras.layers.Dense(1)(inputs)\n", + "model = CustomModel(inputs, outputs)\n", + "model.compile(loss=\"mse\", metrics=[\"mae\"])\n", + "\n", + "# Evaluate with our custom test_step\n", + "x = np.random.random((1000, 32))\n", + "y = np.random.random((1000, 1))\n", + "model.evaluate(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Wrapping up: an end-to-end GAN example\n", + "\n", + "Let's walk through an end-to-end example that leverages everything you just learned.\n", + 
"\n", + "Let's consider:\n", + "\n", + "- A generator network meant to generate 28x28x1 images.\n", + "- A discriminator network meant to classify 28x28x1 images into two classes (\"fake\" and\n", + "\"real\").\n", + "- One optimizer for each.\n", + "- A loss function to train the discriminator.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "from tensorflow.keras import layers\n", + "\n", + "# Create the discriminator\n", + "discriminator = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(28, 28, 1)),\n", + " layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.GlobalMaxPooling2D(),\n", + " layers.Dense(1),\n", + " ],\n", + " name=\"discriminator\",\n", + ")\n", + "\n", + "# Create the generator\n", + "latent_dim = 128\n", + "generator = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(latent_dim,)),\n", + " # We want to generate 128 coefficients to reshape into a 7x7x128 map\n", + " layers.Dense(7 * 7 * 128),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Reshape((7, 7, 128)),\n", + " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n", + " ],\n", + " name=\"generator\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's a feature-complete GAN class, overriding `compile()` to use its own signature,\n", + "and implementing the entire GAN algorithm in 17 lines in `train_step`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" 
+ }, + "outputs": [], + "source": [ + "\n", + "class GAN(keras.Model):\n", + " def __init__(self, discriminator, generator, latent_dim):\n", + " super(GAN, self).__init__()\n", + " self.discriminator = discriminator\n", + " self.generator = generator\n", + " self.latent_dim = latent_dim\n", + "\n", + " def compile(self, d_optimizer, g_optimizer, loss_fn):\n", + " super(GAN, self).compile()\n", + " self.d_optimizer = d_optimizer\n", + " self.g_optimizer = g_optimizer\n", + " self.loss_fn = loss_fn\n", + "\n", + " def train_step(self, real_images):\n", + " if isinstance(real_images, tuple):\n", + " real_images = real_images[0]\n", + " # Sample random points in the latent space\n", + " batch_size = tf.shape(real_images)[0]\n", + " random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))\n", + "\n", + " # Decode them to fake images\n", + " generated_images = self.generator(random_latent_vectors)\n", + "\n", + " # Combine them with real images\n", + " combined_images = tf.concat([generated_images, real_images], axis=0)\n", + "\n", + " # Assemble labels discriminating real from fake images\n", + " labels = tf.concat(\n", + " [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0\n", + " )\n", + " # Add random noise to the labels - important trick!\n", + " labels += 0.05 * tf.random.uniform(tf.shape(labels))\n", + "\n", + " # Train the discriminator\n", + " with tf.GradientTape() as tape:\n", + " predictions = self.discriminator(combined_images)\n", + " d_loss = self.loss_fn(labels, predictions)\n", + " grads = tape.gradient(d_loss, self.discriminator.trainable_weights)\n", + " self.d_optimizer.apply_gradients(\n", + " zip(grads, self.discriminator.trainable_weights)\n", + " )\n", + "\n", + " # Sample random points in the latent space\n", + " random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))\n", + "\n", + " # Assemble labels that say \"all real images\"\n", + " misleading_labels = tf.zeros((batch_size, 1))\n", + 
"\n", + " # Train the generator (note that we should *not* update the weights\n", + " # of the discriminator)!\n", + " with tf.GradientTape() as tape:\n", + " predictions = self.discriminator(self.generator(random_latent_vectors))\n", + " g_loss = self.loss_fn(misleading_labels, predictions)\n", + " grads = tape.gradient(g_loss, self.generator.trainable_weights)\n", + " self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))\n", + " return {\"d_loss\": d_loss, \"g_loss\": g_loss}\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's test-drive it:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Prepare the dataset. We use both the training & test MNIST digits.\n", + "batch_size = 64\n", + "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n", + "all_digits = np.concatenate([x_train, x_test])\n", + "all_digits = all_digits.astype(\"float32\") / 255.0\n", + "all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n", + "dataset = tf.data.Dataset.from_tensor_slices(all_digits)\n", + "dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", + "\n", + "gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)\n", + "gan.compile(\n", + " d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n", + " g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),\n", + " loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),\n", + ")\n", + "\n", + "# To limit execution time, we only train on 100 batches. You can train on\n", + "# the entire dataset. You will need about 20 epochs to get nice results.\n", + "gan.fit(dataset.take(100), epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The idea behind deep learning are simple, so why should their implementation be painful?" 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "customizing_what_happens_in_fit", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/functional.ipynb b/tf/functional.ipynb new file mode 100644 index 0000000000..93f6976dc7 --- /dev/null +++ b/tf/functional.ipynb @@ -0,0 +1,1420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# The Functional API" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "The Keras *functional API* is a way to create models that is more flexible\n", + "than the `tf.keras.Sequential` API. The functional API can handle models\n", + "with non-linear topology, models with shared layers, and models\n", + "with multiple inputs or outputs.\n", + "\n", + "The main idea that a deep learning model is usually\n", + "a directed acyclic graph (DAG) of layers.\n", + "So the functional API is a way to build *graphs of layers*.\n", + "\n", + "Consider the following model:\n", + "\n", + "```\n", + "(input: 784-dimensional vectors)\n", + " \u21a7\n", + "[Dense (64 units, relu activation)]\n", + " \u21a7\n", + "[Dense (64 units, relu activation)]\n", + " \u21a7\n", + "[Dense (10 units, softmax activation)]\n", + " \u21a7\n", + "(output: logits of a probability distribution over 10 classes)\n", + "```\n", + "\n", + "This is a basic graph with three layers.\n", + "To build this model using the functional API, start by creating an input node:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The shape of the data is set as a 784-dimensional vector.\n", + "The batch size is always omitted since only the shape of each sample is specified.\n", + "\n", + "If, for example, you have an image input with a shape of `(32, 32, 3)`,\n", + 
"you would use:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Just for demonstration purposes.\n", + "img_inputs = keras.Input(shape=(32, 32, 3))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The `inputs` that is returned contains information about the shape and `dtype`\n", + "of the input data that you feed to your model.\n", + "Here's the shape:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's the dtype:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs.dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You create a new node in the graph of layers by calling a layer on this `inputs`\n", + "object:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "dense = layers.Dense(64, activation=\"relu\")\n", + "x = dense(inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The \"layer call\" action is like drawing an arrow from \"inputs\" to this layer\n", + "you created.\n", + "You're \"passing\" the inputs to the `dense` layer, and out you get `x`.\n", + "\n", + "Let's add a few more layers to the graph of layers:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "x = layers.Dense(64, activation=\"relu\")(x)\n", + "outputs = layers.Dense(10)(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + 
"source": [ + "At this point, you can create a `Model` by specifying its inputs and outputs\n", + "in the graph of layers:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Model(inputs=inputs, outputs=outputs, name=\"mnist_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's check out what the model summary looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can also plot the model as a graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.utils.plot_model(model, \"my_first_model.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "And, optionally, display the input and output shapes of each layer\n", + "in the plotted graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.utils.plot_model(model, \"my_first_model_with_shape_info.png\", show_shapes=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "This figure and the code are almost identical. In the code version,\n", + "the connection arrows are replaced by the call operation.\n", + "\n", + "A \"graph of layers\" is an intuitive mental image for a deep learning model,\n", + "and the functional API is a way to create models that closely mirror this." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Training, evaluation, and inference\n", + "\n", + "Training, evaluation, and inference work exactly in the same way for models\n", + "built using the functional API as for `Sequential` models.\n", + "\n", + "Here, load the MNIST image data, reshape it into vectors,\n", + "fit the model on the data (while monitoring performance on a validation split),\n", + "then evaluate the model on the test data:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", + "\n", + "x_train = x_train.reshape(60000, 784).astype(\"float32\") / 255\n", + "x_test = x_test.reshape(10000, 784).astype(\"float32\") / 255\n", + "\n", + "model.compile(\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=keras.optimizers.RMSprop(),\n", + " metrics=[\"accuracy\"],\n", + ")\n", + "\n", + "history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2)\n", + "\n", + "test_scores = model.evaluate(x_test, y_test, verbose=2)\n", + "print(\"Test loss:\", test_scores[0])\n", + "print(\"Test accuracy:\", test_scores[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For further reading, see the [training and evaluation](https://www.tensorflow.org/guide/keras/train_and_evaluate/) guide." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Save and serialize\n", + "\n", + "Saving the model and serialization work the same way for models built using\n", + "the functional API as they do for `Sequential` models. The standard way\n", + "to save a functional model is to call `model.save()`\n", + "to save the entire model as a single file. 
You can later recreate the same model\n", + "from this file, even if the code that built the model is no longer available.\n", + "\n", + "This saved file includes the:\n", + "- model architecture\n", + "- model weight values (that were learned during training)\n", + "- model training config, if any (as passed to `compile`)\n", + "- optimizer and its state, if any (to restart training where you left off)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.save(\"path_to_my_model\")\n", + "del model\n", + "# Recreate the exact same model purely from the file:\n", + "model = keras.models.load_model(\"path_to_my_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For details, read the model [serialization & saving](\n", + "https://www.tensorflow.org/guide/keras/save_and_serialize/) guide." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Use the same graph of layers to define multiple models\n", + "\n", + "In the functional API, models are created by specifying their inputs\n", + "and outputs in a graph of layers. That means that a single\n", + "graph of layers can be used to generate multiple models.\n", + "\n", + "In the example below, you use the same stack of layers to instantiate two models:\n", + "an `encoder` model that turns image inputs into 16-dimensional vectors,\n", + "and an end-to-end `autoencoder` model for training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "encoder_input = keras.Input(shape=(28, 28, 1), name=\"img\")\n", + "x = layers.Conv2D(16, 3, activation=\"relu\")(encoder_input)\n", + "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", + "x = layers.MaxPooling2D(3)(x)\n", + "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", + "x = layers.Conv2D(16, 3, activation=\"relu\")(x)\n", + "encoder_output = layers.GlobalMaxPooling2D()(x)\n", + "\n", + "encoder = keras.Model(encoder_input, encoder_output, name=\"encoder\")\n", + "encoder.summary()\n", + "\n", + "x = layers.Reshape((4, 4, 1))(encoder_output)\n", + "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", + "x = layers.Conv2DTranspose(32, 3, activation=\"relu\")(x)\n", + "x = layers.UpSampling2D(3)(x)\n", + "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", + "decoder_output = layers.Conv2DTranspose(1, 3, activation=\"relu\")(x)\n", + "\n", + "autoencoder = keras.Model(encoder_input, decoder_output, name=\"autoencoder\")\n", + "autoencoder.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here, the decoding architecture is strictly symmetrical\n", + "to the encoding architecture, so the output shape is the same as\n", + "the input shape `(28, 28, 1)`.\n", + "\n", + "The reverse of a `Conv2D` layer is a `Conv2DTranspose` layer,\n", + "and the reverse of a `MaxPooling2D` layer is an `UpSampling2D` layer." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## All models are callable, just like layers\n", + "\n", + "You can treat any model as if it were a layer by invoking it on an `Input` or\n", + "on the output of another layer. 
By calling a model you aren't just reusing\n", + "the architecture of the model, you're also reusing its weights.\n", + "\n", + "To see this in action, here's a different take on the autoencoder example that\n", + "creates an encoder model, a decoder model, and chain them in two calls\n", + "to obtain the autoencoder model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "encoder_input = keras.Input(shape=(28, 28, 1), name=\"original_img\")\n", + "x = layers.Conv2D(16, 3, activation=\"relu\")(encoder_input)\n", + "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", + "x = layers.MaxPooling2D(3)(x)\n", + "x = layers.Conv2D(32, 3, activation=\"relu\")(x)\n", + "x = layers.Conv2D(16, 3, activation=\"relu\")(x)\n", + "encoder_output = layers.GlobalMaxPooling2D()(x)\n", + "\n", + "encoder = keras.Model(encoder_input, encoder_output, name=\"encoder\")\n", + "encoder.summary()\n", + "\n", + "decoder_input = keras.Input(shape=(16,), name=\"encoded_img\")\n", + "x = layers.Reshape((4, 4, 1))(decoder_input)\n", + "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", + "x = layers.Conv2DTranspose(32, 3, activation=\"relu\")(x)\n", + "x = layers.UpSampling2D(3)(x)\n", + "x = layers.Conv2DTranspose(16, 3, activation=\"relu\")(x)\n", + "decoder_output = layers.Conv2DTranspose(1, 3, activation=\"relu\")(x)\n", + "\n", + "decoder = keras.Model(decoder_input, decoder_output, name=\"decoder\")\n", + "decoder.summary()\n", + "\n", + "autoencoder_input = keras.Input(shape=(28, 28, 1), name=\"img\")\n", + "encoded_img = encoder(autoencoder_input)\n", + "decoded_img = decoder(encoded_img)\n", + "autoencoder = keras.Model(autoencoder_input, decoded_img, name=\"autoencoder\")\n", + "autoencoder.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "As you can see, the model can be nested: a model can contain sub-models\n", + "(since a 
model is just like a layer).\n", + "A common use case for model nesting is *ensembling*.\n", + "For example, here's how to ensemble a set of models into a single model\n", + "that averages their predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def get_model():\n", + " inputs = keras.Input(shape=(128,))\n", + " outputs = layers.Dense(1)(inputs)\n", + " return keras.Model(inputs, outputs)\n", + "\n", + "\n", + "model1 = get_model()\n", + "model2 = get_model()\n", + "model3 = get_model()\n", + "\n", + "inputs = keras.Input(shape=(128,))\n", + "y1 = model1(inputs)\n", + "y2 = model2(inputs)\n", + "y3 = model3(inputs)\n", + "outputs = layers.average([y1, y2, y3])\n", + "ensemble_model = keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Manipulate complex graph topologies\n", + "\n", + "### Models with multiple inputs and outputs\n", + "\n", + "The functional API makes it easy to manipulate multiple inputs and outputs.\n", + "This cannot be handled with the `Sequential` API.\n", + "\n", + "For example, if you're building a system for ranking custom issue tickets by\n", + "priority and routing them to the correct department,\n", + "then the model will have three inputs:\n", + "\n", + "- the title of the ticket (text input),\n", + "- the text body of the ticket (text input), and\n", + "- any tags added by the user (categorical input)\n", + "\n", + "This model will have two outputs:\n", + "\n", + "- the priority score between 0 and 1 (scalar sigmoid output), and\n", + "- the department that should handle the ticket (softmax output\n", + "over the set of departments).\n", + "\n", + "You can build this model in a few lines with the functional API:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + 
"source": [ + "num_tags = 12 # Number of unique issue tags\n", + "num_words = 10000 # Size of vocabulary obtained when preprocessing text data\n", + "num_departments = 4 # Number of departments for predictions\n", + "\n", + "title_input = keras.Input(\n", + " shape=(None,), name=\"title\"\n", + ") # Variable-length sequence of ints\n", + "body_input = keras.Input(shape=(None,), name=\"body\") # Variable-length sequence of ints\n", + "tags_input = keras.Input(\n", + " shape=(num_tags,), name=\"tags\"\n", + ") # Binary vectors of size `num_tags`\n", + "\n", + "# Embed each word in the title into a 64-dimensional vector\n", + "title_features = layers.Embedding(num_words, 64)(title_input)\n", + "# Embed each word in the text into a 64-dimensional vector\n", + "body_features = layers.Embedding(num_words, 64)(body_input)\n", + "\n", + "# Reduce sequence of embedded words in the title into a single 128-dimensional vector\n", + "title_features = layers.LSTM(128)(title_features)\n", + "# Reduce sequence of embedded words in the body into a single 32-dimensional vector\n", + "body_features = layers.LSTM(32)(body_features)\n", + "\n", + "# Merge all available features into a single large vector via concatenation\n", + "x = layers.concatenate([title_features, body_features, tags_input])\n", + "\n", + "# Stick a logistic regression for priority prediction on top of the features\n", + "priority_pred = layers.Dense(1, name=\"priority\")(x)\n", + "# Stick a department classifier on top of the features\n", + "department_pred = layers.Dense(num_departments, name=\"department\")(x)\n", + "\n", + "# Instantiate an end-to-end model predicting both priority and department\n", + "model = keras.Model(\n", + " inputs=[title_input, body_input, tags_input],\n", + " outputs=[priority_pred, department_pred],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Now plot the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 
0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.utils.plot_model(model, \"multi_input_and_output_model.png\", show_shapes=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "When compiling this model, you can assign different losses to each output.\n", + "You can even assign different weights to each loss -- to modulate\n", + "their contribution to the total training loss." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=[\n", + " keras.losses.BinaryCrossentropy(from_logits=True),\n", + " keras.losses.CategoricalCrossentropy(from_logits=True),\n", + " ],\n", + " loss_weights=[1.0, 0.2],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Since the output layers have different names, you could also specify\n", + "the loss like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss={\n", + " \"priority\": keras.losses.BinaryCrossentropy(from_logits=True),\n", + " \"department\": keras.losses.CategoricalCrossentropy(from_logits=True),\n", + " },\n", + " loss_weights=[1.0, 0.2],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Train the model by passing lists of NumPy arrays of inputs and targets:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Dummy input data\n", + "title_data = np.random.randint(num_words, size=(1280, 10))\n", + "body_data = np.random.randint(num_words, size=(1280, 100))\n", + "tags_data = np.random.randint(2, 
size=(1280, num_tags)).astype(\"float32\")\n", + "\n", + "# Dummy target data\n", + "priority_targets = np.random.random(size=(1280, 1))\n", + "dept_targets = np.random.randint(2, size=(1280, num_departments))\n", + "\n", + "model.fit(\n", + " {\"title\": title_data, \"body\": body_data, \"tags\": tags_data},\n", + " {\"priority\": priority_targets, \"department\": dept_targets},\n", + " epochs=2,\n", + " batch_size=32,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "When calling fit with a `Dataset` object, it should yield either a\n", + "tuple of lists like `([title_data, body_data, tags_data], [priority_targets, dept_targets])`\n", + "or a tuple of dictionaries like\n", + "`({'title': title_data, 'body': body_data, 'tags': tags_data}, {'priority': priority_targets, 'department': dept_targets})`.\n", + "\n", + "For more detailed explanation, refer to the [training and evaluation](https://www.tensorflow.org/guide/keras/train_and_evaluate/) guide." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### A toy ResNet model\n", + "\n", + "In addition to models with multiple inputs and outputs,\n", + "the functional API makes it easy to manipulate non-linear connectivity\n", + "topologies -- these are models with layers that are not connected sequentially.\n", + "Something the `Sequential` API can not handle.\n", + "\n", + "A common use case for this is residual connections.\n", + "Let's build a toy ResNet model for CIFAR10 to demonstrate this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(32, 32, 3), name=\"img\")\n", + "x = layers.Conv2D(32, 3, activation=\"relu\")(inputs)\n", + "x = layers.Conv2D(64, 3, activation=\"relu\")(x)\n", + "block_1_output = layers.MaxPooling2D(3)(x)\n", + "\n", + "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(block_1_output)\n", + "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n", + "block_2_output = layers.add([x, block_1_output])\n", + "\n", + "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(block_2_output)\n", + "x = layers.Conv2D(64, 3, activation=\"relu\", padding=\"same\")(x)\n", + "block_3_output = layers.add([x, block_2_output])\n", + "\n", + "x = layers.Conv2D(64, 3, activation=\"relu\")(block_3_output)\n", + "x = layers.GlobalAveragePooling2D()(x)\n", + "x = layers.Dense(256, activation=\"relu\")(x)\n", + "x = layers.Dropout(0.5)(x)\n", + "outputs = layers.Dense(10)(x)\n", + "\n", + "model = keras.Model(inputs, outputs, name=\"toy_resnet\")\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Plot the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.utils.plot_model(model, 
\"mini_resnet.png\", show_shapes=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Now train the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()\n", + "\n", + "x_train = x_train.astype(\"float32\") / 255.0\n", + "x_test = x_test.astype(\"float32\") / 255.0\n", + "y_train = keras.utils.to_categorical(y_train, 10)\n", + "y_test = keras.utils.to_categorical(y_test, 10)\n", + "\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=keras.losses.CategoricalCrossentropy(from_logits=True),\n", + " metrics=[\"acc\"],\n", + ")\n", + "# We restrict the data to the first 1000 samples so as to limit execution time\n", + "# on Colab. Try to train on the entire dataset until convergence!\n", + "model.fit(x_train[:1000], y_train[:1000], batch_size=64, epochs=1, validation_split=0.2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Shared layers\n", + "\n", + "Another good use for the functional API is for models that use *shared layers*.\n", + "Shared layers are layer instances that are reused multiple times in the same model --\n", + "they learn features that correspond to multiple paths in the graph-of-layers.\n", + "\n", + "Shared layers are often used to encode inputs from similar spaces\n", + "(say, two different pieces of text that feature similar vocabulary).\n", + "They enable sharing of information across these different inputs,\n", + "and they make it possible to train such a model on less data.\n", + "If a given word is seen in one of the inputs,\n", + "that will benefit the processing of all inputs that pass through the shared layer.\n", + "\n", + "To share a layer in the functional API, call the same layer instance multiple times.\n", + "For instance, here's 
an `Embedding` layer shared across two different text inputs:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Embedding for 1000 unique words mapped to 128-dimensional vectors\n", + "shared_embedding = layers.Embedding(1000, 128)\n", + "\n", + "# Variable-length sequence of integers\n", + "text_input_a = keras.Input(shape=(None,), dtype=\"int32\")\n", + "\n", + "# Variable-length sequence of integers\n", + "text_input_b = keras.Input(shape=(None,), dtype=\"int32\")\n", + "\n", + "# Reuse the same layer to encode both inputs\n", + "encoded_input_a = shared_embedding(text_input_a)\n", + "encoded_input_b = shared_embedding(text_input_b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Extract and reuse nodes in the graph of layers\n", + "\n", + "Because the graph of layers you are manipulating is a static data structure,\n", + "it can be accessed and inspected. And this is how you are able to plot\n", + "functional models as images.\n", + "\n", + "This also means that you can access the activations of intermediate layers\n", + "(\"nodes\" in the graph) and reuse them elsewhere --\n", + "which is very useful for something like feature extraction.\n", + "\n", + "Let's look at an example. 
This is a VGG19 model with weights pretrained on ImageNet:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "vgg19 = tf.keras.applications.VGG19()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "And these are the intermediate activations of the model,\n", + "obtained by querying the graph data structure:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "features_list = [layer.output for layer in vgg19.layers]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Use these features to create a new feature-extraction model that returns\n", + "the values of the intermediate layer activations:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list)\n", + "\n", + "img = np.random.random((1, 224, 224, 3)).astype(\"float32\")\n", + "extracted_features = feat_extraction_model(img)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "This comes in handy for tasks like\n", + "[neural style transfer](https://www.tensorflow.org/tutorials/generative/style_transfer),\n", + "among other things." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Extend the API using custom layers\n", + "\n", + "`tf.keras` includes a wide range of built-in layers, for example:\n", + "\n", + "- Convolutional layers: `Conv1D`, `Conv2D`, `Conv3D`, `Conv2DTranspose`\n", + "- Pooling layers: `MaxPooling1D`, `MaxPooling2D`, `MaxPooling3D`, `AveragePooling1D`\n", + "- RNN layers: `GRU`, `LSTM`, `ConvLSTM2D`\n", + "- `BatchNormalization`, `Dropout`, `Embedding`, etc.\n", + "\n", + "But if you don't find what you need, it's easy to extend the API by creating\n", + "your own layers. All layers subclass the `Layer` class and implement:\n", + "\n", + "- `call` method, that specifies the computation done by the layer.\n", + "- `build` method, that creates the weights of the layer (this is just a style\n", + "convention since you can create weights in `__init__`, as well).\n", + "\n", + "To learn more about creating layers from scratch, read\n", + "[custom layers and models](https://www.tensorflow.org/guide/keras/custom_layers_and_models) guide.\n", + "\n", + "The following is a basic implementation of `tf.keras.layers.Dense`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomDense(layers.Layer):\n", + " def __init__(self, units=32):\n", + " super(CustomDense, self).__init__()\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + "\n", + "inputs = keras.Input((4,))\n", + "outputs = CustomDense(10)(inputs)\n", + "\n", + "model = 
keras.Model(inputs, outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For serialization support in your custom layer, define a `get_config`\n", + "method that returns the constructor arguments of the layer instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomDense(layers.Layer):\n", + " def __init__(self, units=32):\n", + " super(CustomDense, self).__init__()\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + " def get_config(self):\n", + " return {\"units\": self.units}\n", + "\n", + "\n", + "inputs = keras.Input((4,))\n", + "outputs = CustomDense(10)(inputs)\n", + "\n", + "model = keras.Model(inputs, outputs)\n", + "config = model.get_config()\n", + "\n", + "new_model = keras.Model.from_config(config, custom_objects={\"CustomDense\": CustomDense})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Optionally, implement the classmethod `from_config(cls, config)` which is used\n", + "when recreating a layer instance given its config dictionary.\n", + "The default implementation of `from_config` is:\n", + "\n", + "```python\n", + "def from_config(cls, config):\n", + " return cls(**config)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## When to use the functional API\n", + "\n", + "When should you use the Keras functional API to create a new model,\n", + "or just subclass the `Model` 
class directly? In general, the functional API\n", + "is higher-level, easier and safer, and has a number of\n", + "features that subclassed models do not support.\n", + "\n", + "However, model subclassing provides greater flexibility when building models\n", + "that are not easily expressible as directed acyclic graphs of layers.\n", + "For example, you could not implement a Tree-RNN with the functional API\n", + "and would have to subclass `Model` directly.\n", + "\n", + "For in-depth look at the differences between the functional API and\n", + "model subclassing, read\n", + "[What are Symbolic and Imperative APIs in TensorFlow 2.0?](https://blog.tensorflow.org/2019/01/what-are-symbolic-and-imperative-apis.html).\n", + "\n", + "### Functional API strengths:\n", + "\n", + "The following properties are also true for Sequential models\n", + "(which are also data structures), but are not true for subclassed models\n", + "(which are Python bytecode, not data structures).\n", + "\n", + "#### Less verbose\n", + "\n", + "There is no `super(MyClass, self).__init__(...)`, no `def call(self, ...):`, etc.\n", + "\n", + "Compare:\n", + "\n", + "```python\n", + "inputs = keras.Input(shape=(32,))\n", + "x = layers.Dense(64, activation='relu')(inputs)\n", + "outputs = layers.Dense(10)(x)\n", + "mlp = keras.Model(inputs, outputs)\n", + "```\n", + "\n", + "With the subclassed version:\n", + "\n", + "```python\n", + "class MLP(keras.Model):\n", + "\n", + " def __init__(self, **kwargs):\n", + " super(MLP, self).__init__(**kwargs)\n", + " self.dense_1 = layers.Dense(64, activation='relu')\n", + " self.dense_2 = layers.Dense(10)\n", + "\n", + " def call(self, inputs):\n", + " x = self.dense_1(inputs)\n", + " return self.dense_2(x)\n", + "\n", + "# Instantiate the model.\n", + "mlp = MLP()\n", + "# Necessary to create the model's state.\n", + "# The model doesn't have a state until it's called at least once.\n", + "_ = mlp(tf.zeros((1, 32)))\n", + "```\n", + "\n", + "#### Model 
validation while defining its connectivity graph\n", + "\n", + "In the functional API, the input specification (shape and dtype) is created\n", + "in advance (using `Input`). Every time you call a layer,\n", + "the layer checks that the specification passed to it matches its assumptions,\n", + "and it will raise a helpful error message if not.\n", + "\n", + "This guarantees that any model you can build with the functional API will run.\n", + "All debugging -- other than convergence-related debugging --\n", + "happens statically during the model construction and not at execution time.\n", + "This is similar to type checking in a compiler.\n", + "\n", + "#### A functional model is plottable and inspectable\n", + "\n", + "You can plot the model as a graph, and you can easily access intermediate nodes\n", + "in this graph. For example, to extract and reuse the activations of intermediate\n", + "layers (as seen in a previous example):\n", + "\n", + "```python\n", + "features_list = [layer.output for layer in vgg19.layers]\n", + "feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list)\n", + "```\n", + "\n", + "#### A functional model can be serialized or cloned\n", + "\n", + "Because a functional model is a data structure rather than a piece of code,\n", + "it is safely serializable and can be saved as a single file\n", + "that allows you to recreate the exact same model\n", + "without having access to any of the original code.\n", + "See the [serialization & saving guide](https://www.tensorflow.org/guide/keras/save_and_serialize/).\n", + "\n", + "To serialize a subclassed model, it is necessary for the implementer\n", + "to specify a `get_config()`\n", + "and `from_config()` method at the model level.\n", + "\n", + "\n", + "### Functional API weakness:\n", + "\n", + "#### It does not support dynamic architectures\n", + "\n", + "The functional API treats models as DAGs of layers.\n", + "This is true for most deep learning architectures, but not all 
-- for example,\n", + "recursive networks or Tree RNNs do not follow this assumption and cannot\n", + "be implemented in the functional API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Mix-and-match API styles\n", + "\n", + "Choosing between the functional API or Model subclassing isn't a\n", + "binary decision that restricts you into one category of models.\n", + "All models in the `tf.keras` API can interact with each other, whether they're\n", + "`Sequential` models, functional models, or subclassed models that are written\n", + "from scratch.\n", + "\n", + "You can always use a functional model or `Sequential` model\n", + "as part of a subclassed model or layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "units = 32\n", + "timesteps = 10\n", + "input_dim = 5\n", + "\n", + "# Define a Functional model\n", + "inputs = keras.Input((None, units))\n", + "x = layers.GlobalAveragePooling1D()(inputs)\n", + "outputs = layers.Dense(1)(x)\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "\n", + "class CustomRNN(layers.Layer):\n", + " def __init__(self):\n", + " super(CustomRNN, self).__init__()\n", + " self.units = units\n", + " self.projection_1 = layers.Dense(units=units, activation=\"tanh\")\n", + " self.projection_2 = layers.Dense(units=units, activation=\"tanh\")\n", + " # Our previously-defined Functional model\n", + " self.classifier = model\n", + "\n", + " def call(self, inputs):\n", + " outputs = []\n", + " state = tf.zeros(shape=(inputs.shape[0], self.units))\n", + " for t in range(inputs.shape[1]):\n", + " x = inputs[:, t, :]\n", + " h = self.projection_1(x)\n", + " y = h + self.projection_2(state)\n", + " state = y\n", + " outputs.append(y)\n", + " features = tf.stack(outputs, axis=1)\n", + " print(features.shape)\n", + " return self.classifier(features)\n", + "\n", + "\n", + "rnn_model = 
CustomRNN()\n", + "_ = rnn_model(tf.zeros((1, timesteps, input_dim)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can use any subclassed layer or model in the functional API\n", + "as long as it implements a `call` method that follows one of the following patterns:\n", + "\n", + "- `call(self, inputs, **kwargs)` --\n", + "Where `inputs` is a tensor or a nested structure of tensors (e.g. a list of tensors),\n", + "and where `**kwargs` are non-tensor arguments (non-inputs).\n", + "- `call(self, inputs, training=None, **kwargs)` --\n", + "Where `training` is a boolean indicating whether the layer should behave\n", + "in training mode and inference mode.\n", + "- `call(self, inputs, mask=None, **kwargs)` --\n", + "Where `mask` is a boolean mask tensor (useful for RNNs, for instance).\n", + "- `call(self, inputs, training=None, mask=None, **kwargs)` --\n", + "Of course, you can have both masking and training-specific behavior at the same time.\n", + "\n", + "Additionally, if you implement the `get_config` method on your custom Layer or model,\n", + "the functional models you create will still be serializable and cloneable.\n", + "\n", + "Here's a quick example of a custom RNN, written from scratch,\n", + "being used in a functional model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "units = 32\n", + "timesteps = 10\n", + "input_dim = 5\n", + "batch_size = 16\n", + "\n", + "\n", + "class CustomRNN(layers.Layer):\n", + " def __init__(self):\n", + " super(CustomRNN, self).__init__()\n", + " self.units = units\n", + " self.projection_1 = layers.Dense(units=units, activation=\"tanh\")\n", + " self.projection_2 = layers.Dense(units=units, activation=\"tanh\")\n", + " self.classifier = layers.Dense(1)\n", + "\n", + " def call(self, inputs):\n", + " outputs = []\n", + " state = tf.zeros(shape=(inputs.shape[0], 
self.units))\n", + " for t in range(inputs.shape[1]):\n", + " x = inputs[:, t, :]\n", + " h = self.projection_1(x)\n", + " y = h + self.projection_2(state)\n", + " state = y\n", + " outputs.append(y)\n", + " features = tf.stack(outputs, axis=1)\n", + " return self.classifier(features)\n", + "\n", + "\n", + "# Note that you specify a static batch size for the inputs with the `batch_shape`\n", + "# arg, because the inner computation of `CustomRNN` requires a static batch size\n", + "# (when you create the `state` zeros tensor).\n", + "inputs = keras.Input(batch_shape=(batch_size, timesteps, input_dim))\n", + "x = layers.Conv1D(32, 3)(inputs)\n", + "outputs = CustomRNN()(x)\n", + "\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "rnn_model = CustomRNN()\n", + "_ = rnn_model(tf.zeros((1, 10, 5)))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "functional", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/masking_and_padding.ipynb b/tf/masking_and_padding.ipynb new file mode 100644 index 0000000000..f9838f975b --- /dev/null +++ b/tf/masking_and_padding.ipynb @@ -0,0 +1,618 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Masking and padding with Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "**Masking** is a way to tell sequence-processing layers that certain timesteps\n", + "in an input are missing, and thus should be skipped when processing the data.\n", + "\n", + "**Padding** is a special form of masking were the masked steps are at the start or at\n", + "the beginning of a sequence. Padding comes from the need to encode sequence data into\n", + "contiguous batches: in order to make all sequences in a batch fit a given standard\n", + "length, it is necessary to pad or truncate some sequences.\n", + "\n", + "Let's take a close look." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Padding sequence data\n", + "\n", + "When processing sequence data, it is very common for individual samples to have\n", + "different lengths. Consider the following example (text tokenized as words):\n", + "\n", + "```\n", + "[\n", + " [\"Hello\", \"world\", \"!\"],\n", + " [\"How\", \"are\", \"you\", \"doing\", \"today\"],\n", + " [\"The\", \"weather\", \"will\", \"be\", \"nice\", \"tomorrow\"],\n", + "]\n", + "```\n", + "\n", + "After vocabulary lookup, the data might be vectorized as integers, e.g.:\n", + "\n", + "```\n", + "[\n", + " [71, 1331, 4231]\n", + " [73, 8, 3215, 55, 927],\n", + " [83, 91, 1, 645, 1253, 927],\n", + "]\n", + "```\n", + "\n", + "The data is a nested list where individual samples have length 3, 5, and 6,\n", + "respectively. 
Since the input data for a deep learning model must be a single tensor\n", + "(of shape e.g. `(batch_size, 6, vocab_size)` in this case), samples that are shorter\n", + "than the longest item need to be padded with some placeholder value (alternatively,\n", + "one might also truncate long samples before padding short samples).\n", + "\n", + "Keras provides a utility function to truncate and pad Python lists to a common length:\n", + "`tf.keras.preprocessing.sequence.pad_sequences`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "raw_inputs = [\n", + " [711, 632, 71],\n", + " [73, 8, 3215, 55, 927],\n", + " [83, 91, 1, 645, 1253, 927],\n", + "]\n", + "\n", + "# By default, this will pad using 0s; it is configurable via the\n", + "# \"value\" parameter.\n", + "# Note that you could \"pre\" padding (at the beginning) or\n", + "# \"post\" padding (at the end).\n", + "# We recommend using \"post\" padding when working with RNN layers\n", + "# (in order to be able to use the\n", + "# CuDNN implementation of the layers).\n", + "padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(\n", + " raw_inputs, padding=\"post\"\n", + ")\n", + "print(padded_inputs)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Masking\n", + "\n", + "Now that all samples have a uniform length, the model must be informed that some part\n", + "of the data is actually padding and should be ignored. That mechanism is **masking**.\n", + "\n", + "There are three ways to introduce input masks in Keras models:\n", + "\n", + "- Add a `keras.layers.Masking` layer.\n", + "- Configure a `keras.layers.Embedding` layer with `mask_zero=True`.\n", + "- Pass a `mask` argument manually when calling layers that support this argument (e.g.\n", + "RNN layers)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Mask-generating layers: `Embedding` and `Masking`\n", + "\n", + "Under the hood, these layers will create a mask tensor (2D tensor with shape `(batch,\n", + "sequence_length)`), and attach it to the tensor output returned by the `Masking` or\n", + "`Embedding` layer." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)\n", + "masked_output = embedding(padded_inputs)\n", + "\n", + "print(masked_output._keras_mask)\n", + "\n", + "masking_layer = layers.Masking()\n", + "# Simulate the embedding lookup by expanding the 2D input to 3D,\n", + "# with embedding dimension of 10.\n", + "unmasked_embedding = tf.cast(\n", + " tf.tile(tf.expand_dims(padded_inputs, axis=-1), [1, 1, 10]), tf.float32\n", + ")\n", + "\n", + "masked_embedding = masking_layer(unmasked_embedding)\n", + "print(masked_embedding._keras_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "As you can see from the printed result, the mask is a 2D boolean tensor with shape\n", + "`(batch_size, sequence_length)`, where each individual `False` entry indicates that\n", + "the corresponding timestep should be ignored during processing." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Mask propagation in the Functional API and Sequential API\n", + "\n", + "When using the Functional API or the Sequential API, a mask generated by an `Embedding`\n", + "or `Masking` layer will be propagated through the network for any layer that is\n", + "capable of using them (for example, RNN layers). 
Keras will automatically fetch the\n", + "mask corresponding to an input and pass it to any layer that knows how to use it.\n", + "\n", + "For instance, in the following Sequential model, the `LSTM` layer will automatically\n", + "receive a mask, which means it will ignore padded values:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential(\n", + " [layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True), layers.LSTM(32),]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "This is also the case for the following Functional API model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n", + "x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)\n", + "outputs = layers.LSTM(32)(x)\n", + "\n", + "model = keras.Model(inputs, outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Passing mask tensors directly to layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Layers that can handle masks (such as the `LSTM` layer) have a `mask` argument in their\n", + "`__call__` method.\n", + "\n", + "Meanwhile, layers that produce a mask (e.g. 
`Embedding`) expose a `compute_mask(input,\n", + "previous_mask)` method which you can call.\n", + "\n", + "Thus, you can pass the output of the `compute_mask()` method of a mask-producing layer\n", + "to the `__call__` method of a mask-consuming layer, like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class MyLayer(layers.Layer):\n", + " def __init__(self, **kwargs):\n", + " super(MyLayer, self).__init__(**kwargs)\n", + " self.embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)\n", + " self.lstm = layers.LSTM(32)\n", + "\n", + " def call(self, inputs):\n", + " x = self.embedding(inputs)\n", + " # Note that you could also prepare a `mask` tensor manually.\n", + " # It only needs to be a boolean tensor\n", + " # with the right shape, i.e. (batch_size, timesteps).\n", + " mask = self.embedding.compute_mask(inputs)\n", + " output = self.lstm(x, mask=mask) # The layer will ignore the masked values\n", + " return output\n", + "\n", + "\n", + "layer = MyLayer()\n", + "x = np.random.random((32, 10)) * 100\n", + "x = x.astype(\"int32\")\n", + "layer(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Supporting masking in your custom layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Sometimes, you may need to write layers that generate a mask (like `Embedding`), or\n", + "layers that need to modify the current mask.\n", + "\n", + "For instance, any layer that produces a tensor with a different time dimension than its\n", + "input, such as a `Concatenate` layer that concatenates on the time dimension, will\n", + "need to modify the current mask so that downstream layers will be able to properly\n", + "take masked timesteps into account.\n", + "\n", + "To do this, your layer should implement the `layer.compute_mask()` method, 
which\n", + "produces a new mask given the input and the current mask.\n", + "\n", + "Here is an example of a `TemporalSplit` layer that needs to modify the current mask." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class TemporalSplit(keras.layers.Layer):\n", + " \"\"\"Split the input tensor into 2 tensors along the time dimension.\"\"\"\n", + "\n", + " def call(self, inputs):\n", + " # Expect the input to be 3D and mask to be 2D, split the input tensor into 2\n", + " # subtensors along the time axis (axis 1).\n", + " return tf.split(inputs, 2, axis=1)\n", + "\n", + " def compute_mask(self, inputs, mask=None):\n", + " # Also split the mask into 2 if it presents.\n", + " if mask is None:\n", + " return None\n", + " return tf.split(mask, 2, axis=1)\n", + "\n", + "\n", + "first_half, second_half = TemporalSplit()(masked_embedding)\n", + "print(first_half._keras_mask)\n", + "print(second_half._keras_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here is another example of a `CustomEmbedding` layer that is capable of generating a\n", + "mask from input values:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomEmbedding(keras.layers.Layer):\n", + " def __init__(self, input_dim, output_dim, mask_zero=False, **kwargs):\n", + " super(CustomEmbedding, self).__init__(**kwargs)\n", + " self.input_dim = input_dim\n", + " self.output_dim = output_dim\n", + " self.mask_zero = mask_zero\n", + "\n", + " def build(self, input_shape):\n", + " self.embeddings = self.add_weight(\n", + " shape=(self.input_dim, self.output_dim),\n", + " initializer=\"random_normal\",\n", + " dtype=\"float32\",\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.nn.embedding_lookup(self.embeddings, inputs)\n", + "\n", + " 
def compute_mask(self, inputs, mask=None):\n", + " if not self.mask_zero:\n", + " return None\n", + " return tf.not_equal(inputs, 0)\n", + "\n", + "\n", + "layer = CustomEmbedding(10, 32, mask_zero=True)\n", + "x = np.random.random((3, 10)) * 9\n", + "x = x.astype(\"int32\")\n", + "\n", + "y = layer(x)\n", + "mask = layer.compute_mask(x)\n", + "\n", + "print(mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Opting-in to mask propagation on compatible layers\n", + "\n", + "Most layers don't modify the time dimension, so don't need to modify the current mask.\n", + "However, they may still want to be able to **propagate** the current mask, unchanged,\n", + "to the next layer. **This is an opt-in behavior.** By default, a custom layer will\n", + "destroy the current mask (since the framework has no way to tell whether propagating\n", + "the mask is safe to do).\n", + "\n", + "If you have a custom layer that does not modify the time dimension, and if you want it\n", + "to be able to propagate the current input mask, you should set `self.supports_masking\n", + "= True` in the layer constructor. 
In this case, the default behavior of\n", + "`compute_mask()` is to just pass the current mask through.\n", + "\n", + "Here's an example of a layer that is whitelisted for mask propagation:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class MyActivation(keras.layers.Layer):\n", + " def __init__(self, **kwargs):\n", + " super(MyActivation, self).__init__(**kwargs)\n", + " # Signal that the layer is safe for mask propagation\n", + " self.supports_masking = True\n", + "\n", + " def call(self, inputs):\n", + " return tf.nn.relu(inputs)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can now use this custom layer in-between a mask-generating layer (like `Embedding`)\n", + "and a mask-consuming layer (like `LSTM`), and it will pass the mask along so that it\n", + "reaches the mask-consuming layer." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n", + "x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)\n", + "x = MyActivation()(x) # Will pass the mask along\n", + "print(\"Mask found:\", x._keras_mask)\n", + "outputs = layers.LSTM(32)(x) # Will receive the mask\n", + "\n", + "model = keras.Model(inputs, outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Writing layers that need mask information\n", + "\n", + "Some layers are mask *consumers*: they accept a `mask` argument in `call` and use it to\n", + "determine whether to skip certain time steps.\n", + "\n", + "To write such a layer, you can simply add a `mask=None` argument in your `call`\n", + "signature. 
The mask associated with the inputs will be passed to your layer whenever\n", + "it is available.\n", + "\n", + "Here's a simple example below: a layer that computes a softmax over the time dimension\n", + "(axis 1) of an input sequence, while discarding masked timesteps." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class TemporalSoftmax(keras.layers.Layer):\n", + " def call(self, inputs, mask=None):\n", + " broadcast_float_mask = tf.expand_dims(tf.cast(mask, \"float32\"), -1)\n", + " inputs_exp = tf.exp(inputs) * broadcast_float_mask\n", + " inputs_sum = tf.reduce_sum(inputs * broadcast_float_mask, axis=1, keepdims=True)\n", + " return inputs_exp / inputs_sum\n", + "\n", + "\n", + "inputs = keras.Input(shape=(None,), dtype=\"int32\")\n", + "x = layers.Embedding(input_dim=10, output_dim=32, mask_zero=True)(inputs)\n", + "x = layers.Dense(1)(x)\n", + "outputs = TemporalSoftmax()(x)\n", + "\n", + "model = keras.Model(inputs, outputs)\n", + "y = model(np.random.randint(0, 10, size=(32, 100)), np.random.random((32, 100, 1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Summary\n", + "\n", + "That is all you need to know about padding & masking in Keras. To recap:\n", + "\n", + "- \"Masking\" is how layers are able to know when to skip / ignore certain timesteps in\n", + "sequence inputs.\n", + "- Some layers are mask-generators: `Embedding` can generate a mask from input values\n", + "(if `mask_zero=True`), and so can the `Masking` layer.\n", + "- Some layers are mask-consumers: they expose a `mask` argument in their `__call__`\n", + "method. 
This is the case for RNN layers.\n", + "- In the Functional API and Sequential API, mask information is propagated\n", + "automatically.\n", + "- When using layers in a standalone way, you can pass the `mask` arguments to layers\n", + "manually.\n", + "- You can easily write layers that modify the current mask, that generate a new mask,\n", + "or that consume the mask associated with the inputs." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "masking_and_padding", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/rnn.ipynb b/tf/rnn.ipynb new file mode 100644 index 0000000000..91565a66e4 --- /dev/null +++ b/tf/rnn.ipynb @@ -0,0 +1,918 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Recurrent Neural Networks (RNN) with Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "Recurrent neural networks (RNN) are a class of neural networks that is powerful for\n", + "modeling sequence data such as time series or natural language.\n", + "\n", + "Schematically, a RNN layer uses a `for` loop to iterate over the timesteps of a\n", + "sequence, while maintaining an internal state that encodes information about the\n", + "timesteps it has seen so far.\n", + "\n", + "The Keras RNN API is designed with a focus on:\n", + "\n", + "- **Ease of use**: the built-in `keras.layers.RNN`, `keras.layers.LSTM`,\n", + "`keras.layers.GRU` layers enable you to quickly build recurrent models without\n", + "having to make difficult configuration choices.\n", + "\n", + "- **Ease of customization**: You can also define your own RNN cell layer (the inner\n", + "part of the `for` loop) with custom behavior, and use it with the generic\n", + "`keras.layers.RNN` layer (the `for` loop itself). This allows you to quickly\n", + "prototype different research ideas in a flexible way with minimal code." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Built-in RNN layers: a simple example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "There are three built-in RNN layers in Keras:\n", + "\n", + "1. `keras.layers.SimpleRNN`, a fully-connected RNN where the output from previous\n", + "timestep is to be fed to next timestep.\n", + "\n", + "2. 
`keras.layers.GRU`, first proposed in\n", + "[Cho et al., 2014](https://arxiv.org/abs/1406.1078).\n", + "\n", + "3. `keras.layers.LSTM`, first proposed in\n", + "[Hochreiter & Schmidhuber, 1997](https://www.bioinf.jku.at/publications/older/2604.pdf).\n", + "\n", + "In early 2015, Keras had the first reusable open-source Python implementations of LSTM\n", + "and GRU.\n", + "\n", + "Here is a simple example of a `Sequential` model that processes sequences of integers,\n", + "embeds each integer into a 64-dimensional vector, then processes the sequence of\n", + "vectors using a `LSTM` layer." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "# Add an Embedding layer expecting input vocab of size 1000, and\n", + "# output embedding dimension of size 64.\n", + "model.add(layers.Embedding(input_dim=1000, output_dim=64))\n", + "\n", + "# Add a LSTM layer with 128 internal units.\n", + "model.add(layers.LSTM(128))\n", + "\n", + "# Add a Dense layer with 10 units.\n", + "model.add(layers.Dense(10))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Built-in RNNs support a number of useful features:\n", + "\n", + "- Recurrent dropout, via the `dropout` and `recurrent_dropout` arguments\n", + "- Ability to process an input sequence in reverse, via the `go_backwards` argument\n", + "- Loop unrolling (which can lead to a large speedup when processing short sequences on\n", + "CPU), via the `unroll` argument\n", + "- ...and more.\n", + "\n", + "For more information, see the\n", + "[RNN API documentation](https://keras.io/api/layers/recurrent_layers/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Outputs and states\n", + "\n", + "By default, the output of a RNN layer contains a single vector per sample. 
This vector\n", + "is the RNN cell output corresponding to the last timestep, containing information\n", + "about the entire input sequence. The shape of this output is `(batch_size, units)`\n", + "where `units` corresponds to the `units` argument passed to the layer's constructor.\n", + "\n", + "A RNN layer can also return the entire sequence of outputs for each sample (one vector\n", + "per timestep per sample), if you set `return_sequences=True`. The shape of this output\n", + "is `(batch_size, timesteps, units)`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "model.add(layers.Embedding(input_dim=1000, output_dim=64))\n", + "\n", + "# The output of GRU will be a 3D tensor of shape (batch_size, timesteps, 256)\n", + "model.add(layers.GRU(256, return_sequences=True))\n", + "\n", + "# The output of SimpleRNN will be a 2D tensor of shape (batch_size, 128)\n", + "model.add(layers.SimpleRNN(128))\n", + "\n", + "model.add(layers.Dense(10))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "In addition, a RNN layer can return its final internal state(s). The returned states\n", + "can be used to resume the RNN execution later, or\n", + "[to initialize another RNN](https://arxiv.org/abs/1409.3215).\n", + "This setting is commonly used in the\n", + "encoder-decoder sequence-to-sequence model, where the encoder final state is used as\n", + "the initial state of the decoder.\n", + "\n", + "To configure a RNN layer to return its internal state, set the `return_state` parameter\n", + "to `True` when creating the layer. 
Note that `LSTM` has 2 state tensors, but `GRU`\n", + "only has one.\n", + "\n", + "To configure the initial state of the layer, just call the layer with additional\n", + "keyword argument `initial_state`.\n", + "Note that the shape of the state needs to match the unit size of the layer, like in the\n", + "example below." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "encoder_vocab = 1000\n", + "decoder_vocab = 2000\n", + "\n", + "encoder_input = layers.Input(shape=(None,))\n", + "encoder_embedded = layers.Embedding(input_dim=encoder_vocab, output_dim=64)(\n", + " encoder_input\n", + ")\n", + "\n", + "# Return states in addition to output\n", + "output, state_h, state_c = layers.LSTM(64, return_state=True, name=\"encoder\")(\n", + " encoder_embedded\n", + ")\n", + "encoder_state = [state_h, state_c]\n", + "\n", + "decoder_input = layers.Input(shape=(None,))\n", + "decoder_embedded = layers.Embedding(input_dim=decoder_vocab, output_dim=64)(\n", + " decoder_input\n", + ")\n", + "\n", + "# Pass the 2 states to a new LSTM layer, as initial state\n", + "decoder_output = layers.LSTM(64, name=\"decoder\")(\n", + " decoder_embedded, initial_state=encoder_state\n", + ")\n", + "output = layers.Dense(10)(decoder_output)\n", + "\n", + "model = keras.Model([encoder_input, decoder_input], output)\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## RNN layers and RNN cells\n", + "\n", + "In addition to the built-in RNN layers, the RNN API also provides cell-level APIs.\n", + "Unlike RNN layers, which processes whole batches of input sequences, the RNN cell only\n", + "processes a single timestep.\n", + "\n", + "The cell is the inside of the `for` loop of a RNN layer. Wrapping a cell inside a\n", + "`keras.layers.RNN` layer gives you a layer capable of processing batches of\n", + "sequences, e.g. 
`RNN(LSTMCell(10))`.\n", + "\n", + "Mathematically, `RNN(LSTMCell(10))` produces the same result as `LSTM(10)`. In fact,\n", + "the implementation of this layer in TF v1.x was just creating the corresponding RNN\n", + "cell and wrapping it in a RNN layer. However using the built-in `GRU` and `LSTM`\n", + "layers enables the use of CuDNN and you may see better performance.\n", + "\n", + "There are three built-in RNN cells, each of them corresponding to the matching RNN\n", + "layer.\n", + "\n", + "- `keras.layers.SimpleRNNCell` corresponds to the `SimpleRNN` layer.\n", + "\n", + "- `keras.layers.GRUCell` corresponds to the `GRU` layer.\n", + "\n", + "- `keras.layers.LSTMCell` corresponds to the `LSTM` layer.\n", + "\n", + "The cell abstraction, together with the generic `keras.layers.RNN` class, make it\n", + "very easy to implement custom RNN architectures for your research." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Cross-batch statefulness\n", + "\n", + "When processing very long sequences (possibly infinite), you may want to use the\n", + "pattern of **cross-batch statefulness**.\n", + "\n", + "Normally, the internal state of a RNN layer is reset every time it sees a new batch\n", + "(i.e. every sample seen by the layer is assume to be independent from the past). The\n", + "layer will only maintain a state while processing a given sample.\n", + "\n", + "If you have very long sequences though, it is useful to break them into shorter\n", + "sequences, and to feed these shorter sequences sequentially into a RNN layer without\n", + "resetting the layer's state. That way, the layer can retain information about the\n", + "entirety of the sequence, even though it's only seeing one sub-sequence at a time.\n", + "\n", + "You can do this by setting `stateful=True` in the constructor.\n", + "\n", + "If you have a sequence `s = [t0, t1, ... 
t1546, t1547]`, you would split it into e.g.\n", + "\n", + "```\n", + "s1 = [t0, t1, ... t100]\n", + "s2 = [t101, ... t201]\n", + "...\n", + "s16 = [t1501, ... t1547]\n", + "```\n", + "\n", + "Then you would process it via:\n", + "\n", + "```python\n", + "lstm_layer = layers.LSTM(64, stateful=True)\n", + "for s in sub_sequences:\n", + " output = lstm_layer(s)\n", + "```\n", + "\n", + "When you want to clear the state, you can use `layer.reset_states()`.\n", + "\n", + "\n", + "> Note: In this setup, sample `i` in a given batch is assumed to be the continuation of\n", + "sample `i` in the previous batch. This means that all batches should contain the same\n", + "number of samples (batch size). E.g. if a batch contains `[sequence_A_from_t0_to_t100,\n", + " sequence_B_from_t0_to_t100]`, the next batch should contain\n", + "`[sequence_A_from_t101_to_t200, sequence_B_from_t101_to_t200]`.\n", + "\n", + "\n", + "\n", + "\n", + "Here is a complete example:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "paragraph3 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "\n", + "lstm_layer = layers.LSTM(64, stateful=True)\n", + "output = lstm_layer(paragraph1)\n", + "output = lstm_layer(paragraph2)\n", + "output = lstm_layer(paragraph3)\n", + "\n", + "# reset_states() will reset the cached state to the original initial_state.\n", + "# If no initial_state was provided, zero-states will be used by default.\n", + "lstm_layer.reset_states()\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### RNN State Reuse\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The recorded states of the RNN layer are not included in the 
`layer.weights()`. If you\n", + "would like to reuse the state from a RNN layer, you can retrieve the states value by\n", + "`layer.states` and use it as the\n", + "initial state for a new layer via the Keras functional API like `new_layer(inputs,\n", + "initial_state=layer.states)`, or model subclassing.\n", + "\n", + "Please also note that sequential model might not be used in this case since it only\n", + "supports layers with single input and output, the extra input of initial state makes\n", + "it impossible to use here." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "paragraph3 = np.random.random((20, 10, 50)).astype(np.float32)\n", + "\n", + "lstm_layer = layers.LSTM(64, stateful=True)\n", + "output = lstm_layer(paragraph1)\n", + "output = lstm_layer(paragraph2)\n", + "\n", + "existing_state = lstm_layer.states\n", + "\n", + "new_lstm_layer = layers.LSTM(64)\n", + "new_output = new_lstm_layer(paragraph3, initial_state=existing_state)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Bidirectional RNNs\n", + "\n", + "For sequences other than time series (e.g. text), it is often the case that a RNN model\n", + "can perform better if it not only processes sequence from start to end, but also\n", + "backwards. For example, to predict the next word in a sentence, it is often useful to\n", + "have the context around the word, not only just the words that come before it.\n", + "\n", + "Keras provides an easy API for you to build such bidirectional RNNs: the\n", + "`keras.layers.Bidirectional` wrapper." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "\n", + "model.add(\n", + " layers.Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(5, 10))\n", + ")\n", + "model.add(layers.Bidirectional(layers.LSTM(32)))\n", + "model.add(layers.Dense(10))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Under the hood, `Bidirectional` will copy the RNN layer passed in, and flip the\n", + "`go_backwards` field of the newly copied layer, so that it will process the inputs in\n", + "reverse order.\n", + "\n", + "The output of the `Bidirectional` RNN will be, by default, the sum of the forward layer\n", + "output and the backward layer output. If you need a different merging behavior, e.g.\n", + "concatenation, change the `merge_mode` parameter in the `Bidirectional` wrapper\n", + "constructor. For more details about `Bidirectional`, please check\n", + "[the API docs](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Bidirectional/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Performance optimization and CuDNN kernels\n", + "\n", + "In TensorFlow 2.0, the built-in LSTM and GRU layers have been updated to leverage CuDNN\n", + "kernels by default when a GPU is available. With this change, the prior\n", + "`keras.layers.CuDNNLSTM/CuDNNGRU` layers have been deprecated, and you can build your\n", + "model without worrying about the hardware it will run on.\n", + "\n", + "Since the CuDNN kernel is built with certain assumptions, this means the layer **will\n", + "not be able to use the CuDNN kernel if you change the defaults of the built-in LSTM or\n", + "GRU layers**. 
E.g.:\n", + "\n", + "- Changing the `activation` function from `tanh` to something else.\n", + "- Changing the `recurrent_activation` function from `sigmoid` to something else.\n", + "- Using `recurrent_dropout` > 0.\n", + "- Setting `unroll` to True, which forces LSTM/GRU to decompose the inner\n", + "`tf.while_loop` into an unrolled `for` loop.\n", + "- Setting `use_bias` to False.\n", + "- Using masking when the input data is not strictly right padded (if the mask\n", + "corresponds to strictly right padded data, CuDNN can still be used. This is the most\n", + "common case).\n", + "\n", + "For the detailed list of constraints, please see the documentation for the\n", + "[LSTM](https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM/) and\n", + "[GRU](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU/) layers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Using CuDNN kernels when available\n", + "\n", + "Let's build a simple LSTM model to demonstrate the performance difference.\n", + "\n", + "We'll use as input sequences the sequence of rows of MNIST digits (treating each row of\n", + "pixels as a timestep), and we'll predict the digit's label." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "# Each MNIST image batch is a tensor of shape (batch_size, 28, 28).\n", + "# Each input sequence will be of size (28, 28) (height is treated like time).\n", + "input_dim = 28\n", + "\n", + "units = 64\n", + "output_size = 10 # labels are from 0 to 9\n", + "\n", + "# Build the RNN model\n", + "def build_model(allow_cudnn_kernel=True):\n", + " # CuDNN is only available at the layer level, and not at the cell level.\n", + " # This means `LSTM(units)` will use the CuDNN kernel,\n", + " # while RNN(LSTMCell(units)) will run on non-CuDNN kernel.\n", + " if allow_cudnn_kernel:\n", + " # The LSTM layer with default options uses CuDNN.\n", + " lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))\n", + " else:\n", + " # Wrapping a LSTMCell in a RNN layer will not use CuDNN.\n", + " lstm_layer = keras.layers.RNN(\n", + " keras.layers.LSTMCell(units), input_shape=(None, input_dim)\n", + " )\n", + " model = keras.models.Sequential(\n", + " [\n", + " lstm_layer,\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(output_size),\n", + " ]\n", + " )\n", + " return model\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's load the MNIST dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "mnist = keras.datasets.mnist\n", + "\n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", + "x_train, x_test = x_train / 255.0, x_test / 255.0\n", + "sample, sample_label = x_train[0], y_train[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's create a model instance and train it.\n", + "\n", + "We choose `sparse_categorical_crossentropy` as the loss function for the model. 
The\n", + "output of the model has shape of `[batch_size, 10]`. The target for the model is a\n", + "integer vector, each of the integer is in the range of 0 to 9." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = build_model(allow_cudnn_kernel=True)\n", + "\n", + "model.compile(\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=\"sgd\",\n", + " metrics=[\"accuracy\"],\n", + ")\n", + "\n", + "\n", + "model.fit(\n", + " x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Now, let's compare to a model that does not use the CuDNN kernel:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "noncudnn_model = build_model(allow_cudnn_kernel=False)\n", + "noncudnn_model.set_weights(model.get_weights())\n", + "noncudnn_model.compile(\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=\"sgd\",\n", + " metrics=[\"accuracy\"],\n", + ")\n", + "noncudnn_model.fit(\n", + " x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "When running on a machine with a NVIDIA GPU and CuDNN installed,\n", + "the model built with CuDNN is much faster to train compared to the\n", + "model that use the regular TensorFlow kernel.\n", + "\n", + "The same CuDNN-enabled model can also be use to run inference in a CPU-only\n", + "environment. 
The `tf.device` annotation below is just forcing the device placement.\n", + "The model will run on CPU by default if no GPU is available.\n", + "\n", + "You simply don't have to worry about the hardware you're running on anymore. Isn't that\n", + "pretty cool?" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "with tf.device(\"CPU:0\"):\n", + " cpu_model = build_model(allow_cudnn_kernel=True)\n", + " cpu_model.set_weights(model.get_weights())\n", + " result = tf.argmax(cpu_model.predict_on_batch(tf.expand_dims(sample, 0)), axis=1)\n", + " print(\n", + " \"Predicted result is: %s, target result is: %s\" % (result.numpy(), sample_label)\n", + " )\n", + " plt.imshow(sample, cmap=plt.get_cmap(\"gray\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## RNNs with list/dict inputs, or nested inputs\n", + "\n", + "Nested structures allow implementers to include more information within a single\n", + "timestep. For example, a video frame could have audio and video input at the same\n", + "time. The data shape in this case could be:\n", + "\n", + "`[batch, timestep, {\"video\": [height, width, channel], \"audio\": [frequency]}]`\n", + "\n", + "In another example, handwriting data could have both coordinates x and y for the\n", + "current position of the pen, as well as pressure information. So the data\n", + "representation could be:\n", + "\n", + "`[batch, timestep, {\"location\": [x, y], \"pressure\": [force]}]`\n", + "\n", + "The following code provides an example of how to build a custom RNN cell that accepts\n", + "such structured inputs." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Define a custom cell that support nested input/output" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "See [Making new Layers & Models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models/)\n", + "for details on writing your own layers." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class NestedCell(keras.layers.Layer):\n", + " def __init__(self, unit_1, unit_2, unit_3, **kwargs):\n", + " self.unit_1 = unit_1\n", + " self.unit_2 = unit_2\n", + " self.unit_3 = unit_3\n", + " self.state_size = [tf.TensorShape([unit_1]), tf.TensorShape([unit_2, unit_3])]\n", + " self.output_size = [tf.TensorShape([unit_1]), tf.TensorShape([unit_2, unit_3])]\n", + " super(NestedCell, self).__init__(**kwargs)\n", + "\n", + " def build(self, input_shapes):\n", + " # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)]\n", + " i1 = input_shapes[0][1]\n", + " i2 = input_shapes[1][1]\n", + " i3 = input_shapes[1][2]\n", + "\n", + " self.kernel_1 = self.add_weight(\n", + " shape=(i1, self.unit_1), initializer=\"uniform\", name=\"kernel_1\"\n", + " )\n", + " self.kernel_2_3 = self.add_weight(\n", + " shape=(i2, i3, self.unit_2, self.unit_3),\n", + " initializer=\"uniform\",\n", + " name=\"kernel_2_3\",\n", + " )\n", + "\n", + " def call(self, inputs, states):\n", + " # inputs should be in [(batch, input_1), (batch, input_2, input_3)]\n", + " # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)]\n", + " input_1, input_2 = tf.nest.flatten(inputs)\n", + " s1, s2 = states\n", + "\n", + " output_1 = tf.matmul(input_1, self.kernel_1)\n", + " output_2_3 = tf.einsum(\"bij,ijkl->bkl\", input_2, self.kernel_2_3)\n", + " state_1 = s1 + output_1\n", + " state_2_3 = s2 + output_2_3\n", + "\n", 
+ " output = (output_1, output_2_3)\n", + " new_states = (state_1, state_2_3)\n", + "\n", + " return output, new_states\n", + "\n", + " def get_config(self):\n", + " return {\"unit_1\": self.unit_1, \"unit_2\": self.unit_2, \"unit_3\": self.unit_3}\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Build a RNN model with nested input/output\n", + "\n", + "Let's build a Keras model that uses a `keras.layers.RNN` layer and the custom cell\n", + "we just defined." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "unit_1 = 10\n", + "unit_2 = 20\n", + "unit_3 = 30\n", + "\n", + "i1 = 32\n", + "i2 = 64\n", + "i3 = 32\n", + "batch_size = 64\n", + "num_batches = 10\n", + "timestep = 50\n", + "\n", + "cell = NestedCell(unit_1, unit_2, unit_3)\n", + "rnn = keras.layers.RNN(cell)\n", + "\n", + "input_1 = keras.Input((None, i1))\n", + "input_2 = keras.Input((None, i2, i3))\n", + "\n", + "outputs = rnn((input_1, input_2))\n", + "\n", + "model = keras.models.Model([input_1, input_2], outputs)\n", + "\n", + "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Train the model with randomly generated data\n", + "\n", + "Since there isn't a good candidate dataset for this model, we use random Numpy data for\n", + "demonstration."
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "input_1_data = np.random.random((batch_size * num_batches, timestep, i1))\n", + "input_2_data = np.random.random((batch_size * num_batches, timestep, i2, i3))\n", + "target_1_data = np.random.random((batch_size * num_batches, unit_1))\n", + "target_2_data = np.random.random((batch_size * num_batches, unit_2, unit_3))\n", + "input_data = [input_1_data, input_2_data]\n", + "target_data = [target_1_data, target_2_data]\n", + "\n", + "model.fit(input_data, target_data, batch_size=batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "With the Keras `keras.layers.RNN` layer, you are only expected to define the math\n", + "logic for individual step within the sequence, and the `keras.layers.RNN` layer\n", + "will handle the sequence iteration for you. It's an incredibly powerful way to quickly\n", + "prototype new kinds of RNNs (e.g. a LSTM variant).\n", + "\n", + "For more details, please visit the [API docs](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN/)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "rnn", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/save_and_serialize.ipynb b/tf/save_and_serialize.ipynb new file mode 100644 index 0000000000..d9ceed80df --- /dev/null +++ b/tf/save_and_serialize.ipynb @@ -0,0 +1,1330 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors."
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Save and load Keras models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "A Keras model consists of multiple components:\n", + "\n", + "- An architecture, or configuration, which specifies what layers the model\n", + "contains, and how they're connected.\n", + "- A set of weights values (the \"state of the model\").\n", + "- An optimizer (defined by compiling the model).\n", + "- A set of losses and metrics (defined by compiling the model or calling\n", + "`add_loss()` or `add_metric()`).\n", + "\n", + "The Keras API makes it possible to save all of these pieces to disk at once,\n", + "or to only selectively save some of them:\n", + "\n", + "- Saving everything into a single archive in the TensorFlow SavedModel format\n", + "(or in the older Keras H5 format). This is the standard practice.\n", + "- Saving the architecture / configuration only, typically as a JSON file.\n", + "- Saving the weights values only. This is generally used when training the model.\n", + "\n", + "Let's take a look at each of these options: when would you use one or the other?\n", + "How do they work?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The short answer to saving & loading\n", + "\n", + "If you only have 10 seconds to read this guide, here's what you need to know.\n", + "\n", + "**Saving a Keras model:**\n", + "\n", + "```python\n", + "model = ... # Get model (Sequential, Functional Model, or Model subclass)\n", + "model.save('path/to/location')\n", + "```\n", + "\n", + "**Loading the model back:**\n", + "\n", + "```python\n", + "from tensorflow import keras\n", + "model = keras.models.load_model('path/to/location')\n", + "```\n", + "\n", + "Now, let's look at the details."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Whole-model saving & loading\n", + "\n", + "You can save an entire model to a single artifact. It will include:\n", + "\n", + "- The model's architecture/config\n", + "- The model's weight values (which were learned during training)\n", + "- The model's compilation information (if `compile()`) was called\n", + "- The optimizer and its state, if any (this enables you to restart training\n", + "where you left)\n", + "\n", + "#### APIs\n", + "\n", + "- `model.save()` or `tf.keras.models.save_model()`\n", + "- `tf.keras.models.load_model()`\n", + "\n", + "There are two formats you can use to save an entire model to disk:\n", + "**the TensorFlow SavedModel format**, and **the older Keras H5 format**.\n", + "The recommended format is SavedModel. It is the default when you use `model.save()`.\n", + "\n", + "You can switch to the H5 format by:\n", + "\n", + "- Passing `format='h5'` to `save()`.\n", + "- Passing a filename that ends in `.h5` or `.keras` to `save()`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### SavedModel format\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def get_model():\n", + " # Create a simple model.\n", + " inputs = keras.Input(shape=(32,))\n", + " outputs = keras.layers.Dense(1)(inputs)\n", + " model = keras.Model(inputs, outputs)\n", + " model.compile(optimizer=\"adam\", loss=\"mean_squared_error\")\n", + " return model\n", + "\n", + "\n", + "model = get_model()\n", + "\n", + "# Train the model.\n", + "test_input = np.random.random((128, 32))\n", + "test_target = np.random.random((128, 1))\n", + "model.fit(test_input, test_target)\n", + "\n", + "# Calling `save('my_model')` creates a SavedModel folder `my_model`.\n", + "model.save(\"my_model\")\n", + "\n", + "# It can be used to reconstruct the model identically.\n", + "reconstructed_model = keras.models.load_model(\"my_model\")\n", + "\n", + "# Let's check:\n", + "np.testing.assert_allclose(\n", + " model.predict(test_input), reconstructed_model.predict(test_input)\n", + ")\n", + "\n", + "# The reconstructed model is already compiled and has retained the optimizer\n", + "# state, so training can resume:\n", + "reconstructed_model.fit(test_input, test_target)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### What the SavedModel contains\n", + "\n", + "Calling `model.save('my_model')` creates a folder named `my_model`,\n", + "containing the following:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "!ls my_model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The model architecture, and training configuration\n", + "(including the optimizer, losses, and metrics) are 
stored in `saved_model.pb`.\n", + "The weights are saved in the `variables/` directory.\n", + "\n", + "For detailed information on the SavedModel format, see the\n", + "[SavedModel guide (*The SavedModel format on disk*)](\n", + " https://www.tensorflow.org/guide/saved_model#the_savedmodel_format_on_disk).\n", + "\n", + "\n", + "#### How SavedModel handles custom objects\n", + "\n", + "When saving the model and its layers, the SavedModel format stores the\n", + "class name, **call function**, losses, and weights (and the config, if implemented).\n", + "The call function defines the computation graph of the model/layer.\n", + "\n", + "In the absence of the model/layer config, the call function is used to create\n", + "a model that exists like the original model which can be trained, evaluated,\n", + "and used for inference.\n", + "\n", + "Nevertheless, it is always a good practice to define the `get_config`\n", + "and `from_config` methods when writing a custom model or layer class.\n", + "This allows you to easily update the computation later if needed.\n", + "See the section about [Custom objects](save_and_serialize.ipynb#custom-objects)\n", + "for more information.\n", + "\n", + "Below is an example of what happens when loading custom layers from\n", + "the SavedModel format **without** overwriting the config methods."
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomModel(keras.Model):\n", + " def __init__(self, hidden_units):\n", + " super(CustomModel, self).__init__()\n", + " self.dense_layers = [keras.layers.Dense(u) for u in hidden_units]\n", + "\n", + " def call(self, inputs):\n", + " x = inputs\n", + " for layer in self.dense_layers:\n", + " x = layer(x)\n", + " return x\n", + "\n", + "\n", + "model = CustomModel([16, 16, 10])\n", + "# Build the model by calling it\n", + "input_arr = tf.random.uniform((1, 5))\n", + "outputs = model(input_arr)\n", + "model.save(\"my_model\")\n", + "\n", + "# Delete the custom-defined model class to ensure that the loader does not have\n", + "# access to it.\n", + "del CustomModel\n", + "\n", + "loaded = keras.models.load_model(\"my_model\")\n", + "np.testing.assert_allclose(loaded(input_arr), outputs)\n", + "\n", + "print(\"Original model:\", model)\n", + "print(\"Loaded model:\", loaded)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "As seen in the example above, the loader dynamically creates a new model class\n", + "that acts like the original model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Keras H5 format\n", + "\n", + "Keras also supports saving a single HDF5 file containing the model's architecture,\n", + "weights values, and `compile()` information.\n", + "It is a light-weight alternative to SavedModel.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_model()\n", + "\n", + "# Train the model.\n", + "test_input = np.random.random((128, 32))\n", + "test_target = np.random.random((128, 1))\n", + "model.fit(test_input, test_target)\n", + "\n", + "# Calling `save('my_model.h5')` creates a h5 file `my_model.h5`.\n", + "model.save(\"my_h5_model.h5\")\n", + "\n", + "# It can be used to reconstruct the model identically.\n", + "reconstructed_model = keras.models.load_model(\"my_h5_model.h5\")\n", + "\n", + "# Let's check:\n", + "np.testing.assert_allclose(\n", + " model.predict(test_input), reconstructed_model.predict(test_input)\n", + ")\n", + "\n", + "# The reconstructed model is already compiled and has retained the optimizer\n", + "# state, so training can resume:\n", + "reconstructed_model.fit(test_input, test_target)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Limitations\n", + "\n", + "Compared to the SavedModel format, there are two things that don't\n", + "get included in the H5 file:\n", + "\n", + "- **External losses & metrics** added via `model.add_loss()`\n", + "& `model.add_metric()` are not saved (unlike SavedModel).\n", + "If you have such losses & metrics on your model and you want to resume training,\n", + "you need to add these losses back yourself after loading the model.\n", + "Note that this does not apply to losses/metrics created *inside* layers via\n", + "`self.add_loss()` & `self.add_metric()`. 
As long as the layer gets loaded,\n", + "these losses & metrics are kept, since they are part of the `call` method of the layer.\n", + "- The **computation graph of custom objects** such as custom layers\n", + "is not included in the saved file. At loading time, Keras will need access\n", + "to the Python classes/functions of these objects in order to reconstruct the model.\n", + "See [Custom objects](save_and_serialize.ipynb#custom-objects).\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Saving the architecture\n", + "\n", + "The model's configuration (or architecture) specifies what layers the model\n", + "contains, and how these layers are connected*. If you have the configuration of a model,\n", + "then the model can be created with a freshly initialized state for the weights\n", + "and no compilation information.\n", + "\n", + "*Note this only applies to models defined using the functional or Sequential APIs,\n", + " not subclassed models." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Configuration of a Sequential model or Functional API model\n", + "\n", + "These types of models are explicit graphs of layers: their configuration\n", + "is always available in a structured form.\n", + "\n", + "#### APIs\n", + "\n", + "- `get_config()` and `from_config()`\n", + "- `to_json()` and `tf.keras.models.model_from_json()`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### `get_config()` and `from_config()`\n", + "\n", + "Calling `config = model.get_config()` will return a Python dict containing\n", + "the configuration of the model.
The same model can then be reconstructed via\n", + "`Sequential.from_config(config)` (for a `Sequential` model) or\n", + "`Model.from_config(config)` (for a Functional API model).\n", + "\n", + "The same workflow also works for any serializable layer.\n", + "\n", + "**Layer example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = keras.layers.Dense(3, activation=\"relu\")\n", + "layer_config = layer.get_config()\n", + "new_layer = keras.layers.Dense.from_config(layer_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "**Sequential model example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])\n", + "config = model.get_config()\n", + "new_model = keras.Sequential.from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "**Functional model example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input((32,))\n", + "outputs = keras.layers.Dense(1)(inputs)\n", + "model = keras.Model(inputs, outputs)\n", + "config = model.get_config()\n", + "new_model = keras.Model.from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### `to_json()` and `tf.keras.models.model_from_json()`\n", + "\n", + "This is similar to `get_config` / `from_config`, except it turns the model\n", + "into a JSON string, which can then be loaded without the original model class.\n", + "It is also specific to models, it isn't meant for layers.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + 
"colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])\n", + "json_config = model.to_json()\n", + "new_model = keras.models.model_from_json(json_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Custom objects\n", + "\n", + "**Models and layers**\n", + "\n", + "The architecture of subclassed models and layers is defined in the methods\n", + "`__init__` and `call`. They are considered Python bytecode,\n", + "which cannot be serialized into a JSON-compatible config\n", + "-- you could try serializing the bytecode (e.g. via `pickle`),\n", + "but it's completely unsafe and means your model cannot be loaded on a different system.\n", + "\n", + "In order to save/load a model with custom-defined layers, or a subclassed model,\n", + "you should overwrite the `get_config` and optionally `from_config` methods.\n", + "Additionally, you should register the custom object so that Keras is aware of it.\n", + "\n", + "**Custom functions**\n", + "\n", + "Custom-defined functions (e.g. activation, loss, or initialization) do not need\n", + "a `get_config` method. The function name is sufficient for loading as long\n", + "as it is registered as a custom object.\n", + "\n", + "**Loading the TensorFlow graph only**\n", + "\n", + "It's possible to load the TensorFlow graph generated by Keras. If you\n", + "do so, you won't need to provide any `custom_objects`.
You can do so like\n", + "this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.save(\"my_model\")\n", + "tensorflow_graph = tf.saved_model.load(\"my_model\")\n", + "x = np.random.uniform(size=(4, 32)).astype(np.float32)\n", + "predicted = tensorflow_graph(x).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that this method has several drawbacks:\n", + "* For traceability reasons, you should always have access to the custom\n", + "objects that were used. You wouldn't want to put in production a model\n", + "that you cannot re-create.\n", + "* The object returned by `tf.saved_model.load` isn't a Keras model. So it's\n", + "not as easy to use. For example, you won't have access to `.predict()` or `.fit()`\n", + "\n", + "Even if its use is discouraged, it can help you if you're in a tight spot,\n", + "for example, if you lost the code of your custom objects or have issues\n", + "loading the model with `tf.keras.models.load_model()`.\n", + "\n", + "You can find out more in\n", + "the [page about `tf.saved_model.load`](https://www.tensorflow.org/api_docs/python/tf/saved_model/load)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Defining the config methods\n", + "\n", + "Specifications:\n", + "\n", + "* `get_config` should return a JSON-serializable dictionary in order to be\n", + "compatible with the Keras architecture- and model-saving APIs.\n", + "* `from_config(config)` (`classmethod`) should return a new layer or model\n", + "object that is created from the config.\n", + "The default implementation returns `cls(**config)`.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomLayer(keras.layers.Layer):\n", + " def 
__init__(self, a):\n", + " self.var = tf.Variable(a, name=\"var_a\")\n", + "\n", + " def call(self, inputs, training=False):\n", + " if training:\n", + " return inputs * self.var\n", + " else:\n", + " return inputs\n", + "\n", + " def get_config(self):\n", + " return {\"a\": self.var.numpy()}\n", + "\n", + " # There's actually no need to define `from_config` here, since returning\n", + " # `cls(**config)` is the default behavior.\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " return cls(**config)\n", + "\n", + "\n", + "layer = CustomLayer(5)\n", + "layer.var.assign(2)\n", + "\n", + "serialized_layer = keras.layers.serialize(layer)\n", + "new_layer = keras.layers.deserialize(\n", + " serialized_layer, custom_objects={\"CustomLayer\": CustomLayer}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Registering the custom object\n", + "\n", + "Keras keeps a note of which class generated the config.\n", + "From the example above, `tf.keras.layers.serialize`\n", + "generates a serialized form of the custom layer:\n", + "\n", + "```\n", + "{'class_name': 'CustomLayer', 'config': {'a': 2}}\n", + "```\n", + "\n", + "Keras keeps a master list of all built-in layer, model, optimizer,\n", + "and metric classes, which is used to find the correct class to call `from_config`.\n", + "If the class can't be found, then an error is raised (`ValueError: Unknown layer`).\n", + "There are a few ways to register custom classes to this list:\n", + "\n", + "1. Setting `custom_objects` argument in the loading function. (see the example\n", + "in section above \"Defining the config methods\")\n", + "2. `tf.keras.utils.custom_object_scope` or `tf.keras.utils.CustomObjectScope`\n", + "3.
`tf.keras.utils.register_keras_serializable`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Custom layer and function example" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomLayer(keras.layers.Layer):\n", + " def __init__(self, units=32, **kwargs):\n", + " super(CustomLayer, self).__init__(**kwargs)\n", + " self.units = units\n", + "\n", + " def build(self, input_shape):\n", + " self.w = self.add_weight(\n", + " shape=(input_shape[-1], self.units),\n", + " initializer=\"random_normal\",\n", + " trainable=True,\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(self.units,), initializer=\"random_normal\", trainable=True\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " return tf.matmul(inputs, self.w) + self.b\n", + "\n", + " def get_config(self):\n", + " config = super(CustomLayer, self).get_config()\n", + " config.update({\"units\": self.units})\n", + " return config\n", + "\n", + "\n", + "def custom_activation(x):\n", + " return tf.nn.tanh(x) ** 2\n", + "\n", + "\n", + "# Make a model with the CustomLayer and custom_activation\n", + "inputs = keras.Input((32,))\n", + "x = CustomLayer(32)(inputs)\n", + "outputs = keras.layers.Activation(custom_activation)(x)\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "# Retrieve the config\n", + "config = model.get_config()\n", + "\n", + "# At loading time, register the custom objects with a `custom_object_scope`:\n", + "custom_objects = {\"CustomLayer\": CustomLayer, \"custom_activation\": custom_activation}\n", + "with keras.utils.custom_object_scope(custom_objects):\n", + " new_model = keras.Model.from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### In-memory model cloning\n", + "\n", + "You can also do in-memory cloning of a model via 
`tf.keras.models.clone_model()`.\n", + "This is equivalent to getting the config then recreating the model from its config\n", + "(so it does not preserve compilation information or layer weights values).\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "with keras.utils.custom_object_scope(custom_objects):\n", + " new_model = keras.models.clone_model(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Saving & loading only the model's weights values\n", + "\n", + "You can choose to only save & load a model's weights. This can be useful if:\n", + "\n", + "- You only need the model for inference: in this case you won't need to\n", + "restart training, so you don't need the compilation information or optimizer state.\n", + "- You are doing transfer learning: in this case you will be training a new model\n", + "reusing the state of a prior model, so you don't need the compilation\n", + "information of the prior model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### APIs for in-memory weight transfer\n", + "\n", + "Weights can be copied between different objects by using `get_weights`\n", + "and `set_weights`:\n", + "\n", + "* `tf.keras.layers.Layer.get_weights()`: Returns a list of numpy arrays.\n", + "* `tf.keras.layers.Layer.set_weights()`: Sets the model weights to the values\n", + "in the `weights` argument.\n", + "\n", + "Examples below.\n", + "\n", + "\n", + "***Transferring weights from one layer to another, in memory***" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def create_layer():\n", + " layer = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")\n", + " layer.build((None, 784))\n", + " return layer\n", + "\n", + "\n", + "layer_1 = create_layer()\n", + "layer_2 = create_layer()\n", + "\n", + "# Copy weights from layer 2 to layer 1\n", + "layer_2.set_weights(layer_1.get_weights())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "***Transferring weights from one model to another model with a\n", + "compatible architecture, in memory***" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Create a simple functional model\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", + "functional_model = keras.Model(inputs=inputs, outputs=outputs, name=\"3_layer_mlp\")\n", + "\n", + "# Define a subclassed model with the same architecture\n", + "class SubclassedModel(keras.Model):\n", + " def __init__(self, output_dim, name=None):\n", + "
super(SubclassedModel, self).__init__(name=name)\n", + " self.output_dim = output_dim\n", + " self.dense_1 = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")\n", + " self.dense_2 = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")\n", + " self.dense_3 = keras.layers.Dense(output_dim, name=\"predictions\")\n", + "\n", + " def call(self, inputs):\n", + " x = self.dense_1(inputs)\n", + " x = self.dense_2(x)\n", + " x = self.dense_3(x)\n", + " return x\n", + "\n", + " def get_config(self):\n", + " return {\"output_dim\": self.output_dim, \"name\": self.name}\n", + "\n", + "\n", + "subclassed_model = SubclassedModel(10)\n", + "# Call the subclassed model once to create the weights.\n", + "subclassed_model(tf.ones((1, 784)))\n", + "\n", + "# Copy weights from functional_model to subclassed_model.\n", + "subclassed_model.set_weights(functional_model.get_weights())\n", + "\n", + "assert len(functional_model.weights) == len(subclassed_model.weights)\n", + "for a, b in zip(functional_model.weights, subclassed_model.weights):\n", + " np.testing.assert_allclose(a.numpy(), b.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "***The case of stateless layers***\n", + "\n", + "Because stateless layers do not change the order or number of weights,\n", + "models can have compatible architectures even if there are extra/missing\n", + "stateless layers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", + "functional_model = keras.Model(inputs=inputs, outputs=outputs, name=\"3_layer_mlp\")\n", + "\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "\n", + "# Add a dropout layer, which does not contain any weights.\n", + "x = keras.layers.Dropout(0.5)(x)\n", + "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", + "functional_model_with_dropout = keras.Model(\n", + " inputs=inputs, outputs=outputs, name=\"3_layer_mlp\"\n", + ")\n", + "\n", + "functional_model_with_dropout.set_weights(functional_model.get_weights())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### APIs for saving weights to disk & loading them back\n", + "\n", + "Weights can be saved to disk by calling `model.save_weights`\n", + "in the following formats:\n", + "\n", + "* TensorFlow Checkpoint\n", + "* HDF5\n", + "\n", + "The default format for `model.save_weights` is TensorFlow checkpoint.\n", + "There are two ways to specify the save format:\n", + "\n", + "1. `save_format` argument: Set the value to `save_format=\"tf\"` or `save_format=\"h5\"`.\n", + "2. `path` argument: If the path ends with `.h5` or `.hdf5`,\n", + "then the HDF5 format is used. 
Other suffixes will result in a TensorFlow\n", + "checkpoint unless `save_format` is set.\n", + "\n", + "There is also an option of retrieving weights as in-memory numpy arrays.\n", + "Each API has its pros and cons, which are detailed below." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### TF Checkpoint format\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Runnable example\n", + "sequential_model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(784,), name=\"digits\"),\n", + " keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\"),\n", + " keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\"),\n", + " keras.layers.Dense(10, name=\"predictions\"),\n", + " ]\n", + ")\n", + "sequential_model.save_weights(\"ckpt\")\n", + "load_status = sequential_model.load_weights(\"ckpt\")\n", + "\n", + "# `assert_consumed` can be used as validation that all variable values have been\n", + "# restored from the checkpoint. See `tf.train.Checkpoint.restore` for other\n", + "# methods in the Status object.\n", + "load_status.assert_consumed()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Format details\n", + "\n", + "The TensorFlow Checkpoint format saves and restores the weights using\n", + "object attribute names.
For instance, consider the `tf.keras.layers.Dense` layer.\n", + "The layer contains two weights: `dense.kernel` and `dense.bias`.\n", + "When the layer is saved to the `tf` format, the resulting checkpoint contains the keys\n", + "`\"kernel\"` and `\"bias\"` and their corresponding weight values.\n", + "For more information see\n", + "[\"Loading mechanics\" in the TF Checkpoint guide](https://www.tensorflow.org/guide/checkpoint#loading_mechanics).\n", + "\n", + "Note that attribute/graph edge is named after **the name used in parent object,\n", + "not the name of the variable**. Consider the `CustomLayer` in the example below.\n", + "The variable `CustomLayer.var` is saved with `\"var\"` as part of key, not `\"var_a\"`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomLayer(keras.layers.Layer):\n", + " def __init__(self, a):\n", + " self.var = tf.Variable(a, name=\"var_a\")\n", + "\n", + "\n", + "layer = CustomLayer(5)\n", + "layer_ckpt = tf.train.Checkpoint(layer=layer).save(\"custom_layer\")\n", + "\n", + "ckpt_reader = tf.train.load_checkpoint(layer_ckpt)\n", + "\n", + "ckpt_reader.get_variable_to_dtype_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Transfer learning example\n", + "\n", + "Essentially, as long as two models have the same architecture,\n", + "they are able to share the same checkpoint.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", + "functional_model = keras.Model(inputs=inputs, 
outputs=outputs, name=\"3_layer_mlp\")\n", + "\n", + "# Extract a portion of the functional model defined in the Setup section.\n", + "# The following lines produce a new model that excludes the final output\n", + "# layer of the functional model.\n", + "pretrained = keras.Model(\n", + " functional_model.inputs, functional_model.layers[-1].input, name=\"pretrained_model\"\n", + ")\n", + "# Randomly assign \"trained\" weights.\n", + "for w in pretrained.weights:\n", + " w.assign(tf.random.normal(w.shape))\n", + "pretrained.save_weights(\"pretrained_ckpt\")\n", + "pretrained.summary()\n", + "\n", + "# Assume this is a separate program where only 'pretrained_ckpt' exists.\n", + "# Create a new functional model with a different output dimension.\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = keras.layers.Dense(5, name=\"predictions\")(x)\n", + "model = keras.Model(inputs=inputs, outputs=outputs, name=\"new_model\")\n", + "\n", + "# Load the weights from pretrained_ckpt into model.\n", + "model.load_weights(\"pretrained_ckpt\")\n", + "\n", + "# Check that all of the pretrained weights have been loaded.\n", + "for a, b in zip(pretrained.weights, model.weights):\n", + " np.testing.assert_allclose(a.numpy(), b.numpy())\n", + "\n", + "print(\"\\n\", \"-\" * 50)\n", + "model.summary()\n", + "\n", + "# Example 2: Sequential model\n", + "# Recreate the pretrained model, and load the saved weights.\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "pretrained_model = keras.Model(inputs=inputs, outputs=x, name=\"pretrained\")\n", + "\n", + "# Sequential example:\n", + "model = keras.Sequential([pretrained_model, 
keras.layers.Dense(5, name=\"predictions\")])\n", + "model.summary()\n", + "\n", + "pretrained_model.load_weights(\"pretrained_ckpt\")\n", + "\n", + "# Warning! Calling `model.load_weights('pretrained_ckpt')` won't throw an error,\n", + "# but will *not* work as expected. If you inspect the weights, you'll see that\n", + "# none of the weights will have loaded. `pretrained_model.load_weights()` is the\n", + "# correct method to call." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "It is generally recommended to stick to the same API for building models. If you\n", + "switch between Sequential and Functional, or Functional and subclassed,\n", + "etc., then always rebuild the pre-trained model and load the pre-trained\n", + "weights to that model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The next question is, how can weights be saved and loaded to different models\n", + "if the model architectures are quite different?\n", + "The solution is to use `tf.train.Checkpoint` to save and restore the exact layers/variables.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Create a subclassed model that essentially uses functional_model's first\n", + "# and last layers.\n", + "# First, save the weights of functional_model's first and last dense layers.\n", + "first_dense = functional_model.layers[1]\n", + "last_dense = functional_model.layers[-1]\n", + "ckpt_path = tf.train.Checkpoint(\n", + " dense=first_dense, kernel=last_dense.kernel, bias=last_dense.bias\n", + ").save(\"ckpt\")\n", + "\n", + "# Define the subclassed model.\n", + "class ContrivedModel(keras.Model):\n", + " def __init__(self):\n", + " super(ContrivedModel, self).__init__()\n", + " self.first_dense = keras.layers.Dense(64)\n", + " self.kernel = self.add_variable(\"kernel\", 
shape=(64, 10))\n", + " self.bias = self.add_variable(\"bias\", shape=(10,))\n", + "\n", + " def call(self, inputs):\n", + " x = self.first_dense(inputs)\n", + " return tf.matmul(x, self.kernel) + self.bias\n", + "\n", + "\n", + "model = ContrivedModel()\n", + "# Call model on inputs to create the variables of the dense layer.\n", + "_ = model(tf.ones((1, 784)))\n", + "\n", + "# Create a Checkpoint with the same structure as before, and load the weights.\n", + "tf.train.Checkpoint(\n", + " dense=model.first_dense, kernel=model.kernel, bias=model.bias\n", + ").restore(ckpt_path).assert_consumed()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### HDF5 format\n", + "\n", + "The HDF5 format contains weights grouped by layer names.\n", + "The weights are lists ordered by concatenating the list of trainable weights\n", + "to the list of non-trainable weights (same as `layer.weights`).\n", + "Thus, a model can use a hdf5 checkpoint if it has the same layers and trainable\n", + "statuses as saved in the checkpoint.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Runnable example\n", + "sequential_model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(784,), name=\"digits\"),\n", + " keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\"),\n", + " keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\"),\n", + " keras.layers.Dense(10, name=\"predictions\"),\n", + " ]\n", + ")\n", + "sequential_model.save_weights(\"weights.h5\")\n", + "sequential_model.load_weights(\"weights.h5\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that changing `layer.trainable` may result in a different\n", + "`layer.weights` ordering when the model contains nested layers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class NestedDenseLayer(keras.layers.Layer):\n", + " def __init__(self, units, name=None):\n", + " super(NestedDenseLayer, self).__init__(name=name)\n", + " self.dense_1 = keras.layers.Dense(units, name=\"dense_1\")\n", + " self.dense_2 = keras.layers.Dense(units, name=\"dense_2\")\n", + "\n", + " def call(self, inputs):\n", + " return self.dense_2(self.dense_1(inputs))\n", + "\n", + "\n", + "nested_model = keras.Sequential([keras.Input((784,)), NestedDenseLayer(10, \"nested\")])\n", + "variable_names = [v.name for v in nested_model.weights]\n", + "print(\"variables: {}\".format(variable_names))\n", + "\n", + "print(\"\\nChanging trainable status of one of the nested layers...\")\n", + "nested_model.get_layer(\"nested\").dense_1.trainable = False\n", + "\n", + "variable_names_2 = [v.name for v in nested_model.weights]\n", + "print(\"\\nvariables: {}\".format(variable_names_2))\n", + "print(\"variable ordering changed:\", variable_names != variable_names_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "#### Transfer learning example\n", + "\n", + "When loading pretrained weights from HDF5, it is recommended to load\n", + "the weights into the original checkpointed model, and then extract\n", + "the desired weights/layers into a new model.\n", + "\n", + "**Example:**" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def create_functional_model():\n", + " inputs = keras.Input(shape=(784,), name=\"digits\")\n", + " x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + " x = keras.layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + " outputs = keras.layers.Dense(10, name=\"predictions\")(x)\n", + " return keras.Model(inputs=inputs, 
outputs=outputs, name=\"3_layer_mlp\")\n", + "\n", + "\n", + "functional_model = create_functional_model()\n", + "functional_model.save_weights(\"pretrained_weights.h5\")\n", + "\n", + "# In a separate program:\n", + "pretrained_model = create_functional_model()\n", + "pretrained_model.load_weights(\"pretrained_weights.h5\")\n", + "\n", + "# Create a new model by extracting layers from the original model:\n", + "extracted_layers = pretrained_model.layers[:-1]\n", + "extracted_layers.append(keras.layers.Dense(5, name=\"dense_3\"))\n", + "model = keras.Sequential(extracted_layers)\n", + "model.summary()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "save_and_serialize", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/sequential_model.ipynb b/tf/sequential_model.ipynb new file mode 100644 index 0000000000..d279239b63 --- /dev/null +++ b/tf/sequential_model.ipynb @@ -0,0 +1,736 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# The Sequential model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## When to use a Sequential model\n", + "\n", + "A `Sequential` model is appropriate for **a plain stack of layers**\n", + "where each layer has **exactly one input tensor and one output tensor**.\n", + "\n", + "Schematically, the following `Sequential` model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Define Sequential model with 3 layers\n", + "model = keras.Sequential(\n", + " [\n", + " layers.Dense(2, activation=\"relu\", name=\"layer1\"),\n", + " layers.Dense(3, activation=\"relu\", name=\"layer2\"),\n", + " layers.Dense(4, name=\"layer3\"),\n", + " ]\n", + ")\n", + "# Call model on a test input\n", + "x = tf.ones((3, 3))\n", + "y = model(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "is equivalent to this function:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Create 3 layers\n", + "layer1 = layers.Dense(2, activation=\"relu\", name=\"layer1\")\n", + "layer2 = layers.Dense(3, activation=\"relu\", name=\"layer2\")\n", + "layer3 = layers.Dense(4, name=\"layer3\")\n", + "\n", + "# Call layers on a test input\n", + "x = tf.ones((3, 3))\n", + "y = layer3(layer2(layer1(x)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "A Sequential model is **not appropriate** when:\n", + "\n", + "- Your model has multiple inputs or 
multiple outputs\n", + "- Any of your layers has multiple inputs or multiple outputs\n", + "- You need to do layer sharing\n", + "- You want non-linear topology (e.g. a residual connection, a multi-branch\n", + "model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Creating a Sequential model\n", + "\n", + "You can create a Sequential model by passing a list of layers to the Sequential\n", + "constructor:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential(\n", + " [\n", + " layers.Dense(2, activation=\"relu\"),\n", + " layers.Dense(3, activation=\"relu\"),\n", + " layers.Dense(4),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Its layers are accessible via the `layers` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can also create a Sequential model incrementally via the `add()` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "model.add(layers.Dense(2, activation=\"relu\"))\n", + "model.add(layers.Dense(3, activation=\"relu\"))\n", + "model.add(layers.Dense(4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that there's also a corresponding `pop()` method to remove layers:\n", + "a Sequential model behaves very much like a list of layers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.pop()\n", + "print(len(model.layers)) # 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Also note that the Sequential constructor accepts a `name` argument, just like\n", + "any layer or model in Keras. This is useful to annotate TensorBoard graphs\n", + "with semantically meaningful names." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential(name=\"my_sequential\")\n", + "model.add(layers.Dense(2, activation=\"relu\", name=\"layer1\"))\n", + "model.add(layers.Dense(3, activation=\"relu\", name=\"layer2\"))\n", + "model.add(layers.Dense(4, name=\"layer3\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Specifying the input shape in advance\n", + "\n", + "Generally, all layers in Keras need to know the shape of their inputs\n", + "in order to be able to create their weights. 
So when you create a layer like\n", + "this, initially, it has no weights:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = layers.Dense(3)\n", + "layer.weights # Empty" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "It creates its weights the first time it is called on an input, since the shape\n", + "of the weights depends on the shape of the inputs:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Call layer on a test input\n", + "x = tf.ones((1, 4))\n", + "y = layer(x)\n", + "layer.weights # Now it has weights, of shape (4, 3) and (3,)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Naturally, this also applies to Sequential models. When you instantiate a\n", + "Sequential model without an input shape, it isn't \"built\": it has no weights\n", + "(and calling\n", + "`model.weights` results in an error stating just this). 
The weights are created\n", + "when the model first sees some input data:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential(\n", + " [\n", + " layers.Dense(2, activation=\"relu\"),\n", + " layers.Dense(3, activation=\"relu\"),\n", + " layers.Dense(4),\n", + " ]\n", + ") # No weights at this stage!\n", + "\n", + "# At this point, you can't do this:\n", + "# model.weights\n", + "\n", + "# You also can't do this:\n", + "# model.summary()\n", + "\n", + "# Call the model on a test input\n", + "x = tf.ones((1, 4))\n", + "y = model(x)\n", + "print(\"Number of weights after calling the model:\", len(model.weights)) # 6" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Once a model is \"built\", you can call its `summary()` method to display its\n", + "contents:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "However, it can be very useful when building a Sequential model incrementally\n", + "to be able to display the summary of the model so far, including the current\n", + "output shape. 
In this case, you should start your model by passing an `Input`\n", + "object to your model, so that it knows its input shape from the start:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "model.add(keras.Input(shape=(4,)))\n", + "model.add(layers.Dense(2, activation=\"relu\"))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that the `Input` object is not displayed as part of `model.layers`, since\n", + "it isn't a layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "A simple alternative is to just pass an `input_shape` argument to your first\n", + "layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "model.add(layers.Dense(2, activation=\"relu\", input_shape=(4,)))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Models built with a predefined input shape like this always have weights (even\n", + "before seeing any data) and always have a defined output shape.\n", + "\n", + "In general, it's a recommended best practice to always specify the input shape\n", + "of a Sequential model in advance if you know what it is." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## A common debugging workflow: `add()` + `summary()`\n", + "\n", + "When building a new Sequential architecture, it's useful to incrementally stack\n", + "layers with `add()` and frequently print model summaries. 
For instance, this\n", + "enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is\n", + "downsampling image feature maps:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = keras.Sequential()\n", + "model.add(keras.Input(shape=(250, 250, 3))) # 250x250 RGB images\n", + "model.add(layers.Conv2D(32, 5, strides=2, activation=\"relu\"))\n", + "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", + "model.add(layers.MaxPooling2D(3))\n", + "\n", + "# Can you guess what the current output shape is at this point? Probably not.\n", + "# Let's just print it:\n", + "model.summary()\n", + "\n", + "# The answer was: (40, 40, 32), so we can keep downsampling...\n", + "\n", + "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", + "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", + "model.add(layers.MaxPooling2D(3))\n", + "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", + "model.add(layers.Conv2D(32, 3, activation=\"relu\"))\n", + "model.add(layers.MaxPooling2D(2))\n", + "\n", + "# And now?\n", + "model.summary()\n", + "\n", + "# Now that we have 4x4 feature maps, time to apply global max pooling.\n", + "model.add(layers.GlobalMaxPooling2D())\n", + "\n", + "# Finally, we add a classification layer.\n", + "model.add(layers.Dense(10))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Very practical, right?\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## What to do once you have a model\n", + "\n", + "Once your model architecture is ready, you will want to:\n", + "\n", + "- Train your model, evaluate it, and run inference. See our\n", + "[guide to training & evaluation with the built-in loops](\n", + "https://www.tensorflow.org/guide/keras/train_and_evaluate/)\n", + "- Save your model to disk and restore it. 
See our\n", + "[guide to serialization & saving](https://www.tensorflow.org/guide/keras/save_and_serialize/).\n", + "- Speed up model training by leveraging multiple GPUs. See our\n", + "[guide to multi-GPU and distributed training](https://keras.io/guides/distributed_training/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Feature extraction with a Sequential model\n", + "\n", + "Once a Sequential model has been built, it behaves like a [Functional API\n", + "model](https://www.tensorflow.org/guide/keras/functional/). This means that every layer has an `input`\n", + "and `output` attribute. These attributes can be used to do neat things, like\n", + "quickly\n", + "creating a model that extracts the outputs of all intermediate layers in a\n", + "Sequential model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "initial_model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(250, 250, 3)),\n", + " layers.Conv2D(32, 5, strides=2, activation=\"relu\"),\n", + " layers.Conv2D(32, 3, activation=\"relu\"),\n", + " layers.Conv2D(32, 3, activation=\"relu\"),\n", + " ]\n", + ")\n", + "feature_extractor = keras.Model(\n", + " inputs=initial_model.inputs,\n", + " outputs=[layer.output for layer in initial_model.layers],\n", + ")\n", + "\n", + "# Call feature extractor on test input.\n", + "x = tf.ones((1, 250, 250, 3))\n", + "features = feature_extractor(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's a similar example that only extracts features from one layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "initial_model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(250, 250, 3)),\n", + " layers.Conv2D(32, 5, strides=2, activation=\"relu\"),\n", + " layers.Conv2D(32, 3, 
activation=\"relu\", name=\"my_intermediate_layer\"),\n", + " layers.Conv2D(32, 3, activation=\"relu\"),\n", + " ]\n", + ")\n", + "feature_extractor = keras.Model(\n", + " inputs=initial_model.inputs,\n", + " outputs=initial_model.get_layer(name=\"my_intermediate_layer\").output,\n", + ")\n", + "# Call feature extractor on test input.\n", + "x = tf.ones((1, 250, 250, 3))\n", + "features = feature_extractor(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Transfer learning with a Sequential model\n", + "\n", + "Transfer learning consists of freezing the bottom layers in a model and only training\n", + "the top layers. If you aren't familiar with it, make sure to read our [guide\n", + "to transfer learning](https://www.tensorflow.org/guide/keras/transfer_learning/).\n", + "\n", + "Here are two common transfer learning blueprints involving Sequential models.\n", + "\n", + "First, let's say that you have a Sequential model, and you want to freeze all\n", + "layers except the last one. In this case, you would simply iterate over\n", + "`model.layers` and set `layer.trainable = False` on each layer, except the\n", + "last one. 
Like this:\n", + "\n", + "```python\n", + "model = keras.Sequential([\n", + " keras.Input(shape=(784,)),\n", + " layers.Dense(32, activation='relu'),\n", + " layers.Dense(32, activation='relu'),\n", + " layers.Dense(32, activation='relu'),\n", + " layers.Dense(10),\n", + "])\n", + "\n", + "# Presumably you would want to first load pre-trained weights.\n", + "model.load_weights(...)\n", + "\n", + "# Freeze all layers except the last one.\n", + "for layer in model.layers[:-1]:\n", + " layer.trainable = False\n", + "\n", + "# Recompile and train (this will only update the weights of the last layer).\n", + "model.compile(...)\n", + "model.fit(...)\n", + "```\n", + "\n", + "Another common blueprint is to use a Sequential model to stack a pre-trained\n", + "model and some freshly initialized classification layers. Like this:\n", + "\n", + "```python\n", + "# Load a convolutional base with pre-trained weights\n", + "base_model = keras.applications.Xception(\n", + " weights='imagenet',\n", + " include_top=False,\n", + " pooling='avg')\n", + "\n", + "# Freeze the base model\n", + "base_model.trainable = False\n", + "\n", + "# Use a Sequential model to add a trainable classifier on top\n", + "model = keras.Sequential([\n", + " base_model,\n", + " layers.Dense(1000),\n", + "])\n", + "\n", + "# Compile & train\n", + "model.compile(...)\n", + "model.fit(...)\n", + "```\n", + "\n", + "If you do transfer learning, you will probably find yourself frequently using\n", + "these two patterns." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "That's about all you need to know about Sequential models!\n", + "\n", + "To find out more about building models in Keras, see:\n", + "\n", + "- [Guide to the Functional API](https://www.tensorflow.org/guide/keras/functional/)\n", + "- [Guide to making new Layers & Models via subclassing](\n", + "https://www.tensorflow.org/guide/keras/custom_layers_and_models/)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sequential_model", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/train_and_evaluate.ipynb b/tf/train_and_evaluate.ipynb new file mode 100644 index 0000000000..931a661fec --- /dev/null +++ b/tf/train_and_evaluate.ipynb @@ -0,0 +1,1904 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Training & evaluation with the built-in methods" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "This guide covers training, evaluation, and prediction (inference) models\n", + "when using built-in APIs for training & validation (such as `model.fit()`,\n", + "`model.evaluate()`, `model.predict()`).\n", + "\n", + "If you are interested in leveraging `fit()` while specifying your\n", + "own training step function, see the guide\n", + "[\"customizing what happens in `fit()`\"](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit/).\n", + "\n", + "If you are interested in writing your own training & evaluation loops from\n", + "scratch, see the guide\n", + "[\"writing a training loop from scratch\"](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch/).\n", + "\n", + "In general, whether you are using built-in loops or writing your own, model training &\n", + "evaluation works strictly in the same way across every kind of Keras model --\n", + "Sequential models, models built with the Functional API, and models written from\n", + "scratch via model subclassing.\n", + "\n", + "This guide doesn't cover distributed training. For distributed training, see\n", + "our [guide to multi-gpu & distributed training](/guides/distributed_training/)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## API overview: a first end-to-end example\n", + "\n", + "When passing data to the built-in training loops of a model, you should either use\n", + "**NumPy arrays** (if your data is small and fits in memory) or **`tf.data Dataset`\n", + "objects**. In the next few paragraphs, we'll use the MNIST dataset as NumPy arrays, in\n", + "order to demonstrate how to use optimizers, losses, and metrics.\n", + "\n", + "Let's consider the following model (here, we build it with the Functional API, but it\n", + "could be a Sequential model or a subclassed model as well):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = layers.Dense(10, activation=\"softmax\", name=\"predictions\")(x)\n", + "\n", + "model = keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's what the typical end-to-end workflow looks like, consisting of:\n", + "\n", + "- Training\n", + "- Validation on a holdout set generated from the original training data\n", + "- Evaluation on the test data\n", + "\n", + "We'll use MNIST data for this example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", + "\n", + "# Preprocess the data (these are NumPy arrays)\n", + "x_train = x_train.reshape(60000, 784).astype(\"float32\") / 255\n", + "x_test = x_test.reshape(10000, 784).astype(\"float32\") / 255\n", + "\n", + "y_train = y_train.astype(\"float32\")\n", + "y_test = y_test.astype(\"float32\")\n", + "\n", + "# Reserve 10,000 samples for validation\n", + "x_val = x_train[-10000:]\n", + "y_val = y_train[-10000:]\n", + "x_train = x_train[:-10000]\n", + "y_train = y_train[:-10000]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "We specify the training configuration (optimizer, loss, metrics):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(), # Optimizer\n", + " # Loss function to minimize\n", + " loss=keras.losses.SparseCategoricalCrossentropy(),\n", + " # List of metrics to monitor\n", + " metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "We call `fit()`, which will train the model by slicing the data into \"batches\" of size\n", + "\"batch_size\", and repeatedly iterating over the entire dataset for a given number of\n", + "\"epochs\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "print(\"Fit model on training data\")\n", + "history = model.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=64,\n", + " epochs=2,\n", + " # We pass some validation for\n", + " # monitoring validation loss and metrics\n", + " # at the end of each epoch\n", + " validation_data=(x_val, y_val),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The returned \"history\" object holds a record of the loss values and metric values\n", + "during training:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "history.history" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "We evaluate the model on the test data via `evaluate()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Evaluate the model on the test data using `evaluate`\n", + "print(\"Evaluate on test data\")\n", + "results = model.evaluate(x_test, y_test, batch_size=128)\n", + "print(\"test loss, test acc:\", results)\n", + "\n", + "# Generate predictions (probabilities -- the output of the last layer)\n", + "# on new data using `predict`\n", + "print(\"Generate predictions for 3 samples\")\n", + "predictions = model.predict(x_test[:3])\n", + "print(\"predictions shape:\", predictions.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Now, let's review each piece of this workflow in detail." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The `compile()` method: specifying a loss, metrics, and an optimizer\n", + "\n", + "To train a model with `fit()`, you need to specify a loss function, an optimizer, and\n", + "optionally, some metrics to monitor.\n", + "\n", + "You pass these to the model as arguments to the `compile()` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(),\n", + " metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The `metrics` argument should be a list -- your model can have any number of metrics.\n", + "\n", + "If your model has multiple outputs, you can specify different losses and metrics for\n", + "each output, and you can modulate the contribution of each output to the total loss of\n", + "the model. 
You will find more details about this in the section **\"Passing data to\n", + "multi-input, multi-output models\"**.\n", + "\n", + "Note that if you're satisfied with the default settings, in many cases the optimizer,\n", + "loss, and metrics can be specified via string identifiers as a shortcut:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=\"rmsprop\",\n", + " loss=\"sparse_categorical_crossentropy\",\n", + " metrics=[\"sparse_categorical_accuracy\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For later reuse, let's put our model definition and compile step in functions; we will\n", + "call them several times across different examples in this guide." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def get_uncompiled_model():\n", + " inputs = keras.Input(shape=(784,), name=\"digits\")\n", + " x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + " x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + " outputs = layers.Dense(10, activation=\"softmax\", name=\"predictions\")(x)\n", + " model = keras.Model(inputs=inputs, outputs=outputs)\n", + " return model\n", + "\n", + "\n", + "def get_compiled_model():\n", + " model = get_uncompiled_model()\n", + " model.compile(\n", + " optimizer=\"rmsprop\",\n", + " loss=\"sparse_categorical_crossentropy\",\n", + " metrics=[\"sparse_categorical_accuracy\"],\n", + " )\n", + " return model\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Many built-in optimizers, losses, and metrics are available\n", + "\n", + "In general, you won't have to create from scratch your own losses, metrics, or\n", + "optimizers, because what you need is likely 
already part of the Keras API:\n", + "\n", + "Optimizers:\n", + "\n", + "- `SGD()` (with or without momentum)\n", + "- `RMSprop()`\n", + "- `Adam()`\n", + "- etc.\n", + "\n", + "Losses:\n", + "\n", + "- `MeanSquaredError()`\n", + "- `KLDivergence()`\n", + "- `CosineSimilarity()`\n", + "- etc.\n", + "\n", + "Metrics:\n", + "\n", + "- `AUC()`\n", + "- `Precision()`\n", + "- `Recall()`\n", + "- etc." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Custom losses\n", + "\n", + "There are two ways to provide custom losses with Keras. The first example creates a\n", + "function that accepts inputs `y_true` and `y_pred`. The following example shows a loss\n", + "function that computes the mean squared error between the real data and the\n", + "predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "def custom_mean_squared_error(y_true, y_pred):\n", + " return tf.math.reduce_mean(tf.square(y_true - y_pred))\n", + "\n", + "\n", + "model = get_uncompiled_model()\n", + "model.compile(optimizer=keras.optimizers.Adam(), loss=custom_mean_squared_error)\n", + "\n", + "# We need to one-hot encode the labels to use MSE\n", + "y_train_one_hot = tf.one_hot(y_train, depth=10)\n", + "model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "If you need a loss function that takes in parameters beside `y_true` and `y_pred`, you\n", + "can subclass the `tf.keras.losses.Loss` class and implement the following two methods:\n", + "\n", + "- `__init__(self)`: accept parameters to pass during the call of your loss function\n", + "- `call(self, y_true, y_pred)`: use the targets (y_true) and the model predictions\n", + "(y_pred) to compute the model's loss\n", + "\n", + "Let's say you want to use mean squared error, but with an added 
term that\n", + "will de-incentivize prediction values far from 0.5 (we assume that the categorical\n", + "targets are one-hot encoded and take values between 0 and 1). This\n", + "creates an incentive for the model not to be too confident, which may help\n", + "reduce overfitting (we won't know if it works until we try!).\n", + "\n", + "Here's how you would do it:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CustomMSE(keras.losses.Loss):\n", + " def __init__(self, regularization_factor=0.1, name=\"custom_mse\"):\n", + " super().__init__(name=name)\n", + " self.regularization_factor = regularization_factor\n", + "\n", + " def call(self, y_true, y_pred):\n", + " mse = tf.math.reduce_mean(tf.square(y_true - y_pred))\n", + " reg = tf.math.reduce_mean(tf.square(0.5 - y_pred))\n", + " return mse + reg * self.regularization_factor\n", + "\n", + "\n", + "model = get_uncompiled_model()\n", + "model.compile(optimizer=keras.optimizers.Adam(), loss=CustomMSE())\n", + "\n", + "y_train_one_hot = tf.one_hot(y_train, depth=10)\n", + "model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Custom metrics\n", + "\n", + "If you need a metric that isn't part of the API, you can easily create custom metrics\n", + "by subclassing the `tf.keras.metrics.Metric` class. 
You will need to implement 4\n", + "methods:\n", + "\n", + "- `__init__(self)`, in which you will create state variables for your metric.\n", + "- `update_state(self, y_true, y_pred, sample_weight=None)`, which uses the targets\n", + "y_true and the model predictions y_pred to update the state variables.\n", + "- `result(self)`, which uses the state variables to compute the final results.\n", + "- `reset_states(self)`, which reinitializes the state of the metric.\n", + "\n", + "State update and results computation are kept separate (in `update_state()` and\n", + "`result()`, respectively) because in some cases, results computation might be very\n", + "expensive, and would only be done periodically.\n", + "\n", + "Here's a simple example showing how to implement a `CategoricalTruePositives` metric,\n", + "that counts how many samples were correctly classified as belonging to a given class:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class CategoricalTruePositives(keras.metrics.Metric):\n", + " def __init__(self, name=\"categorical_true_positives\", **kwargs):\n", + " super(CategoricalTruePositives, self).__init__(name=name, **kwargs)\n", + " self.true_positives = self.add_weight(name=\"ctp\", initializer=\"zeros\")\n", + "\n", + " def update_state(self, y_true, y_pred, sample_weight=None):\n", + " y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))\n", + " values = tf.cast(y_true, \"int32\") == tf.cast(y_pred, \"int32\")\n", + " values = tf.cast(values, \"float32\")\n", + " if sample_weight is not None:\n", + " sample_weight = tf.cast(sample_weight, \"float32\")\n", + " values = tf.multiply(values, sample_weight)\n", + " self.true_positives.assign_add(tf.reduce_sum(values))\n", + "\n", + " def result(self):\n", + " return self.true_positives\n", + "\n", + " def reset_states(self):\n", + " # The state of the metric will be reset at the start of each 
epoch.\n", + " self.true_positives.assign(0.0)\n", + "\n", + "\n", + "model = get_uncompiled_model()\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(),\n", + " metrics=[CategoricalTruePositives()],\n", + ")\n", + "model.fit(x_train, y_train, batch_size=64, epochs=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Handling losses and metrics that don't fit the standard signature\n", + "\n", + "The overwhelming majority of losses and metrics can be computed from `y_true` and\n", + "`y_pred`, where `y_pred` is an output of your model. But not all of them. For\n", + "instance, a regularization loss may only require the activation of a layer (there are\n", + "no targets in this case), and this activation may not be a model output.\n", + "\n", + "In such cases, you can call `self.add_loss(loss_value)` from inside the call method of\n", + "a custom layer. Losses added in this way get added to the \"main\" loss during training\n", + "(the one passed to `compile()`). 
Here's a simple example that adds activity\n", + "regularization (note that activity regularization is built-in in all Keras layers --\n", + "this layer is just for the sake of providing a concrete example):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class ActivityRegularizationLayer(layers.Layer):\n", + " def call(self, inputs):\n", + " self.add_loss(tf.reduce_sum(inputs) * 0.1)\n", + " return inputs # Pass-through layer.\n", + "\n", + "\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "\n", + "# Insert activity regularization as a layer\n", + "x = ActivityRegularizationLayer()(x)\n", + "\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x)\n", + "\n", + "model = keras.Model(inputs=inputs, outputs=outputs)\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + ")\n", + "\n", + "# The displayed loss will be much higher than before\n", + "# due to the regularization component.\n", + "model.fit(x_train, y_train, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can do the same for logging metric values, using `add_metric()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class MetricLoggingLayer(layers.Layer):\n", + " def call(self, inputs):\n", + " # The `aggregation` argument defines\n", + " # how to aggregate the per-batch values\n", + " # over each epoch:\n", + " # in this case we simply average them.\n", + " self.add_metric(\n", + " keras.backend.std(inputs), name=\"std_of_activation\", 
aggregation=\"mean\"\n", + " )\n", + " return inputs # Pass-through layer.\n", + "\n", + "\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "\n", + "# Insert std logging as a layer.\n", + "x = MetricLoggingLayer()(x)\n", + "\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x)\n", + "\n", + "model = keras.Model(inputs=inputs, outputs=outputs)\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + ")\n", + "model.fit(x_train, y_train, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "In the [Functional API](https://www.tensorflow.org/guide/keras/functional/),\n", + "you can also call `model.add_loss(loss_tensor)`,\n", + "or `model.add_metric(metric_tensor, name, aggregation)`.\n", + "\n", + "Here's a simple example:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x1 = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x2 = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x1)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x2)\n", + "model = keras.Model(inputs=inputs, outputs=outputs)\n", + "\n", + "model.add_loss(tf.reduce_sum(x1) * 0.1)\n", + "\n", + "model.add_metric(keras.backend.std(x1), name=\"std_of_activation\", aggregation=\"mean\")\n", + "\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + ")\n", + "model.fit(x_train, y_train, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": 
"text" + }, + "source": [ + "Note that when you pass losses via `add_loss()`, it becomes possible to call\n", + "`compile()` without a loss function, since the model already has a loss to minimize.\n", + "\n", + "Consider the following `LogisticEndpoint` layer: it takes as inputs\n", + "targets & logits, and it tracks a crossentropy loss via `add_loss()`. It also\n", + "tracks classification accuracy via `add_metric()`." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class LogisticEndpoint(keras.layers.Layer):\n", + " def __init__(self, name=None):\n", + " super(LogisticEndpoint, self).__init__(name=name)\n", + " self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", + " self.accuracy_fn = keras.metrics.BinaryAccuracy()\n", + "\n", + " def call(self, targets, logits, sample_weights=None):\n", + " # Compute the training-time loss value and add it\n", + " # to the layer using `self.add_loss()`.\n", + " loss = self.loss_fn(targets, logits, sample_weights)\n", + " self.add_loss(loss)\n", + "\n", + " # Log accuracy as a metric and add it\n", + " # to the layer using `self.add_metric()`.\n", + " acc = self.accuracy_fn(targets, logits, sample_weights)\n", + " self.add_metric(acc, name=\"accuracy\")\n", + "\n", + " # Return the inference-time prediction tensor (for `.predict()`).\n", + " return tf.nn.softmax(logits)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You can use it in a model with two inputs (input data & targets), compiled without a\n", + "`loss` argument, like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "inputs = keras.Input(shape=(3,), name=\"inputs\")\n", + "targets = keras.Input(shape=(10,), name=\"targets\")\n", + "logits = 
keras.layers.Dense(10)(inputs)\n", + "predictions = LogisticEndpoint(name=\"predictions\")(logits, targets)\n", + "\n", + "model = keras.Model(inputs=[inputs, targets], outputs=predictions)\n", + "model.compile(optimizer=\"adam\") # No loss argument!\n", + "\n", + "data = {\n", + " \"inputs\": np.random.random((3, 3)),\n", + " \"targets\": np.random.random((3, 10)),\n", + "}\n", + "model.fit(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For more information about training multi-input models, see the section **Passing data\n", + "to multi-input, multi-output models**." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Automatically setting apart a validation holdout set\n", + "\n", + "In the first end-to-end example you saw, we used the `validation_data` argument to pass\n", + "a tuple of NumPy arrays `(x_val, y_val)` to the model for evaluating a validation loss\n", + "and validation metrics at the end of each epoch.\n", + "\n", + "Here's another option: the argument `validation_split` allows you to automatically\n", + "reserve part of your training data for validation. The argument value represents the\n", + "fraction of the data to be reserved for validation, so it should be set to a number\n", + "higher than 0 and lower than 1. For instance, `validation_split=0.2` means \"use 20% of\n", + "the data for validation\", and `validation_split=0.6` means \"use 60% of the data for\n", + "validation\".\n", + "\n", + "The way the validation is computed is by taking the last x% samples of the arrays\n", + "received by the fit call, before any shuffling.\n", + "\n", + "Note that you can only use `validation_split` when training with NumPy data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "model.fit(x_train, y_train, batch_size=64, validation_split=0.2, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Training & evaluation from tf.data Datasets\n", + "\n", + "In the past few paragraphs, you've seen how to handle losses, metrics, and optimizers,\n", + "and you've seen how to use the `validation_data` and `validation_split` arguments in\n", + "fit, when your data is passed as NumPy arrays.\n", + "\n", + "Let's now take a look at the case where your data comes in the form of a\n", + "`tf.data.Dataset` object.\n", + "\n", + "The `tf.data` API is a set of utilities in TensorFlow 2.0 for loading and preprocessing\n", + "data in a way that's fast and scalable.\n", + "\n", + "For a complete guide about creating `Datasets`, see the\n", + "[tf.data documentation](https://www.tensorflow.org/guide/data).\n", + "\n", + "You can pass a `Dataset` instance directly to the methods `fit()`, `evaluate()`, and\n", + "`predict()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "# First, let's create a training Dataset instance.\n", + "# For the sake of our example, we'll use the same MNIST data as before.\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "# Shuffle and slice the dataset.\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "# Now we get a test dataset.\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", + "test_dataset = test_dataset.batch(64)\n", + "\n", + "# Since the dataset already takes care of batching,\n", + "# we don't pass a `batch_size` argument.\n", + "model.fit(train_dataset, 
epochs=3)\n", + "\n", + "# You can also evaluate or predict on a dataset.\n", + "print(\"Evaluate\")\n", + "result = model.evaluate(test_dataset)\n", + "dict(zip(model.metrics_names, result))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that the Dataset is reset at the end of each epoch, so it can be reused for the\n", + "next epoch.\n", + "\n", + "If you want to run training only on a specific number of batches from this Dataset, you\n", + "can pass the `steps_per_epoch` argument, which specifies how many training steps the\n", + "model should run using this Dataset before moving on to the next epoch.\n", + "\n", + "If you do this, the dataset is not reset at the end of each epoch, instead we just keep\n", + "drawing the next batches. The dataset will eventually run out of data (unless it is an\n", + "infinitely-looping dataset)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "# Prepare the training dataset\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "# Only use the 100 batches per epoch (that's 64 * 100 samples)\n", + "model.fit(train_dataset, epochs=3, steps_per_epoch=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Using a validation dataset\n", + "\n", + "You can pass a `Dataset` instance as the `validation_data` argument in `fit()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "# Prepare the training dataset\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "train_dataset =
train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "# Prepare the validation dataset\n", + "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", + "val_dataset = val_dataset.batch(64)\n", + "\n", + "model.fit(train_dataset, epochs=1, validation_data=val_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "At the end of each epoch, the model will iterate over the validation dataset and\n", + "compute the validation loss and validation metrics.\n", + "\n", + "If you want to run validation only on a specific number of batches from this dataset,\n", + "you can pass the `validation_steps` argument, which specifies how many validation\n", + "steps the model should run with the validation dataset before interrupting validation\n", + "and moving on to the next epoch:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "# Prepare the training dataset\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "# Prepare the validation dataset\n", + "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", + "val_dataset = val_dataset.batch(64)\n", + "\n", + "model.fit(\n", + " train_dataset,\n", + " epochs=1,\n", + " # Only run validation using the first 10 batches of the dataset\n", + " # using the `validation_steps` argument\n", + " validation_data=val_dataset,\n", + " validation_steps=10,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Note that the validation dataset will be reset after each use (so that you will always\n", + "be evaluating on the same samples from epoch to epoch).\n", + "\n", + "The argument `validation_split` (generating a holdout set from the training data) 
is\n", + "not supported when training from `Dataset` objects, since this feature requires the\n", + "ability to index the samples of the datasets, which is not possible in general with\n", + "the `Dataset` API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Other input formats supported\n", + "\n", + "Besides NumPy arrays, eager tensors, and TensorFlow `Datasets`, it's possible to train\n", + "a Keras model using Pandas dataframes, or from Python generators that yield batches of\n", + "data & labels.\n", + "\n", + "In particular, the `keras.utils.Sequence` class offers a simple interface to build\n", + "Python data generators that are multiprocessing-aware and can be shuffled.\n", + "\n", + "In general, we recommend that you use:\n", + "\n", + "- NumPy input data if your data is small and fits in memory\n", + "- `Dataset` objects if you have large datasets and you need to do distributed training\n", + "- `Sequence` objects if you have large datasets and you need to do a lot of custom\n", + "Python-side processing that cannot be done in TensorFlow (e.g. if you rely on external libraries\n", + "for data loading or preprocessing).\n", + "\n", + "\n", + "## Using a `keras.utils.Sequence` object as input\n", + "\n", + "`keras.utils.Sequence` is a utility that you can subclass to obtain a Python generator with\n", + "two important properties:\n", + "\n", + "- It works well with multiprocessing.\n", + "- It can be shuffled (e.g. 
when passing `shuffle=True` in `fit()`).\n", + "\n", + "A `Sequence` must implement two methods:\n", + "\n", + "- `__getitem__`\n", + "- `__len__`\n", + "\n", + "The method `__getitem__` should return a complete batch.\n", + "If you want to modify your dataset between epochs, you may implement `on_epoch_end`.\n", + "\n", + "Here's a quick example:\n", + "\n", + "```python\n", + "from skimage.io import imread\n", + "from skimage.transform import resize\n", + "import numpy as np\n", + "\n", + "# Here, `filenames` is a list of paths to the images\n", + "# and `labels` are the associated labels.\n", + "\n", + "class CIFAR10Sequence(Sequence):\n", + "    def __init__(self, filenames, labels, batch_size):\n", + "        self.filenames, self.labels = filenames, labels\n", + "        self.batch_size = batch_size\n", + "\n", + "    def __len__(self):\n", + "        return int(np.ceil(len(self.filenames) / float(self.batch_size)))\n", + "\n", + "    def __getitem__(self, idx):\n", + "        batch_x = self.filenames[idx * self.batch_size:(idx + 1) * self.batch_size]\n", + "        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]\n", + "        return np.array([\n", + "            resize(imread(filename), (200, 200))\n", + "               for filename in batch_x]), np.array(batch_y)\n", + "\n", + "sequence = CIFAR10Sequence(filenames, labels, batch_size)\n", + "model.fit(sequence, epochs=10)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Using sample weighting and class weighting\n", + "\n", + "With the default settings the weight of a sample is decided by its frequency\n", + "in the dataset. There are two methods to weight the data, independent of\n", + "sample frequency:\n", + "\n", + "* Class weights\n", + "* Sample weights" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Class weights\n", + "\n", + "This is set by passing a dictionary to the `class_weight` argument to\n", + "`Model.fit()`. 
This dictionary maps class indices to the weight that should\n", + "be used for samples belonging to this class.\n", + "\n", + "This can be used to balance classes without resampling, or to train a\n", + "model that gives more importance to a particular class.\n", + "\n", + "For instance, if class \"0\" is half as represented as class \"1\" in your data,\n", + "you could use `Model.fit(..., class_weight={0: 1., 1: 0.5})`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's a NumPy example where we use class weights or sample weights to\n", + "give more importance to the correct classification of class #5 (which\n", + "is the digit \"5\" in the MNIST dataset)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class_weight = {\n", + "    0: 1.0,\n", + "    1: 1.0,\n", + "    2: 1.0,\n", + "    3: 1.0,\n", + "    4: 1.0,\n", + "    # Set weight \"2\" for class \"5\",\n", + "    # making this class 2x more important\n", + "    5: 2.0,\n", + "    6: 1.0,\n", + "    7: 1.0,\n", + "    8: 1.0,\n", + "    9: 1.0,\n", + "}\n", + "\n", + "print(\"Fit with class weight\")\n", + "model = get_compiled_model()\n", + "model.fit(x_train, y_train, class_weight=class_weight, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Sample weights\n", + "\n", + "For fine grained control, or if you are not building a classifier,\n", + "you can use \"sample weights\".\n", + "\n", + "- When training from NumPy data: Pass the `sample_weight`\n", + "  argument to `Model.fit()`.\n", + "- When training from `tf.data` or any other sort of iterator:\n", + "  Yield `(input_batch, label_batch, sample_weight_batch)` tuples.\n", + "\n", + "A \"sample weights\" array is an array of numbers that specify how much weight\n", + "each sample in a batch should have in computing the 
total loss. It is commonly\n", + "used in imbalanced classification problems (the idea being to give more weight\n", + "to rarely-seen classes).\n", + "\n", + "When the weights used are ones and zeros, the array can be used as a *mask* for\n", + "the loss function (entirely discarding the contribution of certain samples to\n", + "the total loss)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "sample_weight = np.ones(shape=(len(y_train),))\n", + "sample_weight[y_train == 5] = 2.0\n", + "\n", + "print(\"Fit with sample weight\")\n", + "model = get_compiled_model()\n", + "model.fit(x_train, y_train, sample_weight=sample_weight, batch_size=64, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's a matching `Dataset` example:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "sample_weight = np.ones(shape=(len(y_train),))\n", + "sample_weight[y_train == 5] = 2.0\n", + "\n", + "# Create a Dataset that includes sample weights\n", + "# (3rd element in the return tuple).\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train, sample_weight))\n", + "\n", + "# Shuffle and slice the dataset.\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "model = get_compiled_model()\n", + "model.fit(train_dataset, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Passing data to multi-input, multi-output models\n", + "\n", + "In the previous examples, we were considering a model with a single input (a tensor of\n", + "shape `(784,)`) and a single output (a prediction tensor of shape `(10,)`). 
But what\n", + "about models that have multiple inputs or outputs?\n", + "\n", + "Consider the following model, which has an image input of shape `(32, 32, 3)` (that's\n", + "`(height, width, channels)`) and a timeseries input of shape `(None, 10)` (that's\n", + "`(timesteps, features)`). Our model will have two outputs computed from the\n", + "combination of these inputs: a \"score\" (of shape `(1,)`) and a probability\n", + "distribution over five classes (of shape `(5,)`)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "image_input = keras.Input(shape=(32, 32, 3), name=\"img_input\")\n", + "timeseries_input = keras.Input(shape=(None, 10), name=\"ts_input\")\n", + "\n", + "x1 = layers.Conv2D(3, 3)(image_input)\n", + "x1 = layers.GlobalMaxPooling2D()(x1)\n", + "\n", + "x2 = layers.Conv1D(3, 3)(timeseries_input)\n", + "x2 = layers.GlobalMaxPooling1D()(x2)\n", + "\n", + "x = layers.concatenate([x1, x2])\n", + "\n", + "score_output = layers.Dense(1, name=\"score_output\")(x)\n", + "class_output = layers.Dense(5, activation=\"softmax\", name=\"class_output\")(x)\n", + "\n", + "model = keras.Model(\n", + " inputs=[image_input, timeseries_input], outputs=[score_output, class_output]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's plot this model, so you can clearly see what we're doing here (note that the\n", + "shapes shown in the plot are batch shapes, rather than per-sample shapes)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.utils.plot_model(model, \"multi_input_and_output_model.png\", show_shapes=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "At compilation time, we can specify different losses to different outputs, by passing\n", + "the loss functions as a list:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=[keras.losses.MeanSquaredError(), keras.losses.CategoricalCrossentropy()],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "If we only passed a single loss function to the model, the same loss function would be\n", + "applied to every output (which is not appropriate here).\n", + "\n", + "Likewise for metrics:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=[keras.losses.MeanSquaredError(), keras.losses.CategoricalCrossentropy()],\n", + " metrics=[\n", + " [\n", + " keras.metrics.MeanAbsolutePercentageError(),\n", + " keras.metrics.MeanAbsoluteError(),\n", + " ],\n", + " [keras.metrics.CategoricalAccuracy()],\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Since we gave names to our output layers, we could also specify per-output losses and\n", + "metrics via a dict:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss={\n", + " \"score_output\": 
keras.losses.MeanSquaredError(),\n", + "        \"class_output\": keras.losses.CategoricalCrossentropy(),\n", + "    },\n", + "    metrics={\n", + "        \"score_output\": [\n", + "            keras.metrics.MeanAbsolutePercentageError(),\n", + "            keras.metrics.MeanAbsoluteError(),\n", + "        ],\n", + "        \"class_output\": [keras.metrics.CategoricalAccuracy()],\n", + "    },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "We recommend the use of explicit names and dicts if you have more than 2 outputs.\n", + "\n", + "It's possible to give different weights to different output-specific losses (for\n", + "instance, one might wish to privilege the \"score\" loss in our example, by giving it 2x\n", + "the importance of the class loss), using the `loss_weights` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + "    optimizer=keras.optimizers.RMSprop(1e-3),\n", + "    loss={\n", + "        \"score_output\": keras.losses.MeanSquaredError(),\n", + "        \"class_output\": keras.losses.CategoricalCrossentropy(),\n", + "    },\n", + "    metrics={\n", + "        \"score_output\": [\n", + "            keras.metrics.MeanAbsolutePercentageError(),\n", + "            keras.metrics.MeanAbsoluteError(),\n", + "        ],\n", + "        \"class_output\": [keras.metrics.CategoricalAccuracy()],\n", + "    },\n", + "    loss_weights={\"score_output\": 2.0, \"class_output\": 1.0},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "You could also choose not to compute a loss for certain outputs, if these outputs are meant\n", + "for prediction but not for training:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# List loss version\n", + "model.compile(\n", + "    optimizer=keras.optimizers.RMSprop(1e-3),\n", + "    loss=[None, keras.losses.CategoricalCrossentropy()],\n", + 
")\n", + "\n", + "# Or dict loss version\n", + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss={\"class_output\": keras.losses.CategoricalCrossentropy()},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Passing data to a multi-input or multi-output model in fit works in a similar way as\n", + "specifying a loss function in compile: you can pass **lists of NumPy arrays** (with\n", + "1:1 mapping to the outputs that received a loss function) or **dicts mapping output\n", + "names to NumPy arrays**." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=keras.optimizers.RMSprop(1e-3),\n", + " loss=[keras.losses.MeanSquaredError(), keras.losses.CategoricalCrossentropy()],\n", + ")\n", + "\n", + "# Generate dummy NumPy data\n", + "img_data = np.random.random_sample(size=(100, 32, 32, 3))\n", + "ts_data = np.random.random_sample(size=(100, 20, 10))\n", + "score_targets = np.random.random_sample(size=(100, 1))\n", + "class_targets = np.random.random_sample(size=(100, 5))\n", + "\n", + "# Fit on lists\n", + "model.fit([img_data, ts_data], [score_targets, class_targets], batch_size=32, epochs=1)\n", + "\n", + "# Alternatively, fit on dicts\n", + "model.fit(\n", + " {\"img_input\": img_data, \"ts_input\": ts_data},\n", + " {\"score_output\": score_targets, \"class_output\": class_targets},\n", + " batch_size=32,\n", + " epochs=1,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's the `Dataset` use case: similarly as what we did for NumPy arrays, the `Dataset`\n", + "should return a tuple of dicts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "train_dataset = tf.data.Dataset.from_tensor_slices(\n", + "    (\n", + "        {\"img_input\": img_data, \"ts_input\": ts_data},\n", + "        {\"score_output\": score_targets, \"class_output\": class_targets},\n", + "    )\n", + ")\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)\n", + "\n", + "model.fit(train_dataset, epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Using callbacks\n", + "\n", + "Callbacks in Keras are objects that are called at different points during training (at\n", + "the start of an epoch, at the end of a batch, at the end of an epoch, etc.) and which\n", + "can be used to implement behaviors such as:\n", + "\n", + "- Doing validation at different points during training (beyond the built-in per-epoch\n", + "validation)\n", + "- Checkpointing the model at regular intervals or when it exceeds a certain accuracy\n", + "threshold\n", + "- Changing the learning rate of the model when training seems to be plateauing\n", + "- Doing fine-tuning of the top layers when training seems to be plateauing\n", + "- Sending email or instant message notifications when training ends or when a certain\n", + "performance threshold is exceeded\n", + "- Etc.\n", + "\n", + "Callbacks can be passed as a list to your call to `fit()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "callbacks = [\n", + "    keras.callbacks.EarlyStopping(\n", + "        # Stop training when `val_loss` is no longer improving\n", + "        monitor=\"val_loss\",\n", + "        # \"no longer improving\" being defined as \"no better than 1e-2 less\"\n", + "        min_delta=1e-2,\n", + "        # \"no longer improving\" being further defined as \"for at least 2 epochs\"\n", + "        
patience=2,\n", + " verbose=1,\n", + " )\n", + "]\n", + "model.fit(\n", + " x_train,\n", + " y_train,\n", + " epochs=20,\n", + " batch_size=64,\n", + " callbacks=callbacks,\n", + " validation_split=0.2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Many built-in callbacks are available\n", + "\n", + "- `ModelCheckpoint`: Periodically save the model.\n", + "- `EarlyStopping`: Stop training when training is no longer improving the validation\n", + "metrics.\n", + "- `TensorBoard`: periodically write model logs that can be visualized in\n", + "[TensorBoard](https://www.tensorflow.org/tensorboard) (more details in the section\n", + "\"Visualization\").\n", + "- `CSVLogger`: streams loss and metrics data to a CSV file.\n", + "- etc.\n", + "\n", + "See the [callbacks documentation](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/) for the complete list.\n", + "\n", + "### Writing your own callback\n", + "\n", + "You can create a custom callback by extending the base class\n", + "`keras.callbacks.Callback`. 
A callback has access to its associated model through the\n", + "class property `self.model`.\n", + "\n", + "Make sure to read the\n", + "[complete guide to writing custom callbacks](https://www.tensorflow.org/guide/keras/custom_callback/).\n", + "\n", + "Here's a simple example saving a list of per-batch loss values during training:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class LossHistory(keras.callbacks.Callback):\n", + " def on_train_begin(self, logs):\n", + " self.per_batch_losses = []\n", + "\n", + " def on_batch_end(self, batch, logs):\n", + " self.per_batch_losses.append(logs.get(\"loss\"))\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Checkpointing models\n", + "\n", + "When you're training model on relatively large datasets, it's crucial to save\n", + "checkpoints of your model at frequent intervals.\n", + "\n", + "The easiest way to achieve this is with the `ModelCheckpoint` callback:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model = get_compiled_model()\n", + "\n", + "callbacks = [\n", + " keras.callbacks.ModelCheckpoint(\n", + " # Path where to save the model\n", + " # The two parameters below mean that we will overwrite\n", + " # the current checkpoint if and only if\n", + " # the `val_loss` score has improved.\n", + " # The saved model name will include the current epoch.\n", + " filepath=\"mymodel_{epoch}\",\n", + " save_best_only=True, # Only save a model if `val_loss` has improved.\n", + " monitor=\"val_loss\",\n", + " verbose=1,\n", + " )\n", + "]\n", + "model.fit(\n", + " x_train, y_train, epochs=2, batch_size=64, callbacks=callbacks, validation_split=0.2\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The 
`ModelCheckpoint` callback can be used to implement fault-tolerance:\n", +        "the ability to restart training from the last saved state of the model in case training\n", +        "gets randomly interrupted. Here's a basic example:" +      ] +    }, +    { +      "cell_type": "code", +      "execution_count": 0, +      "metadata": { +        "colab_type": "code" +      }, +      "outputs": [], +      "source": [ +        "import os\n", +        "\n", +        "# Prepare a directory to store all the checkpoints.\n", +        "checkpoint_dir = \"./ckpt\"\n", +        "if not os.path.exists(checkpoint_dir):\n", +        "    os.makedirs(checkpoint_dir)\n", +        "\n", +        "\n", +        "def make_or_restore_model():\n", +        "    # Either restore the latest model, or create a fresh one\n", +        "    # if there is no checkpoint available.\n", +        "    checkpoints = [checkpoint_dir + \"/\" + name for name in os.listdir(checkpoint_dir)]\n", +        "    if checkpoints:\n", +        "        latest_checkpoint = max(checkpoints, key=os.path.getctime)\n", +        "        print(\"Restoring from\", latest_checkpoint)\n", +        "        return keras.models.load_model(latest_checkpoint)\n", +        "    print(\"Creating a new model\")\n", +        "    return get_compiled_model()\n", +        "\n", +        "\n", +        "model = make_or_restore_model()\n", +        "callbacks = [\n", +        "    # This callback saves a SavedModel every 100 batches.\n", +        "    # We include the training loss in the saved model name.\n", +        "    keras.callbacks.ModelCheckpoint(\n", +        "        filepath=checkpoint_dir + \"/ckpt-loss={loss:.2f}\", save_freq=100\n", +        "    )\n", +        "]\n", +        "model.fit(x_train, y_train, epochs=1, callbacks=callbacks)" +      ] +    }, +    { +      "cell_type": "markdown", +      "metadata": { +        "colab_type": "text" +      }, +      "source": [ +        "You can also write your own callback for saving and restoring models.\n", +        "\n", +        "For a complete guide on serialization and saving, see the\n", +        "[guide to saving and serializing Models](https://www.tensorflow.org/guide/keras/save_and_serialize/)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Using learning rate schedules\n", + "\n", + "A common pattern when training deep learning models is to gradually reduce the learning\n", + "as training progresses. This is generally known as \"learning rate decay\".\n", + "\n", + "The learning decay schedule could be static (fixed in advance, as a function of the\n", + "current epoch or the current batch index), or dynamic (responding to the current\n", + "behavior of the model, in particular the validation loss).\n", + "\n", + "### Passing a schedule to an optimizer\n", + "\n", + "You can easily use a static learning rate decay schedule by passing a schedule object\n", + "as the `learning_rate` argument in your optimizer:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "initial_learning_rate = 0.1\n", + "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n", + " initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True\n", + ")\n", + "\n", + "optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Several built-in schedules are available: `ExponentialDecay`, `PiecewiseConstantDecay`,\n", + "`PolynomialDecay`, and `InverseTimeDecay`.\n", + "\n", + "### Using callbacks to implement a dynamic learning rate schedule\n", + "\n", + "A dynamic learning rate schedule (for instance, decreasing the learning rate when the\n", + "validation loss is no longer improving) cannot be achieved with these schedule objects\n", + "since the optimizer does not have access to validation metrics.\n", + "\n", + "However, callbacks do have access to all metrics, including validation metrics! You can\n", + "thus achieve this pattern by using a callback that modifies the current learning rate\n", + "on the optimizer. 
In fact, this is even built-in as the `ReduceLROnPlateau` callback." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Visualizing loss and metrics during training\n", + "\n", + "The best way to keep an eye on your model during training is to use\n", + "[TensorBoard](https://www.tensorflow.org/tensorboard), a browser-based application\n", + "that you can run locally that provides you with:\n", + "\n", + "- Live plots of the loss and metrics for training and evaluation\n", + "- (optionally) Visualizations of the histograms of your layer activations\n", + "- (optionally) 3D visualizations of the embedding spaces learned by your `Embedding`\n", + "layers\n", + "\n", + "If you have installed TensorFlow with pip, you should be able to launch TensorBoard\n", + "from the command line:\n", + "\n", + "```\n", + "tensorboard --logdir=/full_path_to_your_logs\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Using the TensorBoard callback\n", + "\n", + "The easiest way to use TensorBoard with a Keras model and the fit method is the\n", + "`TensorBoard` callback.\n", + "\n", + "In the simplest case, just specify where you want the callback to write logs, and\n", + "you're good to go:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "keras.callbacks.TensorBoard(\n", + " log_dir=\"/full_path_to_your_logs\",\n", + " histogram_freq=0, # How often to log histogram visualizations\n", + " embeddings_freq=0, # How often to log embedding visualizations\n", + " update_freq=\"epoch\",\n", + ") # How often to write logs (default: once per epoch)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "For more information, see the\n", + "[documentation for the `TensorBoard` 
callback](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/tensorboard/)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "train_and_evaluate", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/transfer_learning.ipynb b/tf/transfer_learning.ipynb new file mode 100644 index 0000000000..d8a4e10a42 --- /dev/null +++ b/tf/transfer_learning.ipynb @@ -0,0 +1,926 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Transfer learning & fine-tuning" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "**Transfer learning** consists of taking features learned on one problem, and\n", + "leveraging them on a new, similar problem. For instance, features from a model that has\n", + "learned to identify racoons may be useful to kick-start a model meant to identify\n", + " tanukis.\n", + "\n", + "Transfer learning is usually done for tasks where your dataset has too little data to\n", + " train a full-scale model from scratch.\n", + "\n", + "The most common incarnation of transfer learning in the context of deep learning is the\n", + " following worfklow:\n", + "\n", + "1. Take layers from a previously trained model.\n", + "2. Freeze them, so as to avoid destroying any of the information they contain during\n", + " future training rounds.\n", + "3. Add some new, trainable layers on top of the frozen layers. They will learn to turn\n", + " the old features into predictions on a new dataset.\n", + "4. Train the new layers on your dataset.\n", + "\n", + "A last, optional step, is **fine-tuning**, which consists of unfreezing the entire\n", + "model you obtained above (or part of it), and re-training it on the new data with a\n", + "very low learning rate. 
This can potentially achieve meaningful improvements, by\n", + " incrementally adapting the pretrained features to the new data.\n", + "\n", + "First, we will go over the Keras `trainable` API in detail, which underlies most\n", + " transfer learning & fine-tuning workflows.\n", + "\n", + "Then, we'll demonstrate the typical workflow by taking a model pretrained on the\n", + "ImageNet dataset, and retraining it on the Kaggle \"cats vs dogs\" classification\n", + " dataset.\n", + "\n", + "This is adapted from\n", + "[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python)\n", + " and the 2016 blog post\n", + "[\"building powerful image classification models using very little\n", + " data\"](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Freezing layers: understanding the `trainable` attribute\n", + "\n", + "Layers & models have three weight attributes:\n", + "\n", + "- `weights` is the list of all weights variables of the layer.\n", + "- `trainable_weights` is the list of those that are meant to be updated (via gradient\n", + " descent) to minimize the loss during training.\n", + "- `non_trainable_weights` is the list of those that aren't meant to be trained.\n", + " Typically they are updated by the model during the forward pass.\n", + "\n", + "**Example: the `Dense` layer has 2 trainable weights (kernel & bias)**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = keras.layers.Dense(3)\n", + "layer.build((None, 4)) # Create the weights\n", + "\n", + "print(\"weights:\", len(layer.weights))\n", + "print(\"trainable_weights:\", len(layer.trainable_weights))\n", + "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { 
+ "colab_type": "text" + }, + "source": [ + "In general, all weights are trainable weights. The only built-in layer that has\n", + "non-trainable weights is the `BatchNormalization` layer. It uses non-trainable weights\n", + " to keep track of the mean and variance of its inputs during training.\n", + "To learn how to use non-trainable weights in your own custom layers, see the\n", + "[guide to writing new layers from scratch](making_new_layers_and_models_via_subclassing).\n", + "\n", + "**Example: the `BatchNormalization` layer has 2 trainable weights and 2 non-trainable\n", + " weights**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = keras.layers.BatchNormalization()\n", + "layer.build((None, 4)) # Create the weights\n", + "\n", + "print(\"weights:\", len(layer.weights))\n", + "print(\"trainable_weights:\", len(layer.trainable_weights))\n", + "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Layers & models also feature a boolean attribute `trainable`. Its value can be changed.\n", + "Setting `layer.trainable` to `False` moves all the layer's weights from trainable to\n", + "non-trainable. 
This is called \"freezing\" the layer: the state of a frozen layer won't\n", + "be updated during training (either when training with `fit()` or when training with\n", + " any custom loop that relies on `trainable_weights` to apply gradient updates).\n", + "\n", + "**Example: setting `trainable` to `False`**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "layer = keras.layers.Dense(3)\n", + "layer.build((None, 4)) # Create the weights\n", + "layer.trainable = False # Freeze the layer\n", + "\n", + "print(\"weights:\", len(layer.weights))\n", + "print(\"trainable_weights:\", len(layer.trainable_weights))\n", + "print(\"non_trainable_weights:\", len(layer.non_trainable_weights))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "When a trainable weight becomes non-trainable, its value is no longer updated during\n", + " training.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Make a model with 2 layers\n", + "layer1 = keras.layers.Dense(3, activation=\"relu\")\n", + "layer2 = keras.layers.Dense(3, activation=\"sigmoid\")\n", + "model = keras.Sequential([keras.Input(shape=(3,)), layer1, layer2])\n", + "\n", + "# Freeze the first layer\n", + "layer1.trainable = False\n", + "\n", + "# Keep a copy of the weights of layer1 for later reference\n", + "initial_layer1_weights_values = layer1.get_weights()\n", + "\n", + "# Train the model\n", + "model.compile(optimizer=\"adam\", loss=\"mse\")\n", + "model.fit(np.random.random((2, 3)), np.random.random((2, 3)))\n", + "\n", + "# Check that the weights of layer1 have not changed during training\n", + "final_layer1_weights_values = layer1.get_weights()\n", + "np.testing.assert_allclose(\n", + " initial_layer1_weights_values[0], final_layer1_weights_values[0]\n", + ")\n", + 
"np.testing.assert_allclose(\n", + " initial_layer1_weights_values[1], final_layer1_weights_values[1]\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Do not confuse the `layer.trainable` attribute with the argument `training` in\n", + "`layer.__call__()` (which controls whether the layer should run its forward pass in\n", + " inference mode or training mode). For more information, see the\n", + "[Keras FAQ](\n", + " https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Recursive setting of the `trainable` attribute\n", + "\n", + "If you set `trainable = False` on a model or on any layer that has sublayers,\n", + "all children layers become non-trainable as well.\n", + "\n", + "**Example:**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inner_model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=(3,)),\n", + " keras.layers.Dense(3, activation=\"relu\"),\n", + " keras.layers.Dense(3, activation=\"relu\"),\n", + " ]\n", + ")\n", + "\n", + "model = keras.Sequential(\n", + " [keras.Input(shape=(3,)), inner_model, keras.layers.Dense(3, activation=\"sigmoid\"),]\n", + ")\n", + "\n", + "model.trainable = False # Freeze the outer model\n", + "\n", + "assert inner_model.trainable == False # All layers in `model` are now frozen\n", + "assert inner_model.layers[0].trainable == False # `trainable` is propagated recursively\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## The typical transfer-learning workflow\n", + "\n", + "This leads us to how a typical transfer learning workflow can be implemented in Keras:\n", + "\n", + "1. 
Instantiate a base model and load pre-trained weights into it.\n", +        "2. Freeze all layers in the base model by setting `trainable = False`.\n", +        "3. Create a new model on top of the output of one (or several) layers from the base\n", +        " model.\n", +        "4. Train your new model on your new dataset.\n", +        "\n", +        "Note that an alternative, more lightweight workflow could also be:\n", +        "\n", +        "1. Instantiate a base model and load pre-trained weights into it.\n", +        "2. Run your new dataset through it and record the output of one (or several) layers\n", +        " from the base model. This is called **feature extraction**.\n", +        "3. Use that output as input data for a new, smaller model.\n", +        "\n", +        "A key advantage of that second workflow is that you only run the base model once on\n", +        " your data, rather than once per epoch of training. So it's a lot faster & cheaper.\n", +        "\n", +        "An issue with that second workflow, though, is that it doesn't allow you to dynamically\n", +        "modify the input data of your new model during training, which is required when doing\n", +        "data augmentation, for instance. Transfer learning is typically used for tasks when\n", +        "your new dataset has too little data to train a full-scale model from scratch, and in\n", +        "such scenarios data augmentation is very important. 
So in what follows, we will focus\n", +        " on the first workflow.\n", +        "\n", +        "Here's what the first workflow looks like in Keras:\n", +        "\n", +        "First, instantiate a base model with pre-trained weights.\n", +        "\n", +        "```python\n", +        "base_model = keras.applications.Xception(\n", +        "    weights='imagenet',  # Load weights pre-trained on ImageNet.\n", +        "    input_shape=(150, 150, 3),\n", +        "    include_top=False)  # Do not include the ImageNet classifier at the top.\n", +        "```\n", +        "\n", +        "Then, freeze the base model.\n", +        "\n", +        "```python\n", +        "base_model.trainable = False\n", +        "```\n", +        "\n", +        "Create a new model on top.\n", +        "\n", +        "```python\n", +        "inputs = keras.Input(shape=(150, 150, 3))\n", +        "# We make sure that the base_model is running in inference mode here,\n", +        "# by passing `training=False`. This is important for fine-tuning, as you will\n", +        "# learn in a few paragraphs.\n", +        "x = base_model(inputs, training=False)\n", +        "# Convert features of shape `base_model.output_shape[1:]` to vectors\n", +        "x = keras.layers.GlobalAveragePooling2D()(x)\n", +        "# A Dense classifier with a single unit (binary classification)\n", +        "outputs = keras.layers.Dense(1)(x)\n", +        "model = keras.Model(inputs, outputs)\n", +        "```\n", +        "\n", +        "Train the model on new data.\n", +        "\n", +        "```python\n", +        "model.compile(optimizer=keras.optimizers.Adam(),\n", +        "              loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", +        "              metrics=[keras.metrics.BinaryAccuracy()])\n", +        "model.fit(new_dataset, epochs=20, callbacks=..., validation_data=...)\n", +        "```\n" +      ] +    }, +    { +      "cell_type": "markdown", +      "metadata": { +        "colab_type": "text" +      }, +      "source": [ +        "## Fine-tuning\n", +        "\n", +        "Once your model has converged on the new data, you can try to unfreeze all or part of\n", +        " the base model and retrain the whole model end-to-end with a very low learning rate.\n", +        "\n", +        "This is an optional last step that can potentially give you incremental improvements.\n", +        " It 
could also potentially lead to quick overfitting -- keep that in mind.\n", + "\n", + "It is critical to only do this step *after* the model with frozen layers has been\n", + "trained to convergence. If you mix randomly-initialized trainable layers with\n", + "trainable layers that hold pre-trained features, the randomly-initialized layers will\n", + "cause very large gradient updates during training, which will destroy your pre-trained\n", + " features.\n", + "\n", + "It's also critical to use a very low learning rate at this stage, because\n", + "you are training a much larger model than in the first round of training, on a dataset\n", + " that is typically very small.\n", + "As a result, you are at risk of overfitting very quickly if you apply large weight\n", + " updates. Here, you only want to readapt the pretrained weights in an incremental way.\n", + "\n", + "This is how to implement fine-tuning of the whole base model:\n", + "\n", + "```python\n", + "# Unfreeze the base model\n", + "base_model.trainable = True\n", + "\n", + "# It's important to recompile your model after you make any changes\n", + "# to the `trainable` attribute of any inner layer, so that your changes\n", + "# are take into account\n", + "model.compile(optimizer=keras.optimizers.Adam(1e-5), # Very low learning rate\n", + " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[keras.metrics.BinaryAccuracy()])\n", + "\n", + "# Train end-to-end. Be careful to stop before you overfit!\n", + "model.fit(new_dataset, epochs=10, callbacks=..., validation_data=...)\n", + "```\n", + "\n", + "**Important note about `compile()` and `trainable`**\n", + "\n", + "Calling `compile()` on a model is meant to \"freeze\" the behavior of that model. This\n", + " implies that the `trainable`\n", + "attribute values at the time the model is compiled should be preserved throughout the\n", + " lifetime of that model,\n", + "until `compile` is called again. 
Hence, if you change any `trainable` value, make sure\n", +        " to call `compile()` again on your\n", +        "model for your changes to be taken into account.\n", +        "\n", +        "**Important notes about `BatchNormalization` layer**\n", +        "\n", +        "Many image models contain `BatchNormalization` layers. That layer is a special case on\n", +        " every imaginable count. Here are a few things to keep in mind.\n", +        "\n", +        "- `BatchNormalization` contains 2 non-trainable weights that get updated during\n", +        "training. These are the variables tracking the mean and variance of the inputs.\n", +        "- When you set `bn_layer.trainable = False`, the `BatchNormalization` layer will\n", +        "run in inference mode, and will not update its mean & variance statistics. This is not\n", +        "the case for other layers in general, as\n", +        "[weight trainability & inference/training modes are two orthogonal concepts](\n", +        " https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute).\n", +        "But the two are tied in the case of the `BatchNormalization` layer.\n", +        "- When you unfreeze a model that contains `BatchNormalization` layers in order to do\n", +        "fine-tuning, you should keep the `BatchNormalization` layers in inference mode by\n", +        " passing `training=False` when calling the base model.\n", +        "Otherwise the updates applied to the non-trainable weights will suddenly destroy\n", +        "what the model has learned.\n", +        "\n", +        "You'll see this pattern in action in the end-to-end example at the end of this guide.\n", +        "\n" +      ] +    }, +    { +      "cell_type": "markdown", +      "metadata": { +        "colab_type": "text" +      }, +      "source": [ +        "## Transfer learning & fine-tuning with a custom training loop\n", +        "\n", +        "If instead of `fit()`, you are using your own low-level training loop, the workflow\n", +        "stays essentially the same. 
You should be careful to only take into account the list\n", + " `model.trainable_weights` when applying gradient updates:\n", + "\n", + "```python\n", + "# Create base model\n", + "base_model = keras.applications.Xception(\n", + " weights='imagenet',\n", + " input_shape=(150, 150, 3),\n", + " include_top=False)\n", + "# Freeze base model\n", + "base_model.trainable = False\n", + "\n", + "# Create new model on top.\n", + "inputs = keras.Input(shape=(150, 150, 3))\n", + "x = base_model(inputs, training=False)\n", + "x = keras.layers.GlobalAveragePooling2D()(x)\n", + "outputs = keras.layers.Dense(1)(x)\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", + "optimizer = keras.optimizers.Adam()\n", + "\n", + "# Iterate over the batches of a dataset.\n", + "for inputs, targets in new_dataset:\n", + " # Open a GradientTape.\n", + " with tf.GradientTape() as tape:\n", + " # Forward pass.\n", + " predictions = model(inputs)\n", + " # Compute the loss value for this batch.\n", + " loss_value = loss_fn(targets, predictions)\n", + "\n", + " # Get gradients of loss wrt the *trainable* weights.\n", + " gradients = tape.gradient(loss_value, model.trainable_weights)\n", + " # Update the weights of the model.\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_weights))\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Likewise for fine-tuning.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## An end-to-end example: fine-tuning an image classification model on a cats vs. dogs\n", + " dataset\n", + "\n", + "To solidify these concepts, let's walk you through a concrete end-to-end transfer\n", + "learning & fine-tuning example. We will load the Xception model, pre-trained on\n", + " ImageNet, and use it on the Kaggle \"cats vs. 
dogs\" classification dataset.\n" +      ] +    }, +    { +      "cell_type": "markdown", +      "metadata": { +        "colab_type": "text" +      }, +      "source": [ +        "### Getting the data\n", +        "\n", +        "First, let's fetch the cats vs. dogs dataset using TFDS. If you have your own dataset,\n", +        "you'll probably want to use the utility\n", +        "`tf.keras.preprocessing.image_dataset_from_directory` to generate similar labeled\n", +        " dataset objects from a set of images on disk filed into class-specific folders.\n", +        "\n", +        "Transfer learning is most useful when working with very small datasets. To keep our\n", +        "dataset small, we will use 40% of the original training data (25,000 images) for\n", +        " training, 10% for validation, and 10% for testing.\n" +      ] +    }, +    { +      "cell_type": "code", +      "execution_count": 0, +      "metadata": { +        "colab_type": "code" +      }, +      "outputs": [], +      "source": [ +        "import tensorflow_datasets as tfds\n", +        "\n", +        "tfds.disable_progress_bar()\n", +        "\n", +        "train_ds, validation_ds, test_ds = tfds.load(\n", +        "    \"cats_vs_dogs\",\n", +        "    # Reserve 10% for validation and 10% for test\n", +        "    split=[\"train[:40%]\", \"train[40%:50%]\", \"train[50%:60%]\"],\n", +        "    as_supervised=True,  # Include labels\n", +        ")\n", +        "\n", +        "print(\"Number of training samples: %d\" % tf.data.experimental.cardinality(train_ds))\n", +        "print(\n", +        "    \"Number of validation samples: %d\" % tf.data.experimental.cardinality(validation_ds)\n", +        ")\n", +        "print(\"Number of test samples: %d\" % tf.data.experimental.cardinality(test_ds))\n" +      ] +    }, +    { +      "cell_type": "markdown", +      "metadata": { +        "colab_type": "text" +      }, +      "source": [ +        "These are the first 9 images in the training dataset -- as you can see, they're all\n", +        " different sizes.\n" +      ] +    }, +    { +      "cell_type": "code", +      "execution_count": 0, +      "metadata": { +        "colab_type": "code" +      }, +      "outputs": [], +      "source": [ +        "import matplotlib.pyplot as plt\n", +        "\n", +        "plt.figure(figsize=(10, 10))\n", +        "for i, (image, label) in 
enumerate(train_ds.take(9)):\n", + " ax = plt.subplot(3, 3, i + 1)\n", + " plt.imshow(image)\n", + " plt.title(int(label))\n", + " plt.axis(\"off\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "We can also see that label 1 is \"dog\" and label 0 is \"cat\".\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Standardizing the data\n", + "\n", + "Our raw images have a variety of sizes. In addition, each pixel consists of 3 integer\n", + "values between 0 and 255 (RGB level values). This isn't a great fit for feeding a\n", + " neural network. We need to do 2 things:\n", + "\n", + "- Standardize to a fixed image size. We pick 150x150.\n", + "- Normalize pixel values between -1 and 1. We'll do this using a `Normalization` layer as\n", + " part of the model itself.\n", + "\n", + "In general, it's a good practice to develop models that take raw data as input, as\n", + "opposed to models that take already-preprocessed data. The reason being that, if your\n", + "model expects preprocessed data, any time you export your model to use it elsewhere\n", + "(in a web browser, in a mobile app), you'll need to reimplement the exact same\n", + "preprocessing pipeline. This get very tricky very quickly. 
So we should do the least\n", + " possible amount of preprocessing before hitting the model.\n", + "\n", + "Here, we'll do image resizing in the data pipeline (because a deep neural network can\n", + "only process contiguous batches of data), and we'll do the input value scaling as part\n", + " of the model, when we create it.\n", + "\n", + "Let's resize images to 150x150:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "size = (150, 150)\n", + "\n", + "train_ds = train_ds.map(lambda x, y: (tf.image.resize(x, size), y))\n", + "validation_ds = validation_ds.map(lambda x, y: (tf.image.resize(x, size), y))\n", + "test_ds = test_ds.map(lambda x, y: (tf.image.resize(x, size), y))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Besides, let's batch the data and use caching & prefetching to optimize loading speed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "batch_size = 32\n", + "\n", + "train_ds = train_ds.cache().batch(batch_size).prefetch(buffer_size=10)\n", + "validation_ds = validation_ds.cache().batch(batch_size).prefetch(buffer_size=10)\n", + "test_ds = test_ds.cache().batch(batch_size).prefetch(buffer_size=10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "### Using random data augmentation\n", + "\n", + "When you don't have a large image dataset, it's a good practice to artificially\n", + " introduce sample diversity by applying random yet realistic transformations to\n", + "the training images, such as random horizontal flipping or small random rotations. 
This\n", + "helps expose the model to different aspects of the training data while slowing down\n", + " overfitting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "\n", + "data_augmentation = keras.Sequential(\n", + "    [\n", + "        layers.experimental.preprocessing.RandomFlip(\"horizontal\"),\n", + "        layers.experimental.preprocessing.RandomRotation(0.1),\n", + "    ]\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's visualize what the first image of the first batch looks like after various random\n", + " transformations:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "for images, labels in train_ds.take(1):\n", + "    plt.figure(figsize=(10, 10))\n", + "    first_image = images[0]\n", + "    for i in range(9):\n", + "        ax = plt.subplot(3, 3, i + 1)\n", + "        augmented_image = data_augmentation(\n", + "            tf.expand_dims(first_image, 0), training=True\n", + "        )\n", + "        plt.imshow(augmented_image[0].numpy().astype(\"int32\"))\n", + "        plt.title(int(labels[i]))\n", + "        plt.axis(\"off\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Build a model\n", + "\n", + "Now let's build a model that follows the blueprint we've explained earlier.\n", + "\n", + "Note that:\n", + "\n", + "- We add a `Normalization` layer to scale input values (initially in the `[0, 255]`\n", + " range) to the `[-1, 1]` range.\n", + "- We add a `Dropout` layer before the classification layer, for regularization.\n", + "- We make sure to pass `training=False` when calling the base model, so that\n", + "it runs in inference mode, so that batchnorm statistics don't get updated\n", + "even after we 
unfreeze the base model for fine-tuning.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "base_model = keras.applications.Xception(\n", + " weights=\"imagenet\", # Load weights pre-trained on ImageNet.\n", + " input_shape=(150, 150, 3),\n", + " include_top=False,\n", + ") # Do not include the ImageNet classifier at the top.\n", + "\n", + "# Freeze the base_model\n", + "base_model.trainable = False\n", + "\n", + "# Create new model on top\n", + "inputs = keras.Input(shape=(150, 150, 3))\n", + "x = data_augmentation(inputs) # Apply random data augmentation\n", + "\n", + "# Pre-trained Xception weights requires that input be normalized\n", + "# from (0, 255) to a range (-1., +1.), the normalization layer\n", + "# does the following, outputs = (inputs - mean) / sqrt(var)\n", + "norm_layer = keras.layers.experimental.preprocessing.Normalization()\n", + "mean = np.array([127.5] * 3)\n", + "var = mean ** 2\n", + "# Scale inputs to [-1, +1]\n", + "x = norm_layer(x)\n", + "norm_layer.set_weights([mean, var])\n", + "\n", + "# The base model contains batchnorm layers. 
We want to keep them in inference mode\n", + "# when we unfreeze the base model for fine-tuning, so we make sure that the\n", + "# base_model is running in inference mode here.\n", + "x = base_model(x, training=False)\n", + "x = keras.layers.GlobalAveragePooling2D()(x)\n", + "x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout\n", + "outputs = keras.layers.Dense(1)(x)\n", + "model = keras.Model(inputs, outputs)\n", + "\n", + "model.summary()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Train the top layer\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "model.compile(\n", + "    optimizer=keras.optimizers.Adam(),\n", + "    loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", + "    metrics=[keras.metrics.BinaryAccuracy()],\n", + ")\n", + "\n", + "epochs = 20\n", + "model.fit(train_ds, epochs=epochs, validation_data=validation_ds)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Do a round of fine-tuning of the entire model\n", + "\n", + "Finally, let's unfreeze the base model and train the entire model end-to-end with a low\n", + " learning rate.\n", + "\n", + "Importantly, although the base model becomes trainable, it is still running in\n", + "inference mode since we passed `training=False` when calling it when we built the\n", + "model. This means that the batch normalization layers inside won't update their batch\n", + "statistics. If they did, they would wreak havoc on the representations learned by the\n", + " model so far.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Unfreeze the base_model. Note that it keeps running in inference mode\n", + "# since we passed `training=False` when calling it. 
This means that\n", + "# the batchnorm layers will not update their batch statistics.\n", + "# This prevents the batchnorm layers from undoing all the training\n", + "# we've done so far.\n", + "base_model.trainable = True\n", + "model.summary()\n", + "\n", + "model.compile(\n", + " optimizer=keras.optimizers.Adam(1e-5), # Low learning rate\n", + " loss=keras.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[keras.metrics.BinaryAccuracy()],\n", + ")\n", + "\n", + "epochs = 10\n", + "model.fit(train_ds, epochs=epochs, validation_data=validation_ds)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "After 10 epochs, fine-tuning gains us a nice improvement here.\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "transfer_learning", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tf/writing_a_training_loop_from_scratch.ipynb b/tf/writing_a_training_loop_from_scratch.ipynb new file mode 100644 index 0000000000..17653ce2e8 --- /dev/null +++ b/tf/writing_a_training_loop_from_scratch.ipynb @@ -0,0 +1,852 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "cellView": "form", + "colab": {}, + "colab_type": "code" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "# Writing a training loop from scratch" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Introduction\n", + "\n", + "Keras provides default training and evaluation loops, `fit()` and `evaluate()`.\n", + "Their usage is covered in the guide\n", + "[Training & evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate/).\n", + "\n", + "If you want to customize the learning algorithm of your model while still leveraging\n", + "the convenience of `fit()`\n", + "(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and\n", + "implement your own `train_step()` method, which\n", + "is called repeatedly during `fit()`. This is covered in the guide\n", + "[Customizing what happens in `fit()`](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit/).\n", + "\n", + "Now, if you want very low-level control over training & evaluation, you should write\n", + "your own training & evaluation loops from scratch. This is what this guide is about." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Using the `GradientTape`: a first end-to-end example\n", + "\n", + "Calling a model inside a `GradientTape` scope enables you to retrieve the gradients of\n", + "the trainable weights of the layer with respect to a loss value. 
Using an optimizer\n", + "instance, you can use these gradients to update these variables (which you can\n", + "retrieve using `model.trainable_weights`).\n", + "\n", + "Let's consider a simple MNIST model:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x1 = layers.Dense(64, activation=\"relu\")(inputs)\n", + "x2 = layers.Dense(64, activation=\"relu\")(x1)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x2)\n", + "model = keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's train it using mini-batch gradient with a custom training loop.\n", + "\n", + "First, we're going to need an optimizer, a loss function, and a dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Instantiate an optimizer.\n", + "optimizer = keras.optimizers.SGD(learning_rate=1e-3)\n", + "# Instantiate a loss function.\n", + "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", + "\n", + "# Prepare the training dataset.\n", + "batch_size = 64\n", + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", + "x_train = np.reshape(x_train, (-1, 784))\n", + "x_test = np.reshape(x_test, (-1, 784))\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's our training loop:\n", + "\n", + "- We open a `for` loop that iterates over epochs\n", + "- For each epoch, we open a `for` loop that iterates over the dataset, in batches\n", + "- For each batch, we open a `GradientTape()` scope\n", + "- Inside this 
scope, we call the model (forward pass) and compute the loss\n", + "- Outside the scope, we retrieve the gradients of the weights\n", + "of the model with regard to the loss\n", + "- Finally, we use the optimizer to update the weights of the model based on the\n", + "gradients" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "epochs = 2\n", + "for epoch in range(epochs):\n", + " print(\"\\nStart of epoch %d\" % (epoch,))\n", + "\n", + " # Iterate over the batches of the dataset.\n", + " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", + "\n", + " # Open a GradientTape to record the operations run\n", + " # during the forward pass, which enables autodifferentiation.\n", + " with tf.GradientTape() as tape:\n", + "\n", + " # Run the forward pass of the layer.\n", + " # The operations that the layer applies\n", + " # to its inputs are going to be recorded\n", + " # on the GradientTape.\n", + " logits = model(x_batch_train, training=True) # Logits for this minibatch\n", + "\n", + " # Compute the loss value for this minibatch.\n", + " loss_value = loss_fn(y_batch_train, logits)\n", + "\n", + " # Use the gradient tape to automatically retrieve\n", + " # the gradients of the trainable variables with respect to the loss.\n", + " grads = tape.gradient(loss_value, model.trainable_weights)\n", + "\n", + " # Run one step of gradient descent by updating\n", + " # the value of the variables to minimize the loss.\n", + " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", + "\n", + " # Log every 200 batches.\n", + " if step % 200 == 0:\n", + " print(\n", + " \"Training loss (for one batch) at step %d: %.4f\"\n", + " % (step, float(loss_value))\n", + " )\n", + " print(\"Seen so far: %s samples\" % ((step + 1) * 64))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Low-level handling of metrics\n", + 
"\n", + "Let's add metrics monitoring to this basic loop.\n", + "\n", + "You can readily reuse the built-in metrics (or custom ones you wrote) in such training\n", + "loops written from scratch. Here's the flow:\n", + "\n", + "- Instantiate the metric at the start of the loop\n", + "- Call `metric.update_state()` after each batch\n", + "- Call `metric.result()` when you need to display the current value of the metric\n", + "- Call `metric.reset_states()` when you need to clear the state of the metric\n", + "(typically at the end of an epoch)\n", + "\n", + "Let's use this knowledge to compute `SparseCategoricalAccuracy` on validation data at\n", + "the end of each epoch:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Get model\n", + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_1\")(inputs)\n", + "x = layers.Dense(64, activation=\"relu\", name=\"dense_2\")(x)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x)\n", + "model = keras.Model(inputs=inputs, outputs=outputs)\n", + "\n", + "# Instantiate an optimizer to train the model.\n", + "optimizer = keras.optimizers.SGD(learning_rate=1e-3)\n", + "# Instantiate a loss function.\n", + "loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", + "\n", + "# Prepare the metrics.\n", + "train_acc_metric = keras.metrics.SparseCategoricalAccuracy()\n", + "val_acc_metric = keras.metrics.SparseCategoricalAccuracy()\n", + "\n", + "# Prepare the training dataset.\n", + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)\n", + "\n", + "# Prepare the validation dataset.\n", + "# Reserve 10,000 samples for validation.\n", + "x_val = x_train[-10000:]\n", + "y_val = y_train[-10000:]\n", + "x_train = x_train[:-10000]\n", + 
"y_train = y_train[:-10000]\n", + "val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))\n", + "val_dataset = val_dataset.batch(64)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's our training & evaluation loop:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "epochs = 2\n", + "for epoch in range(epochs):\n", + " print(\"\\nStart of epoch %d\" % (epoch,))\n", + " start_time = time.time()\n", + "\n", + " # Iterate over the batches of the dataset.\n", + " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x_batch_train, training=True)\n", + " loss_value = loss_fn(y_batch_train, logits)\n", + " grads = tape.gradient(loss_value, model.trainable_weights)\n", + " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", + "\n", + " # Update training metric.\n", + " train_acc_metric.update_state(y_batch_train, logits)\n", + "\n", + " # Log every 200 batches.\n", + " if step % 200 == 0:\n", + " print(\n", + " \"Training loss (for one batch) at step %d: %.4f\"\n", + " % (step, float(loss_value))\n", + " )\n", + " print(\"Seen so far: %d samples\" % ((step + 1) * 64))\n", + "\n", + " # Display metrics at the end of each epoch.\n", + " train_acc = train_acc_metric.result()\n", + " print(\"Training acc over epoch: %.4f\" % (float(train_acc),))\n", + "\n", + " # Reset training metrics at the end of each epoch\n", + " train_acc_metric.reset_states()\n", + "\n", + " # Run a validation loop at the end of each epoch.\n", + " for x_batch_val, y_batch_val in val_dataset:\n", + " val_logits = model(x_batch_val, training=False)\n", + " # Update val metrics\n", + " val_acc_metric.update_state(y_batch_val, val_logits)\n", + " val_acc = val_acc_metric.result()\n", + " val_acc_metric.reset_states()\n", + " 
print(\"Validation acc: %.4f\" % (float(val_acc),))\n", + "    print(\"Time taken: %.2fs\" % (time.time() - start_time))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Speeding-up your training step with `tf.function`\n", + "\n", + "The default runtime in TensorFlow 2.0 is\n", + "[eager execution](https://www.tensorflow.org/guide/eager). As such, our training loop\n", + "above executes eagerly.\n", + "\n", + "This is great for debugging, but graph compilation has a definite performance\n", + "advantage. Describing your computation as a static graph enables the framework\n", + "to apply global performance optimizations. This is impossible when\n", + "the framework is constrained to greedily execute one operation after another,\n", + "with no knowledge of what comes next.\n", + "\n", + "You can compile into a static graph any function that takes tensors as input.\n", + "Just add a `@tf.function` decorator on it, like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "@tf.function\n", + "def train_step(x, y):\n", + "    with tf.GradientTape() as tape:\n", + "        logits = model(x, training=True)\n", + "        loss_value = loss_fn(y, logits)\n", + "    grads = tape.gradient(loss_value, model.trainable_weights)\n", + "    optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", + "    train_acc_metric.update_state(y, logits)\n", + "    return loss_value\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's do the same with the evaluation step:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "@tf.function\n", + "def test_step(x, y):\n", + "    val_logits = model(x, training=False)\n", + "    val_acc_metric.update_state(y, val_logits)\n", + "" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "colab_type": "text" + }, + "source": [ + "Now, let's re-run our training loop with this compiled training step:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "epochs = 2\n", + "for epoch in range(epochs):\n", + " print(\"\\nStart of epoch %d\" % (epoch,))\n", + " start_time = time.time()\n", + "\n", + " # Iterate over the batches of the dataset.\n", + " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", + " loss_value = train_step(x_batch_train, y_batch_train)\n", + "\n", + " # Log every 200 batches.\n", + " if step % 200 == 0:\n", + " print(\n", + " \"Training loss (for one batch) at step %d: %.4f\"\n", + " % (step, float(loss_value))\n", + " )\n", + " print(\"Seen so far: %d samples\" % ((step + 1) * 64))\n", + "\n", + " # Display metrics at the end of each epoch.\n", + " train_acc = train_acc_metric.result()\n", + " print(\"Training acc over epoch: %.4f\" % (float(train_acc),))\n", + "\n", + " # Reset training metrics at the end of each epoch\n", + " train_acc_metric.reset_states()\n", + "\n", + " # Run a validation loop at the end of each epoch.\n", + " for x_batch_val, y_batch_val in val_dataset:\n", + " test_step(x_batch_val, y_batch_val)\n", + "\n", + " val_acc = val_acc_metric.result()\n", + " val_acc_metric.reset_states()\n", + " print(\"Validation acc: %.4f\" % (float(val_acc),))\n", + " print(\"Time taken: %.2fs\" % (time.time() - start_time))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Much faster, isn't it?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Low-level handling of losses tracked by the model\n", + "\n", + "Layers & models recursively track any losses created during the forward pass\n", + "by layers that call `self.add_loss(value)`. 
The resulting list of scalar loss\n", + "values are available via the property `model.losses`\n", + "at the end of the forward pass.\n", + "\n", + "If you want to be using these loss components, you should sum them\n", + "and add them to the main loss in your training step.\n", + "\n", + "Consider this layer, that creates an activity regularization loss:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "class ActivityRegularizationLayer(layers.Layer):\n", + " def call(self, inputs):\n", + " self.add_loss(1e-2 * tf.reduce_sum(inputs))\n", + " return inputs\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's build a really simple model that uses it:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "inputs = keras.Input(shape=(784,), name=\"digits\")\n", + "x = layers.Dense(64, activation=\"relu\")(inputs)\n", + "# Insert activity regularization as a layer\n", + "x = ActivityRegularizationLayer()(x)\n", + "x = layers.Dense(64, activation=\"relu\")(x)\n", + "outputs = layers.Dense(10, name=\"predictions\")(x)\n", + "\n", + "model = keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's what our training step should look like now:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "\n", + "@tf.function\n", + "def train_step(x, y):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x, training=True)\n", + " loss_value = loss_fn(y, logits)\n", + " # Add any extra losses created during the forward pass.\n", + " loss_value += sum(model.losses)\n", + " grads = tape.gradient(loss_value, model.trainable_weights)\n", + " 
optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", + "    train_acc_metric.update_state(y, logits)\n", + "    return loss_value\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## Summary\n", + "\n", + "Now you know everything there is to know about using built-in training loops and\n", + "writing your own from scratch.\n", + "\n", + "To conclude, here's a simple end-to-end example that ties together everything\n", + "you've learned in this guide: a DCGAN trained on MNIST digits." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "## End-to-end example: a GAN training loop from scratch\n", + "\n", + "You may be familiar with Generative Adversarial Networks (GANs). GANs can generate new\n", + "images that look almost real, by learning the latent distribution of a training\n", + "dataset of images (the \"latent space\" of the images).\n", + "\n", + "A GAN is made of two parts: a \"generator\" model that maps points in the latent\n", + "space to points in image space, and a \"discriminator\" model, a classifier\n", + "that can tell the difference between real images (from the training dataset)\n", + "and fake images (the output of the generator network).\n", + "\n", + "A GAN training loop looks like this:\n", + "\n", + "1) Train the discriminator.\n", + "- Sample a batch of random points in the latent space.\n", + "- Turn the points into fake images via the \"generator\" model.\n", + "- Get a batch of real images and combine them with the generated images.\n", + "- Train the \"discriminator\" model to classify generated vs. 
real images.\n", + "\n", + "2) Train the generator.\n", + "- Sample random points in the latent space.\n", + "- Turn the points into fake images via the \"generator\" network.\n", + "- Get a batch of real images and combine them with the generated images.\n", + "- Train the \"generator\" model to \"fool\" the discriminator and classify the fake images\n", + "as real.\n", + "\n", + "For a much more detailed overview of how GANs work, see\n", + "[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).\n", + "\n", + "Let's implement this training loop. First, create the discriminator meant to classify\n", + "fake vs real digits:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "discriminator = keras.Sequential(\n", + "    [\n", + "        keras.Input(shape=(28, 28, 1)),\n", + "        layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n", + "        layers.LeakyReLU(alpha=0.2),\n", + "        layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n", + "        layers.LeakyReLU(alpha=0.2),\n", + "        layers.GlobalMaxPooling2D(),\n", + "        layers.Dense(1),\n", + "    ],\n", + "    name=\"discriminator\",\n", + ")\n", + "discriminator.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Then let's create a generator network,\n", + "that turns latent vectors into outputs of shape `(28, 28, 1)` (representing\n", + "MNIST digits):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "latent_dim = 128\n", + "\n", + "generator = keras.Sequential(\n", + "    [\n", + "        keras.Input(shape=(latent_dim,)),\n", + "        # We want to generate 128 coefficients to reshape into a 7x7x128 map\n", + "        layers.Dense(7 * 7 * 128),\n", + "        layers.LeakyReLU(alpha=0.2),\n", + "        layers.Reshape((7, 7, 128)),\n", + "        layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), 
padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n", + " layers.LeakyReLU(alpha=0.2),\n", + " layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"sigmoid\"),\n", + " ],\n", + " name=\"generator\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Here's the key bit: the training loop. As you can see it is quite straightforward. The\n", + "training step function only takes 17 lines." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "# Instantiate one optimizer for the discriminator and another for the generator.\n", + "d_optimizer = keras.optimizers.Adam(learning_rate=0.0003)\n", + "g_optimizer = keras.optimizers.Adam(learning_rate=0.0004)\n", + "\n", + "# Instantiate a loss function.\n", + "loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)\n", + "\n", + "\n", + "@tf.function\n", + "def train_step(real_images):\n", + " # Sample random points in the latent space\n", + " random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))\n", + " # Decode them to fake images\n", + " generated_images = generator(random_latent_vectors)\n", + " # Combine them with real images\n", + " combined_images = tf.concat([generated_images, real_images], axis=0)\n", + "\n", + " # Assemble labels discriminating real from fake images\n", + " labels = tf.concat(\n", + " [tf.ones((batch_size, 1)), tf.zeros((real_images.shape[0], 1))], axis=0\n", + " )\n", + " # Add random noise to the labels - important trick!\n", + " labels += 0.05 * tf.random.uniform(labels.shape)\n", + "\n", + " # Train the discriminator\n", + " with tf.GradientTape() as tape:\n", + " predictions = discriminator(combined_images)\n", + " d_loss = loss_fn(labels, predictions)\n", + " grads = tape.gradient(d_loss, discriminator.trainable_weights)\n", + " 
d_optimizer.apply_gradients(zip(grads, discriminator.trainable_weights))\n", + "\n", + " # Sample random points in the latent space\n", + " random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))\n", + " # Assemble labels that say \"all real images\"\n", + " misleading_labels = tf.zeros((batch_size, 1))\n", + "\n", + " # Train the generator (note that we should *not* update the weights\n", + " # of the discriminator)!\n", + " with tf.GradientTape() as tape:\n", + " predictions = discriminator(generator(random_latent_vectors))\n", + " g_loss = loss_fn(misleading_labels, predictions)\n", + " grads = tape.gradient(g_loss, generator.trainable_weights)\n", + " g_optimizer.apply_gradients(zip(grads, generator.trainable_weights))\n", + " return d_loss, g_loss, generated_images\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "Let's train our GAN, by repeatedly calling `train_step` on batches of images.\n", + "\n", + "Since our discriminator and generator are convnets, you're going to want to\n", + "run this code on a GPU." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab_type": "code" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Prepare the dataset. 
We use both the training & test MNIST digits.\n", + "batch_size = 64\n", + "(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n", + "all_digits = np.concatenate([x_train, x_test])\n", + "all_digits = all_digits.astype(\"float32\") / 255.0\n", + "all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n", + "dataset = tf.data.Dataset.from_tensor_slices(all_digits)\n", + "dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", + "\n", + "epochs = 1 # In practice you need at least 20 epochs to generate nice digits.\n", + "save_dir = \"./\"\n", + "\n", + "for epoch in range(epochs):\n", + " print(\"\\nStart epoch\", epoch)\n", + "\n", + " for step, real_images in enumerate(dataset):\n", + " # Train the discriminator & generator on one batch of real images.\n", + " d_loss, g_loss, generated_images = train_step(real_images)\n", + "\n", + " # Logging.\n", + " if step % 200 == 0:\n", + " # Print metrics\n", + " print(\"discriminator loss at step %d: %.2f\" % (step, d_loss))\n", + " print(\"adversarial loss at step %d: %.2f\" % (step, g_loss))\n", + "\n", + " # Save one generated image\n", + " img = tf.keras.preprocessing.image.array_to_img(\n", + " generated_images[0] * 255.0, scale=False\n", + " )\n", + " img.save(os.path.join(save_dir, \"generated_img\" + str(step) + \".png\"))\n", + "\n", + " # To limit execution time we stop after 10 steps.\n", + " # Remove the lines below to actually train the model!\n", + " if step > 10:\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "That's it! You'll get nice-looking fake MNIST digits after just ~30s of training on the\n", + "Colab GPU." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "writing_a_training_loop_from_scratch", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file