From 3e75b6a3b25a38f3c1b91f7ffd42d3be7e628fcd Mon Sep 17 00:00:00 2001 From: Mark Callow Date: Tue, 25 May 2021 18:47:52 +0900 Subject: [PATCH] git subrepo pull (merge) lib/astc-encoder subrepo: subdir: "lib/astc-encoder" merged: "d38c8c85" upstream: origin: "https://github.com/ARM-software/astc-encoder.git" branch: "main" commit: "2f9362c1" git-subrepo: version: "0.4.3" origin: "https://github.com/MarkCallow/git-subrepo.git" commit: "c1f1132" --- lib/astc-encoder/.gitrepo | 2 +- .../Docs/{ChangeLog.md => ChangeLog-2x.md} | 2 +- lib/astc-encoder/Docs/ChangeLog-3x.md | 56 + .../ChangeLogImg/relative-main-to-2.5.png | Bin 0 -> 125882 bytes lib/astc-encoder/Docs/Roadmap.md | 8 - lib/astc-encoder/README.md | 9 +- lib/astc-encoder/Source/astcenc.h | 346 +-- .../astcenc_averages_and_directions.cpp | 57 +- .../Source/astcenc_block_sizes.cpp | 526 ++--- .../Source/astcenc_color_quantize.cpp | 138 +- .../Source/astcenc_color_unquantize.cpp | 76 +- .../Source/astcenc_compress_symbolic.cpp | 661 +++--- .../Source/astcenc_compute_variance.cpp | 79 +- .../Source/astcenc_decompress_symbolic.cpp | 115 +- .../Source/astcenc_diagnostic_trace.h | 58 +- lib/astc-encoder/Source/astcenc_entry.cpp | 86 +- .../Source/astcenc_find_best_partitioning.cpp | 602 ++++- .../astcenc_ideal_endpoints_and_weights.cpp | 363 ++- lib/astc-encoder/Source/astcenc_image.cpp | 70 +- .../Source/astcenc_integer_sequence.cpp | 173 +- lib/astc-encoder/Source/astcenc_internal.h | 1938 ++++++++++------- .../Source/astcenc_kmeans_partitioning.cpp | 451 ---- lib/astc-encoder/Source/astcenc_mathlib.cpp | 4 +- .../Source/astcenc_partition_tables.cpp | 54 +- .../Source/astcenc_percentile_tables.cpp | 38 +- .../astcenc_pick_best_endpoint_format.cpp | 222 +- .../Source/astcenc_quantization.cpp | 19 +- .../Source/astcenc_symbolic_physical.cpp | 129 +- lib/astc-encoder/Source/astcenc_vecmathlib.h | 8 +- .../Source/astcenc_vecmathlib_neon_4.h | 6 - .../Source/astcenc_weight_align.cpp | 161 +- 
lib/astc-encoder/Source/cmake_core.cmake | 1 - 32 files changed, 3436 insertions(+), 3022 deletions(-) rename lib/astc-encoder/Docs/{ChangeLog.md => ChangeLog-2x.md} (99%) create mode 100644 lib/astc-encoder/Docs/ChangeLog-3x.md create mode 100644 lib/astc-encoder/Docs/ChangeLogImg/relative-main-to-2.5.png delete mode 100644 lib/astc-encoder/Source/astcenc_kmeans_partitioning.cpp diff --git a/lib/astc-encoder/.gitrepo b/lib/astc-encoder/.gitrepo index 9ae1e71f45..a7913c527c 100644 --- a/lib/astc-encoder/.gitrepo +++ b/lib/astc-encoder/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/ARM-software/astc-encoder.git branch = main - commit = 36070fff1c2435f6d9f547bab3456aee06bce7da + commit = 2f9362c16e71cb6da1ae48bc23ca7e9243ba8f99 parent = 90c01d4fe24470b1065d99cecff49e9514e5fad3 method = merge cmdver = 0.4.3 diff --git a/lib/astc-encoder/Docs/ChangeLog.md b/lib/astc-encoder/Docs/ChangeLog-2x.md similarity index 99% rename from lib/astc-encoder/Docs/ChangeLog.md rename to lib/astc-encoder/Docs/ChangeLog-2x.md index 72c469bde8..98aced2f39 100644 --- a/lib/astc-encoder/Docs/ChangeLog.md +++ b/lib/astc-encoder/Docs/ChangeLog-2x.md @@ -32,7 +32,7 @@ releases. Please update and rebuild your client-side code using the updated library users targeting older 32-bit Android and iOS devices. * **Feature:** A compressor mode for encoding HDR textures that have been encoded into LDR RGBM wrapper format is now supported. Note that this - encoding has some strong recomendations for how the RGBM encoding is + encoding has some strong recommendations for how the RGBM encoding is implemented to avoid block artifacts in the compressed image. 
* **Core API:** * **API Change:** The core API has been changed to be a pure C API, making it diff --git a/lib/astc-encoder/Docs/ChangeLog-3x.md b/lib/astc-encoder/Docs/ChangeLog-3x.md new file mode 100644 index 0000000000..17ad57c071 --- /dev/null +++ b/lib/astc-encoder/Docs/ChangeLog-3x.md @@ -0,0 +1,56 @@ +# 3.x series change log + +This page summarizes the major functional and performance changes in each +release of the 3.x series. + +All performance data on this page is measured on an Intel Core i5-9600K +clocked at 4.2 GHz, running astcenc using AVX2 and 6 threads. + + +## 3.0 + +**Status:** ETA, August 2021 + +The 3.0 alpha release is a preview release for the 3.x series. This series is +aiming to provide a major update to the codec with more experimental changes +than we could make with the 2.x series. + +Reminder for users of the library interface - the API is not designed to be +binary compatible across versions, and this release is not compatible with +earlier releases. Please update and rebuild your client-side code using the +updated `astcenc.h` header. + +* **General:** + * **Feature:** The code has been significantly cleaned up, with improved + API documentation, function and variable naming conventions, and comments. +* **Core API:** + * **Feature:** New heuristics have been added for controlling when to search + beyond 2 partitions and 1 plane, and when to search beyond 3 partitions and + 1 plane. The previous `tune_partition_early_out_limit` config option has + been removed, and replaced with two new options + `tune_2_partition_early_out_limit_factor` and + `tune_3_partition_early_out_limit_factor`. See command line help for more + detailed documentation. + * **Feature:** New heuristics have been added for controlling when to use + 2 planes. The previous `tune_two_plane_early_out_limit` has been renamed to + `tune_2_plane_early_out_limit_correlation` and a new heuristic + `tune_2_plane_early_out_limit_factor` has been added. 
See command line help + for more detailed documentation. + * **API Change:** The core APIs for `astcenc_compress_image()` and for + `astcenc_decompress_image()` now accept swizzle structures by `const` + pointer, instead of pass-by-value. + * **API Change:** Calling the `astcenc_compress_reset()` and the + `astcenc_decompress_reset()` functions between images is no longer required + if the context was created for use by a single thread. + +### Performance: + +The current performance and image quality for the 3.0 development build vs the +stable 2.5 release is shown below. + +* Color = block size (see legend). +* Letter = image format (N = normal map, G = greyscale, L = LDR, H = HDR). + +**Relative performance vs 2.5 release:** + +![Relative scores 3.0-develop vs 2.5](./ChangeLogImg/relative-main-to-2.5.png) diff --git a/lib/astc-encoder/Docs/ChangeLogImg/relative-main-to-2.5.png b/lib/astc-encoder/Docs/ChangeLogImg/relative-main-to-2.5.png new file mode 100644 index 0000000000000000000000000000000000000000..86ad353e005fd03c904d04f60350da2c2acb1321 GIT binary patch literal 125882 zcmdSBc|4YD+dY0akjyF(Aw`8EQX)eXMIn-ejG<&EGE+o`lnlvK$yAxkka;eVq(Wvg zC3BgF-@2;(Jn#E`_ul{id_K=(ll#7}>pYM1IM%V&I<8x%m5$R;vQUypBpUe>$5crq zvNjTF%}EM!{6_1hZaluNwUv=qqrjI7#YJEIyx!`>d0P^RPM`SSswAnz%lPFXJ6R38 zGnU474*E7mBy)W`D>F+wv&#nC?2T+}FI!sd=NIG`+_UYHot>4KfWUwMo!`>NM1bCy zX$y(8jU<0eTJ2iY_jX4OwNESS#%HKo(_be%^iA?$_4B)u=NWM;A^0SJ$gTW7O;fil z8^yXkb$i3x_V1-ydK%Bem9YNOeX0k!*K>oZw)t+at({xy4qCXn^$0!l?O8VoI_2`{ z{WDRv10&*gqtW$e*K@A*{qI-OiI4FTr0xItdYtJ&HG1s7UzPuV{!#gKvfBNF=9{m7TIPpQ(4N+b50?6dAU zHaj=>NITEQ%GUPiz`JN&U)jEbJr`bW8vdNZH==)<=YfRt!EEzZ^)KbuQq9|6F}lwG z@C*-UGBYzfoo{a*z#+w+oSdxenxd0d{p#v4{iaQ8>FDU1zkDfcYGT@N^6r?iv9ag8 zWo!0cHQvw$@>vai;QroG$U4%RBQV0rE3|*VzJ&#?t?kr?k!pMrBd=Nshx?*_ zx@-HZtIUV2wiy^0ynk`IaRb}Y%UAH>7M(?bXA+e{r%x+*M@O@DxJ)U=T;sV;$;kWA zrmtr3a|-KI^%TE*yOa&)Ci})GC(9b*j_aG7M-N;XYtQ$=pLF+y>&k`mU+DVw&2Mn< z(sSKn_T}YeEU1S@R;Z_^(tmbws-7nMT&nh#kuNX#!bCSPFf>fXU0k8NPVB+p 
z3fa1KhF@M>PS46ZlTE`SOd%;LiM@NEMm5S3nDD|!Z^O)3r%zy@A~r4Qgv`;S4cX>e zH_R0u2uq(hL2=Nc&6o4&&98HsCr_R{CMWmU=xta{_M@XdkDY#gyDxT?cY3s4>tX%Q zb?ess7;YMi7&wBpHClJFhMs{zB_j0xeP4zhau2cn>Dk$GxXp%V=LEE0*}Zvq!QTF0 zf^zKb2M4am#5Fv0nVY=S{^~$kdHE{R8gl9qkxrL2B=Wx1HiJK%_GstQ{~T`ek&E;6 z_oouG|9(l0w{Kyr`1rk@ihAbeG$tk{KSo-aumXaY?wzTh|1s2ftuOSPo}u9y2M34R z=)+By+#(_(aMLWNpPrHh%B>L*5&1FLu!c!Mr*IK&>HLzCBZovqyXSun zli2rqNE9{GlKS3;ACEj}9-tE$dATt`I$pKAEzfqaDUAbf-qq7{{ms2yM=3n{&ZTw* zigb7s7E17*ed^^n)z9eEAsX|%JEn2Qn~G_lUJ2=tm{?h9=_-7F#nQq|tk`s$TAB{q z%HkB=^jK%w#cyw9%YEoL4BB$7t*os*Q&RT6au~ab| z6tGqFLsFESQugxlg5~VOLU~mcIpWs((X>jT(^YEbeI-dmEU^Si2nsStOG~G}eChq{ z8K2~A*P8DwS?hR1YX<$?NMtpO_gq|ECu;c8n9ZApe_A;>RO9$0n=MRKb2L1W-x$Jo zj&^!_I!U)!ax=g7>_I{J+p z4TX!59UkIz6D#)ex}gtIf>K8xoZ{j9TxttnnCa&tFq= zMoMeX-n}%NTd5Z2t1rLQY|D2@D;c?kglN{oNE8D#g^H|#D^cc`fU?-cS@Z9^5x4KFHIS*-?$-YH+cF@y<0 z3KY#gF6ph$&?{XXz%IdJZ*N~?V2n4S`9l1tq@+aTI{piwQd8X)bWg`e2N- zdSB92>`RVi&`gIjok@L6Wv0n~miyUg{4 zlh~uX)?){-#DY#gE~!0LTi=v=p**d4+3({=wL^yv>6PE4sTp{eu3t_%GMD4LG=G(- zfvB)pbmng#lW*V*ataG?BI-nZJojj4agc`xNju-325))%)6=sIjEvs$_jYzy1+r?S z>v@*uikN@#`tie#nnQ9cZz!ehz&qI>b_se8jz8h&u1C;IcuW=FmP}Ld9GPUtjp{`to!&#PG?W7`*JC&&O_DY_j$G^xYzE)ST~s? zM?6Z0MJzi4P>gs)8ZKm;ZNYzeSNby<*4`In0ApixbT%t|Zg_U(^_j-YrkgsR|vMZRoiW7|ve9V=Q; zYWbY5f3zXbcEDxkfX&wvcUxnX@Mo(%nD-k8G}t;7>QHc2;B&l)Qgh%+Sn1R*<(N(V z{r%*vYgVrgsG1q?KB|(e%1vMlqgIr?wIo6JTKnq?A1+U7Rb$8;kbd-VzcL8v*b(Z0VnV!NcI4wm4SMfi_`lT8&H1PP>q=PNE0u5xFKMg({$LY`jzT%o%TbnYIGQGa8zje#j^k z(MpLgv@$Q=B5nKp{Cu3*+c%#l``*&b^VUk@Oc`69;^>^|ow_e#8AIBJ>XKmQJl^G* zbSCjOY1{NjtBG)GB4QmUhLKbO5JQt4#4IR-o8pOGWmtWi5rstN*{A;e;_EddSw^$t z-DOo(au_G$ahomGQ*QKwv{TTIG6e^!q= z?C`E@E=Y3z#QlhfTW`Y!s5re&oH#*GItCyjYh#o7!D3;0L~f;OX=zC|&!+Df@LQ^D z|E`?g$#Oa=3Q|vr=bD=At}eaqZ{JME-Eg>$CVes$G_0b&(FD9h#mdTRjX0VBN@MBc z6+rIY4B#nOo$wF|LE!tyh^&G_&6!qO7U2`8A`dE>wPeb=%(Odf-@W?;i5~gYEujdt zU~sM~Sm;+RkZ_AD`2O7taADm6(~ll;aXW6aOFR`H7##FP7GF0|A1h$Tp_Ogsi3;>F z`AlHUTb{#NSBKRBk$#M}v*uWKs=E=D3$ev=;pY{=qSI8(A3kiTixfHT;!+@8OaN9? 
zj;Ze=0=Uo`#$l&I_BU_dyf=M$>C21cQtQh|O1x@GdR7OnDkxCmB+}G~l8*S$vJrI# z2WpLk(+>~1xKcplNw#Xd?Zjs-%hJZijT?6<)#i-!vZqy>85`eBP>htJ@GQNtj<1!C zjZFoJ{f43Xd&hx+c8AUbvxSP02Xk!)Xg_@TKqIhZvT-(Q|XQ|xW+{m|l8nUCHp^-(l3#}&|KYsk;#f!U*9{YWps1HMSYAbSO zHUILW6yVCbzjg~L$kb^~NsNRuAim_ouv8F6!A1L(Ho5TYt6CjMWm3E zZnzr~DIUv5qd?_-^QKH%6RT_Xp7WV&X=!P%P1T7AI!<0Jn0j~kT5q5T))~v0?)1}? z=z0!Z`Rwv7H&)nr+H7%oVeI~XliQ^^L6e#I=Tp&QjM$!%!(#X?-#?liN{qF_ zcbOSw#8h81U2`fzNH*DA`;iP8FwC|C2hJuxq9*M(u03LE$_tF_4M5wA zGjI$Ym*;%$wTbP#=TfOnOa{bTqomx~Whg#AKfjTmpPyXH+S)qTaVkN20fEQ7*FscO zl#`pg@?j@duDnfrjM6j5qFqK+HF$FBnt3-sd>mkvLY)&66BDtWUAgxoBjb<+nu}Zy zvrD>AbTz!Wfg=znD@dCC_AIUI`}cs}>LA00$Ff#euh!X`p>hs>h^P9{*q8xq8TVy$ z&e4YRJy0@BU{%tO8~7>})rRdY~3XIffsPY}g_`ko{L?ClOa9X5>=GNBo%1T*L{X6%#m6eqP zSVd@`siy$6Fw!$K`&t&wB^)`Q5B!BQ5b}Z7Q50aq{iEcULclTcN9DS)Fa{OR>zKGlW&@L};Z%%RXt!Q*aavwb`E|%ig zDey&TmY^plZnL*4Fatqku*F9w%?G&(SeSvG-Qvg8*U&EHoxIWG;`Y5I*Drm3zNM|L zEhILrXr8hpE;5Ifc^?UQU8B(HrBEtb#x2C%p)0^{PqbPVuObm2j>NZ)hV`KEx3T67 z!N!f?DS*Mv$$k^V@}mg*!I}=dA~9Tv=WMaxvMXTU2m5%{AD^XPw9KU7Q3xP+1n4 z0LDNif{}&A52(8~Qbf5W4EYM!FfJQWL=YR~v|D{~QKg-o9E}NzXKKQ~zI;D8G7>1^ zJiQG^zW!NTVPPSsmq%h^V#BA%`kK~ORm%!Gu$}tFUSn}OFcOGgF zX=^W7+1S+l>{K=|cwBnr+1Y0|8xxg}Qt0oirefmXWMEI$i>9EzF47Q}!hFDVQ?^CB zAMjOaO-;?iXrSI#_9K2s1xVJ06}}7_83ttNrJ@ExXY%HV+%WKt&8+>^**s1G5*HWO z*DpornTSqUa_PsJW3sZ3&OTGuC~^@seiyZV+YB|5Zb?T6J90>k!ybfuuEUs)XUD+x z*N${k!#VopH|63TiAo+4vgvz!J`=9AI)vv;$iO7;r%z}x2_gvFA*-#u)r&rlKNl?x zD-K>+#jTAQ*1d9GUS3_}-Q{xrLx9%D5WB!Hei!oX8NX@mCt1yo>)VZWu%QZ9DoZV? 
z4OY(Y>T}U5sjQ4Yu|S~T0lGA+T?7h#TU%Q@SW^cu&3Ylw88>*NbL9K?({XY%@d1=1 zpy$;d4F-nwF%q0!C7@ga=j(^^(NXQ&zrURLv1*6czHi0bh$bJU&+tt!x8ro{)3bap zEIW1P-kd-6Hm~OAM8;N07v_eB2Dt^*XP=1LH}O`WnCM4A$d@c($dyk(# ze;!wsES--Q?9xlk8|DOkrI~VY-po;Gjp<7k1a>*m@}QMx4&+S4{yFau6VuFFy=aZL zOKwRj5JcDF(ozOKP8(sBmCLLfNk@zD;Eak8u zp_BWA>h;myU+O3jHLi~dMBsG;^WB$l&X&$c17tV&eig33dHOrgkL?bKATfv003iQf zEI{2%Z%+5#QUl ztHz2~ZUgRuf|oyVfj3-_l{*Tke&@~|kCo+_ zVxmI@DR(+4@8?)2N2luy8+uTm;^lcNAUkO}IdTF_1~3b5?~J6hANoLs>?u1l+D_y` z98*2?0ZCdpsnXFiTZs)uA0mkE>0_EM2S`%#`s2rr>48ckT7MQ13u=-^zWrX}=1_AC zN89p1`4#L1YrR+Vg-2i@P0`A-^Nl5(tqY~(#jw3vHxU|-u>^oHvibHS zl#2xXIrTtzJWeh9&DI0XB83n;g{G3=l^SGhK`G!4ymbnl z9^rK1bmI4GkRYfP3WYC!SX;cZe7L_pHVD0TIr;&C!XF!m;7d>6w|4DXJ^bzX#6&4B zavVF`3%FrrXIJ6F;pWSB*#03HC8AA#Abk0HKmc`=xZ{RLj~?aP4Fxw8(!>}8`Vo}@ z>mPu;NHiOWzA~^q){CCT^;Jd5Ylk4GOA87PTJllHMwQ?R3j=z^#cd zSJL`KN6|5?B`4p`%}vJPHv6bS@j6N#E{>|tfrEnscez`{1^9b6m@kH{TLV}QnBq3q z5vAH6ToBsvQ*)=dkXD&|x1q}M#(`?9Kz}IbsF%nUiGnW7F>N)uItB2>Ba=46p5-mEiT*Mj=ad&*&B-;$GmyBdR{CRJjq2ZM) zS3WMkF+6|YaSc5%ma8d2{qB9=JQW-g!i6NY2eGw_Ttd1l zPQzo+Y`?DexrRys*f{JIcW3X!lk4A)QgC7=ccEAG4h;>x^yz8nA)BwhP#;fM#q0o; z0*d;)hL#o;R!z`t@mhw_+s&(vY!$N`Ozy4W3v#@S7m&8yq~2oiL;KRDBs5evaFQ9r z!onUCC>PvB50Z{Tq?gy)RV7_noB{~A9-zU^AY2|p#WCoqxMhGcNj|2CE}U8T(jDZq z%`Aa(;3|m(C`&{n`GS~jf57<63=>`@x3p{LPCx7eaY_0{M%*ZC6|e8^s07~@dT?lH z$l${hc_O2fefXfNmULSBg;|TL%g;~eYD_fVwG}N+o{Y;AkGKk|ldHv`)o{&m~v)A>-E!qir~rrc+pf>SQ~DRYJ#4MtY>0TmkS&9n)Imnort>l`~0~ z7}%eP(^s=NxuESR6s}w&deCu6Ji=$!%oYv~0Z?w-PtT?C3$p~KXJjxyIbs0W!sJxA zoLn6Mj9J4TN?(7inj3MC10Vy6a225~Xcsy$j*gDTOKko1dxL5+hhKleYY)bAUy^q; zKq&4Jasi>B#2*s+^=HB5EWc$`weJMtM;w;b*V7S#9fCaw^u8WqE?z?T*B@^vC!1OQ zVfbgR+maZ<=iGM@NBsV=Nuv5QeG`*&U&^;V-i1SLfK%5Dlxio&_U&_uKeQ1BlmniT zM-1|PNWQ2@@e&H1t{$}Y${^e+fCwR&8GK&6kxzr@LqW5UBadxDUf&ej_89! 
zl(>h$=tbW7{c@)=TmqWLP?_S}RHI~eGou%v@oeYhTm_g-ECY_pL43^n)C-4^#X_d7 zd`bJlFKQN)K}3x)DJUx(pUv6%#y5}aykMkvcvu)Y^p7VA2~de1y-yps?z}2KKE4#q zFjm96u+a4~oe!Y^{{HeiuhQg6rR&vCR?SCu;^1%Qr*@~jcOJ6R_Q%0x#Ruf&YPsL| zwFpb_=Q0H=CGw4M(r*FJ;d|?r{y}-(T5hnN456ErQ{;sV_~q4W6NWOLoLZIfgqryM zuP>DrP+2>x(sb{b{cw`7=qPw&7OZyvWH4i~(x2DteBNKHiy)^Kd;P^&+O@Zz_p`4_+q8m19mJp{V#3t&nJeO@8K~t z5NAD>*??WPA7ROoSJfvj?yqIy;xoQya9UXGFE7Bw3Vg(U#m3(2&YXf#TtqMC&m2+y{#5J2NUU%ag`k-$C_MfHrn+q-6u$N~ephVp9pq`YR4jIs14UiR(+4$R0`IWRnb=7j74gdSzp78TD{y|58IH2kSOAQW_t~ok#a&fH% z(@}*Bex&}^o$g$;(z!23w+TIL9J1;zBm1*^_Z5Q~Y=14^fA=8a++)5rq%I$CZ&}YT z=+&6e@texiCTfhcXK}G|u0<`x>nMM}?spIpA^z8{>)Z+pd86#&B91PS#K93e&{Jdo ze=d3r3H6^qYDA+3DFJDN^Uxt?&=SXPkkfPx5BmdWg#Os)M^o%3o!J2T&bFWW@1o_e z2;rL`n*n!Bf$Dh_XO=+zgf0fyP!2|J98F(YTie!Hmq~ef2pKHf0fRb5Z_0X)#{|X-9&sT$OTE)p4u-!zf2{MG_0U8`V2MKD)S`u=O zL05_Ahgc(h`7M7Pi|vmyQ<8>DcAmF#|F%u->40&m1TZI*Ga|EjD5{aY8ILv?-qTJdkI9OHJpaXU&!jWX2B z#l^*OP*|J?4lts@Ykql!i2$@9HNwB{^JBE+Kawd3NQy)M@8^I&S85crcr_uCGBZAQWOkuEf+XiP{g~v5J$VQ?MdWbxG#&G&`kS`=I;hw& zt1i;N-igTBk zomb@}fCVy2%+s{LT4uQe`Q*hh28`(Koi3)8f8#Q9NUkk2Vky<_}45L9@3IwxDJ{!`P66C+dVKrYH)$V~>IVcyvvkK0Kj!|;11!b&~sx6wN_7oSxKZ`#@}u7up84ETrDRD64p+X%#q7drp0uW#-V=Q7iXDiy(tuIb0O zPjA)AcMGMcYyB$QpG-5ZH1Yr2%9>;b3-#56mQVCngLj|-Er6bw9wc`8t&O|Jt5_|& zKE$6SPB@WrLg`oIR6y=tH2s(T9?Q$S{4^ycMK!0ru8s!uq*_eA>q2(&O=-2A6MMM- zVB7y2D7ZDMQFORMwCv&(q-#q*n}|-W_Q3%v6!$W4hGs;i-nC?ZH~W&x+Yy%LRrs z4#eG}4cmP#b?X)DyTboao3h4ZB3VXtbaWth-fox%p^mMMFlCngg^*up?j`h?f8PMU zwoj%9f`fx$Q87(l>U3X`MXNFaa#v<#4CbrSa9y`sAoYaJn&|;!2?Eg8JRg=!>aB;~ z>qUl!GQzV+)yDC^Av2MH1v~|}pfT~8yhe1UeHwrbMEIdCWHOBfmspi+-J9+*XH)Rw z!%3oHTfJtD_4N1iab!zdALkmcr$V=mP=?5p6XlXjdqfNX4!)FZ#2*#dj16rEPDwn#3P3;1OynqG##1ZjH8e_UYoAw4>-JEMg5p~ zC>0eI;Ss@&@qLtp0v-esmiJtWfNs&&!w#br*s}44;~ahSETIbCaLY)PQ&SJwz1lu@ zBk9nPomfRh#X}I^i(OoQd;H$seExh7c)*ZkDhqG=*R{3BY;1(C^EP2&wez*>JV2WfScq^Dq@B;PU;|088f6oLys$+ZGYA$r za4VDF2SC`78(Dx9>FVzG{QC7GJcB$`V*p;{K`%84lj@{Kiu%zM^?^BX0<8FbjiIfr zEnH`Pg5yb_KlBFe-Lq$#*J<|QpH5K8`!b-b<>fiX1YH$l^P;cmZR;-;wN{GSI8gCV 
z-vnx+5O1kEg_J4kRB%6{AmZJ>?uqW7(MW4L8n4j6 zwQ*HMOGEJ25aihKdI|J(AQx{vJD1kqQV*Gp&`JBM%gfi0tkC?Cj)02p>FFVnU=TV4 zJ(Sdi@Pu1~L|XjWq?>EgM@b@#av~NmlW#p_MS|OboP@Fl@tToDzj^ab@N=XRC^UB> zBGyA3!?sI$z4rC+rDGZ9_o@f)_S>P$97i_fO{i(u!>j^Z%VT2t6?NQhQwaT z1c1MWM5x@jkf=sm@kH2{y1{}Rf_Ew+;>>xMSe+N5LqDBh2;@My6xeUe>p#}$e<%_A z3VTe}sk-DnMalwSqCnN9Bg4YV5E>yGS~)W$8+8xIL`Fs?;=!-C`6$|6;JESDty@N> zN=6f|PIvpCpc4kIDRdw)b5>HqT#0Dpp#*pwr+UE>NG=vl3+5jGfkcZKL3*#*ZvM^? zN92+)3qGb4Y>=awkr5dmA75NG{tgsX4e`$xO-zo)HI%fqX*)*Q3LS}SID%+PY=rT0 zH`QzWT}d3-SX4FXVZ)`6O5^?e_ZvMbcwQ?X*C5^)1^=Q-k&7eY?d0L%0UMBKoVHux z2jvVDWyahQU{1=9qi_ol%qtM5=8H>@%wi8&Z-+%S2rZw~RSvXeAb+J{T$5b*o~7OT z^{V#*gy-&_9@YN(Z`Mln^+afH-?5_s;t=cr;kSM-R^*v?*PUUc)w_K8{LX&0&7Fx% z)=DxILsRXwH;R}GOnfY34b1FHLOc%Oe z!bn~9v!H(HJcpEI5GZS+_9IDvRNntuOC`-ppn=RmS-Ji?jZD0Zi;Iir7s-?I5pAfy zR49+L-98+YAB&P45`XhyM1>JGGByScRH|1RU6Z=lG4?ohP534G2$1lrmp0kqfMFL1 zhb^?gb1ex?!x@kZZ}4?Y#i8|{{3;ItmC;Gb$+ac#^6F3;q*!$R|5yQ0xsZW(k6uY# zN6oBfC1__sC>nr+#m)13Mwdc2-RnFk3R@gZ8Q$s(-(Ok*srrDoG5{;#>+fIgJXYv~ zdq#nwC%p&IhmIkCSqh#g!WAGX$qw>Mw%Bcn5E&plY7}KA7ZzLsNCFcj@R*yY{WKXzQ z39loHAZ7~mq3zTJs-Zeiznpg;*Z^W-A8e>x+}x#a-mE7q?bv!yd|HE!Uq4Q927Bk_ zi9%*O>CAkL{O^jm+80>D(hVvpg)hA?(aTqemx1==_kxah>%9xzl8~gL4q2~uU!3rf zz7Pdd8=)P0cXTGH#1Xd-1-u89`r1%@c*wx=Y%d)Y7NxnR;k07)FFU!p^+5n!`k2(P zzwU+!uRKA>Art&lOA$YWz$1WqhE1OWs>c>_Dvp;$AA z=)(?#_Z<0g?f4}{IDGmEQ#eWhj*~a4>gxLV-1}ywt}sbyWL{j4)0EOyxS0g_-~$}; zfWSC(O2IV46?cDgeg9A6uJ%Cg_SReIf8yY%kgI=b-nw~*T)+`DjMgAU2|F^TBC5dc zhM00Hw;tCaph<4-p+%@U#Q8~w>Frdkq(uERF)9EpBOM4Fc47GH31$TuhvWgIAG86cITNR_Gt2CZu=jg0+VAHg>*_%d2%A*1dSJ(Q|&3cGk#l zc*D?GHHUiz!kS^@#(Tq4zumEwp!x;jhK&-tN(u3Yf|8PVTyoOj^5t8Es|Id;0&@p! 
z**}hZ@5!o{+`oHai;^OJ%tp|xbLpW|>)X1Z-t|T}X2M8V`v9!KvbA+3=VQD!mV0$NUmf_>H{0 zJn8e}5SZg&3qlrJGhWQsHnkCs6*|dT68hO1aFl>fh9w@Wprjo`0?e@LmVsk4?$s>+98V%`d()A*CHSXN6>s?X_^4=s@xT>=Oweethuj(^;k+5*BN+)Pl!TtE&fy&l zFU>iY9O$lZ;~=m~AYtwxays?^i1mhuD}rK%DrKZr%tkW( z{QQu=AD@lPn}a-I6Er_vn$UDk_eln07DDO~tEL7amj2hpZz}*szk7lOs@imp;@y zC{~dxnC$O}W~B@HRHSS1*?ixmvR{w(syhwGVDzC37Mf7!E3fkMxO(fMQ}kc#9QG3-wZ)q7Q7%vZpfgqfB*85YV+pJa6LQ$liv*j zoai?pP6vkTIIV~7B)9PNLjErS3ln6pTr$4ojvlU5th*|WF4-X!!6xZZ^`UgA>GLE7rJHxIE#|Ve(4+e5-W)B=i5($Tk z@DjPeJkBi|V8Fd_Yn9UkTB?f5%F^oUU7fEqk0tyqH%+=faZ|v!V4_%P50i1Rj$fSP zF$z!kgJ8#5t;?(^d*(6KDAH<8@0R_>dLR+Vbyp#c%K*DfV0?k_-E;X6CYnEAYL69O zJgMzP*|kE@VEr5Q7Er{y1_pf4UPT%(*9ZI$M!P1VXE)E$`k-KlT`q5%H}s&31SDfI z*Ttv}(*A#FU!}xykWv50A#;0`!0fF5_)6+}Zf(_mYs};vApBXKZVNPoWrNfODZgxP zYJiZy&(J{C$jr{>Cy4X^qpAH;Ni_1?Uz}1`B`2Y=^uu5vCqF+eiLjG6z=@NWTNeF4 z?_p^z7ndm}bhH{<8|wgo;OLePBq zz$f7AWA$g1=G zWQs2@pKi)h*FML$Df!iSHZ8iA+Uie5~^!Ph&FdkM2GKfZH zd}#&OiOq&}o9MBlqM}IRuZlfFL(dj1ysoJ!gCt0P?%X-An>Wka+u49el?$&&+n5A@ z>cw7lqfE;}%K!i%Z39gK-{di13qg~2ThCC(X^cOU=3+%6g>iG=KP+Hq7{;ttv$L}k zMydxQmbCCj6Mi%l1d<2v9henDy|1}K7=2M5d*LG_d7v~BRTEJ2C~opJMi$|=c?hha zJmQXDCY{gPiIF{mpaNMb-umsQ6EUiQ(HBqfUl3l>oo6g=96Xn%LnOYa=xBXRc%sG- z*gS7^46YVp^cI*MOsj#xcF2d6U^)qU9cHI^n^3n~0=XP$I&m4tPM@ZSd&UbAUJovf z7_r0--6uho9A-x5j;Ky;r@xoS9lA=h7sX21V~FwO$GjEP*2rP7S~NwVgu*9 zj>wNl*4972oh?`x%_pWBPv=-%FycVVTLK4&N``?fti1*x$^l%pxt`N1Be_~Q{)jH-JZ4e^LCzzV4+yIc^)12rK>Ig z<{HIr5@1PufZGglb%dAm+*nFf0i0L0lGLBBY`^iarzUz%8tRKBQUl1xQRKC~$$` ze!3x%F$)^3g@6D{-$(HUSS+f7xlbxsgf0E$1sMNuQeYJc)rk(6g}@l_Oy;R95O(yH zxq9~y**PLyQAo{nnFw_P?HceHyhj@KYsi^{v?yjvA zOTejvEm^z1HKoy!5L-?+cWByi7dg40Rd<;mN+d82z&s?xeGa2-12!3Kdf?9?QOd$( z_fJAsPqRbN^0T+A`HxFEMcE`*ylNc&d8sW|kZ{=nj_5;x!vM_gFacd!61~heOr3&g zlY=CN5h6ciE6gEp`Cg755yyWoVYS2PWEWgsD${qF08oXmd_FZ-2;mq`p<|eAR%u0h zKune*YX~)WfSHM3f(+0Aq-K;MArda2TZLS4ljlqV^PcQAC7^-!!T0sT^b;klTe2u? 
zMZE$%a5%&S2NUz|Bobs?eavI)>UIW?W><_;Is^x5rrb%qK2E-7P0Oy9&{J=k8rka1sEi<(D`Bny+I7uT&B& zB#_Hl14H-VmoI;wX@yN!2^k82SoJxHfdR)-gfnsB7?1wv3m^Kp!rm!PbFt0kM{`|T#VfN< zPZ{6ENL)!DfsYC50t*Q-%!$caVzis+$q3mFD4_~XD)qEA%oH~?hrkZpKmh>rGxc5T z*{2>H{+cn6GP0oAK`I4&`O?}-7?FGf0)(4kBuS3iN0^ZT+i!pU`c-`41(pWY{At^h z0}>MLU5>B{5gCOTyv2-%<>~qkHQePmhHMD`6vjq!kxO3L4XwtIE=)`(=EkPBA3mH< z6l5aV!!P`gO%bW>(8OSI-<8#cpO-B2waPaPPeOz&LFcj$gG$=DR;xhcmF0B0`J+`| zeNBzu7z5qod&YkzNV zGPxl3Kro8Euo_oE=p3JXD=?r55WXB#xNtk@u_o)G<;W2pu1%iwuYMdMQEijW@UW}o zPxA16eU{(q>{`aP(y}gj_TAqwt$K`JR`{-sG|#Hb@=2UWD7?H#kGzCc?M*tQZtgiB zd}MIOi_;hb??@>HMVY9zCP8(v5yacKWF**Qh*~`ag3M}~M<~iNZ zHH!(!_?3~XuTAL8Ge#L^&la-;G!?;-Wv96WyjA2aD0c)GY?9;7;|Wl(Im3MSYdmS2 zpz2C-OJPjhmn-4zKfbVu*>yn->IUUu1bzeZ!+m&t?(B-Ww%P;L;WqjW%;4OvG$=L5 z7)R^fn4%?u$q6uMuEdxEhOE1ESLT^8cpeW+cL3YrFo=lS)6FQ&KpMm_-@Q9`N)V*) zFj;?ga2g{mge#Abhv3)^Ms-2LmP4;WN^dcPX+9V|!L1_Kg&sSx^^}wZY4J8k0DTw1 zgTXIIP=pxGtL(FxD5DY}PGM{;2Sy_M5jeSiSz1DV370kVIDWE(Ac0`Q2&Vyl<+R7; z3^Ej)Ui82JFC-WM9OdmY@*u5G+{MG|+32*2Ja&a0c`3Iq|6WJSa@!Zxz_Z$gZ$Hg^ znqCt=7a{~5UUoiD{q?yUUJW=V^f0~V@leZ7sxpUuP1v{n%}ISVEP4``h&O}04kc-R znd6kix;m#48pZCwM#heo)Tg}Y-r1+J{4>w??`TupxzX?OSFN-d$E|a?V zX`fEZ2_>bf&dy?7Zkb}EFh*ySG0W_kc7T)j`q%hEjr~cc32Fp=iUXH{@c{q8o&_quyR z=db&6H1`wRNOr`-q2y{)OU$sTt?kFBDqd_^F}KheJ2UF*w`1hbDakui1HL`<8^sK!83~|v zH!4bd@F1$c5ylwspqQ{4!IRGJM)BGf4>a$9X4}BRQbQzjUat*z<<2k%elqQY?K!+{ z6MKSLRL`i{x)b&CCN)))`6OP)zp`B4%PTX?o9{16@3M$=XBAD2qP+d|hqk!| zyQD*NVcT=lELA@RipwJ2g7P|kemypM_r)EBFlbQq_AN1kvd?wiZYFAE=e=9tF%5(+ z6tT`?tn~@RD@gGw#cr<4KhxarMns%vm(3jwnWLD2?LIgxtP1FU)zVW1WJiMbA^}qv zpw)R{;cD(uc-ms$wg9=mjPvb9#aoq|U6@Hjg~NC83Rh@n}aqk#79_gn*$i-d|qeI7Q4nptUS@gv)IcbLvaSvRO*}-YXnFn$Ds~yz<1Ltj@q0W@tf=j+R97R(vL*1$ zPcY;)+UbsF3T<{ZR60>SrvpS6ANI`nypM|MMTIT;!b*wv;4D08L75X+eQ>pvZAxT| zo!Q1T%WKgo)j2j?KTqUUZu_$(D%Mk~2}ApCi%JZnUm4_|see`d`NFyAJ?C1Db;SCV zZaXanEHg2l{Of))!LW}OuejlpkAvsD2I!Rs{hwLtV<2sVR6TE zMbex(0N#y&4}d{@p*4lWqLDXaEei8yJY1cJw$h?{RDw37YF9sd7FCu|*wb4_FjW?V z#>v?X){c5qUtWQ zLW3;|S`iMp)m2p=Kw!;`VGKolalG7a+7P{mLx 
zv^u1W^FNP#)3G=mFa7^67JXlzX>0AUYYm%<_17hBqIM_k2I;Pkcp@ZZ4XOkngR5=%z-r-(IkooTOzVVQK|&?h^%X z4)b5&hSx%;Mwsn!0gi-Yp>p01-rEJSR0QP7>*$=8w4JH#E6Cc97K;_W zR@D|gi3x+Ra8iFo8ule(F*RZW5(8peEC`cm;Gj2!lKlL) zeb}9RVWK5j6iih0MAfLC>EC+<1hEEeBM+IJAbqu2SF?>)12ZP$$z;Uo)aE}Y>$A;k zUHDsqCgVn+S=H%vj811)zse8^>PyfR3Du(hgCiIiMFi4}AKX@XCdVKya9<*ECSjd%L!B;Dfz{5FU=Azsq=9lw*!;~b@husO0HEA0M zP2Ui>4pth(``HG4iW_-pZ(Q4&z7|m)F8T|1Ro1;NXtYB~(c9lL>pb46Q5U`v#cNy4 z8p$wx{L86d_z$Z@c0^8zoTr6{px3A?TqyeynBYu)+AMs&86Mum zn3D|t3c>>t4IS~k9JF6Eh7dy^g0KSHkq-I8xb}W&@ye2AP{U&1quTn<1eiv42VZ28 z+k5*k1lU{Ez0d*;F^cpH9Ri3oBt5Ocm?DDa(O)P+t*v0skT6@Q{-{G-AiV(Xj?Y0a zBL0JQJ(f&n^26zNa(uPxzf98nbm^>dbuG+sO`H}7Bir+mF0<@XiXtZY3YO=Gp^yfk zwYU!3v92{oUcEp-$e zS;S&1y0&cx4~F3>0&o)6uRgr;Rx?7Grxzdy#}`u5j>Cr|ok#P0K+|j*;#)0uG2^wO zn^y*o=xEWxs2}Z73cCNKkC5;<^*|d3_VCX%Ig_{N-`N=l$Mx$SV6=TzF~)KI#>$Pm z?$Qr461)rSyk_60<)quYZzv8Nm9vsq@uPa~%O4%tX_~#c*i9kgf5uc&Cg8(}zzaUK=zxYkQU!X+KXY$?HlZT2VguPnnlE93DY z#Kb?wvGg(Xw%e}^8X2UrhiDT5by3_1ivh@4e{3}aE-@(In^MrV*P?s@-G|1x--$h`|uVGsxglV2|1P;k37t>GAwv z-JzR-`Z)>LCZE_=&Nj8PSL+M89b}l~IDbK^sV_g4vu7gmO)QeHiidxaJo)WIf!$Lj zi}_Illj)VKD^dXL=bDQBV(Ws^rOASN-t9f}_S@bT2V)M8;}Pt0BMdW64`16B=jTQ3 zE4*1GKAC1SY*k-4o1VTRquF$c=17wz`Q69g`%`FjZQM`qBDwj`1vVWlkn?aLxr^ss zPe@yNlvXS!*5DD!_v}efi*I}QJu~L z1c+OKf$_lnhqm6{Fwz@c+}Z6j$Z@~XkoKp`@I*-(-TMNkDayHkh?9-NHa0ftnkhC@ zlj8j6I`fuFuqjjbcN8Vn6p+nMw^~L(mPZs5Q`U?45k+X_X|cf+o)Sn<;YUHk_NR4qG}p8_QoQc;?FZ%4_gYu^ zDhGX)5SS{+lJay-uvk_^l*?Xo#>1Tn_E8($`&4)MN>&u*qaTd21s}~l2 zd7WQ!^Q7|qZ*(ffM<-QW=cY!zt0Ej?$1I&=yBtfO-_}wz(A-`+b=~p%Om$mpfAr4& z)03Ptwr4giRb{Ln@@XGF$Ynji`xPIy|#k&t4adi?`Bb?)~}4|<>HD>Y^b0ocggJj z*2C-hNZ0ZaezdQQY9MY;eCP=arA z5O!ctqkF6t&{7FqdcQ5kv7Z?DAyjgZmlt8Ts+7zo+TSXJ46AB3hu{0KM~cJS{n&Vc z-pT>Gp!y`8JkE#uCG4)J7^KZOZG}~N6zqH~z)?I#Zelp@HS=C%y%|Kgy zju`cjVt*+rh9b;JPh|u-#a`N zI;Pquw6ahm!mo4x(8kEL{bFNnix<}92SWT-xW6 zA7bTpxSh`UBcINI>w2*T+t}U!I+KxQE)s3bsr*;2I{RiLJKNsoiL|iI|J=O%z@@s< zRi}AnXiX#)|4Gj3cR8`*sr{?vJgwWjG zESze0)_W?oZhO;!|5Ry<{#;L#bwiu3$+J?A263O4I5CV 
z41DnA(OufS-!0DW!$8%>tKx;tvHRb?+9P-P++%~?eljpiU$k=;<(;B+NNkL-)F}F7 z%Drn>V#R{X%F>J@9>?Yg-Gg9cq*mtWKq-1FotZ+5(va_92{Kw41k!yxMiI|^-499W zaz@bJbG*N*I{0GrS3odfvD^zk8B#cXWHgwHX?Mq|yz1^8Tp80`yAVG7)`TwH#hriG z;@rDRsoBnCD(7&K;6tPjI$@3j?7f0xSdEd1Uup&i#1$#LO}^((7k6;hZSyW!Ok z$wNE%M+}bZsZ5{$%wPAmxM$f$N?qWvPlO4-{d+Pa<8M^q1$n2*mieDdk@mi;X;MPJW9?ES1&1Z+ysKM9)0zX{;3O^W0glFDX-%Lu%1xrwR@k z93J!bF5`E3uAJMvzTbTM-8o73>dJ=U16vP>vp;*Bmrfe6)&82xzw)5iZh1zCVIpO@ zn2&Xuzc9BbY(ViNrP#`6uM}?!-8(0Xxw{`dyge)lDjVrAbAuI>$Se_Qjv%{F!y|O7iXe z%_8aR;!hft{_1NtEwJuXKU=UUy8c^!ug%ox(1vV_OMWMf%tp3SdM5+NZ5L|J$~)=X zJkz{kf912{3^F38PBA6VzFPo3gY7P-hhw#W#FO9rg=G?-!H$8HPyR?4_Em|?XH+^*~`0g#c z)%^-dzVlr!)3%0BVvIh!nGb}pm|S3gyiAOLMPQyn)!UL!p$Xnw<11FQnr2;FV3f9K zSlaf>LY^vVj&0?(9&^pa#(;(|JHZ_8qmJ*7O0CM6`=~=1Y1!)7aA3|h>Fe_~d<{Kz zo_jBp-1eP6DEc5G%A@89O#r-7?1J8{bQ(VE0v$ZYY< zY_hdX!sMnCO(>f-Juoj&csAH4pz7nhbhzwR`T)g0r`5$dC70#8OSaRi!Lb(YFq$e4 zH;TGNW2>9yctXmM_WFIs6xv9!ex29)oOKeX#_J?rQ!{KF(R|ZswNkI-==FRi?}{6@ zsluLR(sP@XJASF+UvU2Dd-Eo%1hz%H5tVCB$VY{I>J0UnGh-}m*RD#ONj^|bI~(!I zonbduGA_3?+-qa*OTEW09JLP?&e`pVzPod>;@H@6C*S?@0jC&fMhD+C{Hhr+G#+wO zuGIDu-{`T9**x9n238N*_k zM;mP^@w!K}ya&9qGv_jX<)mIHy>-Xf*F}e~;R`k6aH8>U*Tuv|6Yl;Y3sZ@v2PvI}$cLo3++id5&DxDxd2%q)|H?SG6}~LfYJ2d#X;T zDU8j0UB{o&KE^5Ar)JKmZ}sI`O67Z-w3#{{ieIGS9G~igU*Zm7=E(;%6jRm*$eoJh zou3b=pC4ZRF0u1xn#EDShO(}`yOv4K(XvQ;w%V*+?$Jgie*b>J;psbunjtfDiVOh~ zDQeYkcxMv(#)~eUwvPJPXUnrWU*VNxTfX})RTJYAc`tn#&iXza6M7n1+uQzw#hlBx z_6&RX^LyO*Hy8IygH@#`+wwzpE|xs*NN&C#XWAdp;_2|%lJ?RK3b#r7xze#0?j7p-LuZB*UT8PVNLH8t6(=ifd1bobRlz)NnX(AN23+u1Pc z>Q#-p>~qa09SftV!e7g)tJ6lFC^!DFhbJueeOS{6!CBS=Hnbb(=b6n~XapKKE#5p< z{lab1Qo~e3eN}KFDm;@#wDZwWeAU3@xvnR1tNTJB=l!QgK(LGJ@vdmvK^{*j&Az*Q6FVu+_S_jVu>-UeS2DoGl>mcE{CJe(oXm%n@FlKjg zi*{9eN3lO$bV|winwaK#yT+Guu0>Y+jQUJkRao4>`S%_27pgdYpFSGM8=7+FQ8oRj z`r@YT_@zK6&jnB}6V;P8gSIyojUF9!;Pn>@M< zI6@D-F1SX^S~8WRH}2!*^-kb+HLL{>Tg>_pyA4KK4}&p40VlENvx@57sy2Oq>swhe8oTFJ2t5HXHFsO#S8zMJX#P8i9&KK`R$% zCez#xjZ7u>9yFZZz4e{vjsb^1XDHj>*OzmL%#w2lFiAo-4*bF-po(t?G-@NLyWS{= 
zZ8$M4@JoE7^xmK4Jqn+M?^H)9-2N$it9ZNn?qr4VO!J;a^#!NFhEtvMB7se+52xhM zeYaOt4uAcX(Jxmbr=crJdbR25>C4kQTNt!{3VFDd<_xMHu)iWWqWeDV%=m+=I!lx^ z+nMMm;?vT%j}Epj$8FZrRXEEZDLodHqEd28tdrN zq<*I(rMc$Yw)2UNs+yWi1j>b~;B7altzvVP>Daj}RC>0J6~fb&P&W!lQi?MTid+7W z=c~dd`x<+v`XyOfPNVvE-M%utMgN4aCsx$1%-$o`Yieay%c0{>r^ex>G^-@pzevab z>{Y@&XQ~6Pn(UlWp`jHtevFg^(`OY|mL?9U1V$ zthiUtqFhD`iZm9u9HnF<@7M+>>I6A|+}~`J+;xuW$A0~{iuOVE-1k}uiYmLPua0wY>~en>c^|`F*Uzx*i0L`z z@G;BZJXx}#y==l@!lOy`V<`9AdEWs)CdYgC161Zy;y+u9h(sn7MrrMDIc=5o%lX;T zD*Cp}o4d;+T~sc{eE#yeBA?c^=#*Q^F9nNDJR9@B3Ohs;>cOT`H9W?efoCgq)CJoH*(V?v`!2g+SowS|)|l;0hE5#O+v-Z}KT zn*6riZC8&fa(Wn!Gix+_)yC&0_J7=+>9P>(WpZG{&#!57dVEsU(@xfow1 znj1{;HF(Uu*C=p5(f6*nw@uwz#+2lFU!)Qf=&M-?AA` zDJ_n-wlF)>)nDd2Hxf14*~vXGTGzGY*m=O4I#s^i=+o=-HW0jk@ z^;B^No6XI#h%GC^t(qt;QeFAnv?YRTWoV=32G$qKE)8)HeG8kC9q>w5&ma4xqnM|` z{DNINRcZa}Tk;q-W(2I-nt9SqIVZEA#9eXPXYWd;wZ^JXI#W9fZ+sO{JwI;O-k;*% zN&Di#@sV{>2a4B?v}F4r;G{|Pu;3E1p1;0U+#BS;lvr_bnHO!>QEJ_UzVYsq{Yifw z7h|{VKF;3!l9~1Le08vEZAa!2c8jjq8y>7ZZ5=7`uDLTlit9!8(&wKo+*R6`Cq3%Z zB4g*4&tdwZhIgppw3O|*n~O4erq@KgBqHD5YTDXTL#Nf&)-g6JW_#0Abl9T!N5;nu zZhi-XtZ#WTC0;hyij2&id+Ac!x%;8%_%c@~t(R{@p9Gw(&fooNVE5V2UG2%E%jF%H zM&IYR3ymJ!H2#aFM_IGr_hK1c#>Am{!@pDssuAd%S>69?>R2LnJM+|+`l6d1`T{ex zE>G2!gTCxDzkE3(SM?wVN2%1fle2TqLa&{sW>{{l^#1*g`L35PJ zv3L2Gw@xc{wHpOj4IE@tif_n|2naZ$8FuZyAkf6=%Zod%fe^oS>wKm%fFLN>=DCQ? 
z?HfcNuL1BYA(T*eM75Aq`ht}Y9(8*OmW`w24F~C3_!@5ob z4B*;Vfi^fLRc@TV7hM-fpZDvCf;L3lQb0~r>9)ZfrKBbIy$25nn;I;J-rLNU)@S`% z&1cU!h(~gJ_v(m*DugJ+$pqWk+U^Oj88h-JyBWhhW>mLPXgsa)l5B(UT-vi|Nc747 zo#T6mznIc8An;cr&lOxngf!udQe-Zo%lP-fGv?uF$90nZj&OFXVduebA_We*O_F(W zGL^zT@y9lM?$SRatgxm(VNGK`zqxCExxI?=$yvb*^AEp#oPAdFWh&mL_oIBy+^?|k zOI>QyLfL71YF525O*1%cw@3ZCyi(=y>QT0`%S*fZ&3`dfu?xn0Z`!e|k7e_InmTLR z^LL(JDDMq6EtO9&>nu4vzF93gETO(CHK8?P6_Z@%yQuyIZ}~{Y!W0H~iT!**I3v)9 zKs&M2GLw0J#<0JAcdd}(n((8a@1GiJ(WrA<=I_csv$LtQrR&NFdhV+W+SVi^$&qe9(i*p?OyVo%a zsHt65IGF5umER#LvgdJWYg7g+1A~M^U0Unq@2f&s7~56_P*>GGt?L$X?6_Ju=aR2r z2oBkDnz_uZ=w0U}D>$mywuhUGNENL1Y`syY|G4d@rD4C}6|78+!xd*~tpY^N*w6bI z)Cfm}FZp~JJX}rPS)LfWM`P5Zso^CWvlwUz@fBx@556Bi%X^5)(0Mf!bRJxhb0`O@8|)ITWjZss#F`JSDo;R>Bc z4`*9H>AMw!+%FYsldsSgM6f~Xfjp<(JH5w6Sz{cnw~b5E-Hl1S(rS^saCxWpdx6qt z#xajyDw@fzG!o?H(&A!nYMOgI>%y*{s+a1*&^fj^A^z#1;LD7y$F6)cT;aCyaNMP0 z+Y6gAR8>Ph6>=)uTV77}J-2`P)S$A>iuExUFH8HyC4?v#M=*sbaydAzE6p(yiR<>P z__B{Cz41oHCBc`z-#I0t&boCTw%1+BysNWRqHuh0AuI8m>-sERW!kdnAzBf`TVEX0 z&P(4MOz?AE7)rKpuaQ=b+Ii&FBOYU}R>n!;!Y|p3)0Bt5(yc>T^K0-Q>8L)>BkT@xrgc193k$%T8%_-p!5|i*@rtp&YU_VsY8zYN=6MzGLwC zP}0j%$(DoHzpT$FyCm{BIn3K=s+{g8_tM3!6^!A2^(n5daqCSyf=m50ZWJ$dyz5Gx z_Zn$7r`dX8-Zw(TKW27b+K)~8aH*WAke!pT?rqk`PKs7+QgfUf3WWiU^C~i5^WW_* z94oJ_QaTYAutN0vWUo}smIZ@91QdIc#0zaNPHxz7b^J-0*OGp^%S7jr{twH(?MzMI zdJdglu|6ZIW_sl({Y=d(f|mt#Jy?woE#xS$*F9MOi?Os_QC_m@Ng|uYo7~;B-)3$f zk*q7AwUTO~O{e0@pz5;gTHRU|d_jIoO95@eI8Dig%nLFGawZ?kX67>AUP#Z8PYCW` z{4(0*Fs0sdL2c6*&5N>dYWE5ImP7UzQXz2ERdb77%%f4?_>-%}m6;({!)(0?yGT>r z+Vx3|mD63b-xj-4r)dT9__-^GyyTZQ${lUjuvJ^`6LTRw#4F+hb9xMeW}M@a*I*T% zZyla5wR`G$e(o^M-dedrbN{}wXoroTtuBsMbxge)khYSpe8VY`pi<`Za<${Y?uNSq z3qQC<=;mG}l}<0TSqqQtQDhQ*Yn-kUMK3plT{fDA1&3?u1Wl4{>I$N&o})-%zs>z^WNy`il**J ziMVc~@WIaV*V`F~zCBNHu4G;QgnLcMvRx)7hW9505H}skW-dGkcORrhx zT{5PcYty^4reCAra@il6uMett)v)_?RCTP-)#S2DI#?rP8LFl@@FIGwd;D;CDqGu& z=9(9>FU{!Bq_a}BQHK>A>DN878+FqHvl zl_fPlvjczcASNp>yb^j;x&Cf`<~l*Xx^dq+u3DF%GW{Dz^R-h)=5I*!TAJ1xR_5mJtA0M;zN@`wXMO*JrmdBaE39!Q 
zmVfDkTUUBZzW-RLS&QlUBM;-)3#C>H40cA9y}Y=0u)TSu@s5qBugKQNTc(a{{Nb|s zap>qd9p8&{dtKFfM{CR)UPm2NRC@i4ZqvE_x@=3v#Vqp!*W_-0@eJJq?7VB>{8f}TrgO`#_VjxZCfV2(DfFR_GNTs)giOpN zy8RZ5rF=eL%=}P(FnZq*Lyl3v-=JPs^R%kf!cI zeSH3NrLIFncQ9+xOAUpL#9`0MvY)m2GkTpuqfwu#`libKz8eY$UTxC~GvErS(Dr+J z{|;Y`9qpR)R_Xn2x&oChr;p04`RegGcvQS1O*<(yjA1sfm04D$#Z9I*lP|c^$H%e6 z!@`|vF;!Ery_usZ zak@&${X`lGu;hPvDyvNJPd+km1AeKHn0(cs7oHA6_f!?YHRvq$`~Q zS*XXRFQ3IWBwUEGZm9S9s^RsYBbQj2)D22+sPc8iDswc&I0UV^6VG==V|x7!ZK~W; zZSf-G7OwtE%Ocx{efTCy)(u=WJE9py^U;3#y;SnS`6b1%CaY_iz3L|ItmZ$g^v>-R z?ObQpc{2-2`ODkVn5p)>blo;3!nS>(M3MIV@=b0>4_|mvlQ(-bvc9m_<>csj;4np( zUhD7Ipy}u=@9mq&jr;Valka+tdeWZ7x%|r(r>Oe{AAByVdfKtxpH1BTA(Q?e(b{t!zKX-Yom7{w+I|@n@%rjoQRJMfUKa zE*bTa&B=p1ejRpxDxN((aZ)?3-J`7Wr4s*vbGKSv++z=Z`_P*)Gjr9>tg*60mGg}g zxxIF_0$Mi{dJK-(Y?6|9d^6l2p!KOfJjuJ`W=f4r&LDTjDhGBqT7~@egKjh{=NO#L zqi@dI^-Q$Co|-il?N(Fg!@v+4Dydj6QVb;QXz)6BtWr!4zN7o~#FD6q|(Kl;@5pn$8Zw)5e> z6aGWKyX4is@9*@fcwLpT%jn$0$m#!7;87Hc~K->%6KxNT38Vb zId{;BP9vG{e?R-d7L!lZM0HdMf+h!-e%e79{v9OL-^IxKkZF1#Q^t_}|M>iTFh-Cm z^ARdgmn4o$Pjw5g@o8n9_5?cAH!MM$|l9P=AXuFE0{7lo35Q*KJ^7~yk>SZt?#pm?wXk^ zGZ$T%JVfVL?#(;jXX{^H_i%AzNukI4*b7yR-8p+Ir(V5z;P_1Uc>nsBmp_(ujXSxE z*3RmWz2&$nk;k!8DK1+=en{Hod?M?aPt|6%_oQ#r*-Z8(ID}Wc?Q&mo@IP3{V}osq^4|x)&L^PZE^gvC@oBd>l zDZ0Cm?Ax53uDvR@D*w(cItJ5m(?zpSi%fF{^#!uud9HDD3#T;Y$@G3{O6E#GGj?&f zC*54?s*UEYZfDs-700J4pEz24Vivb8?(*j;3S5|;8v|{#(j%Hrz;=*?w{wu%UbJ?ov{%as-dCsS6dS9#s zcZ8mn+Y(eYumcSStgecn6BoAp%`?MP&F@hSGRpMljEV7twuR@y%)W)fzs7q0{5+K# zhPPbkD*}c7W8i0%jcZb%sN(Zg+0C3_Rk?WZdvNOt1p8p6gLlhJSlMg;X59ZpE&h8h z1anjA-!aE=-Dk15jFWXt_A=)CC=eZ0OTF>08z5iBtqOrwn4~f;IDP2RISP5lmD>aa zUL4(}>6M0+KrkpV=jj9m$N!x2`0ueHd?(*Q#Rg2{a_-sl@@Q=^?c5Hb(CLM$`Gtj( zwrh?|PE7^0Z@SI#7pWeM@ZTdrWb%m2Z(t;7b8X|=ERi66mjAqYjD_IB|NAFAdz8VD zx+J^O<~fkBcBi4pb9_P@{r7DCj}ap>$@G6d^B<>$6&rZP|Le}_dNp^cR{r;oo*oi! 
zhwb0Lf9Q1@(bGC<+(@es0W!HLa0X8ltB zHeydmxa=Jsj=^%m*aPloAQTEBqn4N~FK>A7;2?W_ei{Ze7#6?;4kt_n4vWd|<_&mowW*o`8gKx5!09vE@;D(GUjvpcBgV59 z#2^(ZCd84> z2az3R@Sq^r9|vAI__+~a`~@G$2=cs~MUBcY!>rj4lHJM2+Q`&Eimvon&aeQq%(jA^ z2@>zU_Z%u;aV7XxXgmbKBeNcNAmpb6Vb2ITBcdXImxCeb zxXsju@^VkK;?tq$h#BJ$VbcxZi<22_?!IHPMyl=Y)j<&v)tF+<8j5nM$w^RcjFA2ihXAM~O@X=&Ngp4fyQkIUa2XJ}ei!>(65bV9 z>_hkE*?7pupKi}v3n5J$DUwsw`@r+|gj#SqbRnW#XIq8*GHg};TtOTd6M&|W2ika> zb85hm2Thg8aE;mfOaD56i3H*~C0Vj5=#+}>z7%lA10a+&g&Ew3Hij{2yDcpTFJaV$ zOn5<$sl=xrL#q)sVtl2a zj~b~5=0$)o3m4h#aFEO&BOFzX;h;$yEFThd^RBhv4GhxYO|18~2Z2N%v4&yOeL5Mf z^J?mx`}eE=;@)r@(%HpmmlBySSOskQwOXN+c*woe_2ls z-fZyWZHAv`PGablpyH52cn?N#HGuTp;=E5)*)oh`lrw73nBKYkid+-HKAG|tg$sByvOw)N_`@Oij z{`V5&hL{Uz$8Kg|jY@bzILgqGu|B{;sM64{@&g)LX1h! z@QQCzsj$a14eoKO#lRnPS9ic02Vc4@N=Hd6>gr8KnN=z;HuIe;f?8Dtrk8o`3f3GU z$^!5s`iZGkGIooZA21OgdDEU$JB`?n$<(RAaH|g(TJWZoU$q`W3Ex5(b1^RjS}3?d z?C;dHVoDO8dB<=Q2PyDzDV=Uhu58!7Um!jv)V;U1Ln*=kLx)zxi@1s^~T`&r9tZgWNkHlboU=)I(O{7!*!gik4>2-CMJ+`BvZw1q{2`WMmWi+ zL5Xp?`sD*L55o5vtRTZw5R{~3Kw%9c2ZSKq={!u_WK@gkb=aSj=Kld}6$K1KmZPjV zL)>lGcpqD}{q8tYOFi)JN#+PHG(J#f(=l~*^kR6$*Q(t4@#6_oIeDc#qfL9GtNg8=q_Tt4suY!t*+beKwL=Z~q{(OtbZrMUFA8gW4vLIn% zH>s#$A`-rAvG*P~>xUlNCr_Ue51zSkj74LZ@H9j}pdDY7lu~d)Muv)L7*NQt0L%+v z^saLO7n-b*ej8(b^cyyuR*|J^PV61z`s_B{e^8W%=ME;dk@sMTYJjaU>l%ZoC?l2_ zw&#z}vHrx^8bci`i>j)s41^!hosG?M`a$@|j}B84Go&J%282$Jv6RGn-^*r_4rH4O zg%V$c@+mVoWtR55%hd5oQg3%4arFsU(NK{L!(cKQnt`-fDVoQ-pRMW@$M^-bEB9b; z3}L8m5GP|l-U3`bqo593C0Zw&$Xn4h^r(yP6(YstL4nGnHdD6P8UW3zxiX}eeyUPWF0N%2S8EVfWEQ`i(1aoCtF@+G1^LG5tr^T#c`D%& zj+WWw{4d-%D{5}Ow!#EQcDcX{D+GSbOz1keY(H_tW@-}AwTYS5N9=aiC4;Fi$c$$@s(FcX+IMc#Pd+HEkUvwb-_yk2> zUal+nh2!B{85o|e!lFU)W?5KOJ9syexVBdttBkbKql6E`kr!5#QNGM)e1%;&#;DQ43#&)h#h#0cYh zuI&zO78qHC6uoYJkJ=jIiKwy(>A|w^*sG^Dd0+&6ji17ww^IAB>n2k++}>zm0%r~r zv8aR@xV3oX+5B-3v!SkZ1d?wM_Ym+fk+re^@&R&;M=^S|TYS^+OR-^1SG!F4^X9h5 z!dWg0^(O4A7^%gKyF*sNkx?A^N;l4bsg;SUeIZg;)&0Ml}ihf%w{ z<@ZPQitpn1D*?E|6i{u!^=UOC2 
zW97{IFTKGS?N%YfRaj7WS;RH~eL|jt)ANYRCZtnqh+1zJG$~*RNz9D!S03P$nk&G? zye+yx?L_8q)~+SPx?=`;IE09!JtPE}M9km8W&IdB!I*a~Z*C4o3{8Xb;aq`I^uZc3 z{0S>Fu#Ep$xsvS3D}jsT*n(zE-el!&s7Xg+h{!QH8=$DuSoTl?G|Ol`-7(tv4MSy& zKYnUm!Yoz0eX};^Q;B80W%j^k|E$uIk}P=gV&EhWlYwMJ1fh?LNMs{G$f0n=)ZPlH zYr(lGuFgPP{Ao4l%t!EZ4d^`(?Im0%j+20pkk8lraO&Z5#tewG0%S6TthYlHK5 z5VN$UNa~%&^XEcDESNy+raGjqj>*;|7)~V`(3m?NAf~3~B8q!no}g z+p5)^JLy*C?^KJuzQy@|@D^#cqo)q(dMErrCrkIoQ!kGl2}Qe24r$)oc8va%_^r({ z8+Y)p-f!esJ11Wq!fB!EVCP`B^kVl}2Y5NY56w^S4c%Akg+mDuiCBd}-%{*#2lRj8 zQNL;62Szktzbp9$YB7OEP*&M+>6PYAE-v0t?4u3DxUp>#zXbUZ^{6gbmQ(1U|M4Qt z922Hg*KvZ#MJA*+ocA zpzK*uG6J@=D2hxXG4Dma2^|ZI`Q$8dLrECvQB?hHi}H(D_3`QCZ2@QU2nv`-jlULZ z2Z-!CQk!BN6j6Py`0;7Bi|j`QU*x?GQg>KRQR7vQs!FR&f}964F@M4KgSGT3D3?K6 zN{%Nupv@uub-=52D_446ZM*b!qCx;V=kSy)M*92`RU9gl_3lAugA9_sT-WvL_G$bq%V@Xf_D+L z^#P4RE$p#SLIDJ-Axbwen80a@Z@7Z}Omvtjq0|##{h^PF5;f}(Di2)# zOb2mwilGhva`aszOJpq^ut|F3es49qW4Os-x zb-wNCxe{hNUPOJ2Q9z3r+n}^Oo97V-AL+(!`dN7isO=~|Dswt1Q7$GUj}!lqP6@hJDrAD zQB$nN#4gD-pQ$C@gjAD^)kPpYCeY=D6zWdidFs}Am&_losM9jFx~M3xV>}dx*prNKR9i=nKG6TmxGF{q>l6?L3waUe&vEAal zJwBFr5I^nMl!SVK12rX`vee9`XJ?)hr&6Lg4AJo0Iw^wR76BU+18hPWM+I@ntlZpK zpk(LLjeRRDa7Ed_5PQ=PXd~U(!TgASbOlTn=`~_uCtv`0@nei#@hw>0e=^rd_DrUnU5*evC4i64)?u3kRgCOE* zkKGmvAc2fpBf674L(;s_x?DPTD$xaD~O!Nm7z$-N3hf(-YZPPtKyZT-=exx zyd;kSX0kP6jQ}C>+ZK{=BobJ&WComXN6cx_E8q0+bc$PBH8f^X2gRV&_wx3BaH*)H zv$G&0{M034TlqL?D*q)~!<|C24f>ekMH4{}gND9_WE#i-ecq_bmp7Sw_ z+3tkgf;VxngA}@FK!ECvQ4(Em)3R#jy~lIAU-PGV8}B{=-GaM#R%E>0+S;0!M-d0F zSm#Cx5!M4nL=@r>8WUjcfl|gFT@Y#t5n~`P^16-24L%&lIE3W&^tO$Rj`qMv9m;Ov z@`-`SE{O>r4CE3{J*EP;^A^q=rsoatW+NJuC=W>ajd-Mj0v{;VZuupMWzoZn2M2F4 zG5~ylEJS!95Eeadb6i=uoY)TlM1kc2QS^uESrM2vzJABy&H^R$(7r`LkM8;TQUv9o z90IU{3!-OayqKYb6ckyLHI^ggkhS&8dOZn>TJkx1e8jtn&UU;6NFH zd8Spq6Vxi)lPeBI*vNyBw>$*#H(w-e0!Mg?zKV#AEo6*y`_(d(h7 zz~P7p4&zQoGd1Vw_o z$o`107+DB-N+0}3h)g%K^HK=HLPL{?#dTg=uo&3#%A}r(Le%H4IwF2*IV}MJuFH3E zymIAA5nc_^V1k0R@9c%jixD$oqp-0@3gsRjzYD6fb>J(vLy1s6Hxq|tUEL{YGZsOd 
zlBk^Gp`Y!xx#Q;tsSl}Bs9T6iH-bPJR*Ub6T#Wgx!F6Q+vOo)A3sHH6rwUF;94bUU zFxH;P2q2V^vy^fw@ib>_@hl%ekrmge3Z;=l7znq}8vgQyR5=GkGT}hQ_p(caP5Rnh zU~vJ>Fuvc>R{lGv^X>zy z2tNaR(??ZER9TV6+>3}X|2PI)3(3Xl24W-*L>k;rCOyaz(UAcJUC3<`GaAh!6 zW+1RC5uHV%a0Ec#1iB8z&CQHf`7@gxV2Yk_jgdgTL}v2f&u>T2Euz3BSTvhORtJ6p z*I^dY4cM&=xT6=jYoK04AYiY<$T`4Un}CVQLKPN5-g?5vu3^fFc(p*yF9by+zMmQ? zDHIj^u!7Z^PT~rQ!Nv4v6#&NcK<YCLy}j&*L^;>Ec8_iY?Zta zgWw4C2r2d!#3k~M2JCWd`c@F(O(g0>!RE(n&b@o-DQjjHRU+o#vcdw@ZA1#}Y~;ESK#o)(^Aw4@%+u3%z>#2@DK9QQjQiS@(K*b6D z)@NzedV9POXx9K;GHS&z>mL-G?xJNztw>}%!ozXtm@Ez)KykhUMq^F|=B;Tg!>G)0 z)Y)b1+FgjYSY+A=4kD`S@S*O-$xvHgi+<98%iXkjS7flU1DFep!JDkKQ~l*f(^oO> z4;d$VIy$|KTIGX3j#B85mQ>nFc+#>~zKRx)R6-<^heM_hc3y0A+S0o4`K=dSl~6(K zcV5D(^N@?BM=K8{JwP)!Quz=Qb!1UCQ42LEExVFZ@+Q5Z9IW=qWX~3&3GQ2vev6K5 z89b7Fm&~*gK->Vp&Jtl+7^Zo%Nja~A&Uoy|5EnncBD(E9#FYN--Mdt6i%&4I=7p@s z2jMp);~HLN8H(GZ7-Bh>cO?}{t8|n%=w7bLw?eE=D5(v&9--H4s6=@!tM93(e}S_=FPR-+@M` z$q3X5?7}3YCO*n}4Ia=MRNl9t16zc&1}SMpCab7b=NrImr_dNgId!nhKArjC)hrcV zqKv-%51bmmlr`Wb;fGkn%(_o?Au@W`#dZhirIREXN1B4=QUH+OdQcw*oB*~_4B3`& z+z*_KYK(_dGo*{pgxW_3CEIji276xvMNEN7zpb9F%&Imn>*Dz zGLozXacqbIdctD~Fl)l}_RL@XPazTu%M>{=NqB?+VYLV>?fuC*#I-O-x`^OZpp)Xs z(#3knI5eoea;kp_;(El*0_!?()UaZnS)lr2!~noFk$GBDj1n^_7&L9OJF^J9G-$Vt zZJX7#H1^D)PS$Wo3xU`LRSL<=%ae)?(Vs{n;jw{^nsd~jXWxBds^!9Y)*5u*o2j{VU0iEeUC5XxxQ8gi+9Dfy{MDJ+= z^1d>o(THH?G%MRt=_FK>B7tNS^j!*V2-4jE&zz@bX0&4E&a5NA}N)XO?VhR zWS>}Iq0|$^I*`$m> zDIh5&1+8uU*ks}(hB!f>wVr_i52!s}z<+YBE4bKUwtd^SZMF7mrnX+>;f{?P5iP8a zLQ0H06CtD+_$q{Y{LoE}Au^e?r&+~pk8b@b0fqoznh3L8NUX3fMk}fZ)@JQ2 zke;kG$BqRlrswNdl30jC?kd!%cI{6#QHbR-NAnI#kO}jsjx2OS2Ar&_Am)ppnk>l{6e`uin zO4Aox{}FPt5HuVQe($DMh?3ljqlcJrO)ZX5gsS@bXMQjY?h{d;T^zKR;Yz6n5i14W zxU@KlQ`)~XP>}W5cD%=lyaxw%d#>YpvOo|H>d|6j8CC>jjhO!(&=Oe)B(*1ML4GqZ z_1Sy%yEW>=mFP7H=Qk`7@&$Ggsk(?yO5M2I=`&(G;mN3Pu<8MP;mC(oaCbrdI ze~zTr+E!QR6PI2)bQpkb^BSp9NELhN{b2(6%B;E|%q)DesbB#rt-f;Zz(fX3{ONX- z|5B_+Z@0a`l^HZp2(jT}kKlfTqLYhrlefNXZL>vQ2bovt^4BO(Nl}5-st=%_IBC8= 
z6}jgE6|rYVwAUMJ&nB%%gqpj4+wVCw79#?`gfMQb?Y4)fLaqhx#PW!(eT+m&!mDBF3q)GbkH<5!ldf{DV+F zK7K7(Oc}*v`G$4Vhoilm)Bsw7Nt+k3If9I`S%*-v8Fc13C8#BCfRhzX-W z6aPd49(8rG~Vk+cfc^E#xGxX5)tZHSo+acv>Wk%YB}eQ^t$2^hz=?c0}; z_N~sfA4a63?eVn@#s6<>5tvsTxbkfqG3Fx1k0Uz0RbBezW36ghG0B5l!x?&2v|da`-xUXDkd_JCZD{lvKKWc zDdv4P^UHt+=f~_DHtF~iNHK|JNJmR)1oQ3<6+pY`2EdSBqJ540*m`PEksyswXOt^9 zLtg+Tg(&{cGIXrkVW3Ms4bW%c)?N~3i90CR@dQx9{skl>gE#ULxduTLh$R=!E&$Ko zP~hyw5^g~=nbII_pqibN(+lEXG3oKa?~pXSkr*97-x0;iEo{*KADsw!C>yDPITHtR z0-d2okgRGX{SsUqkse2(Y8#P!5=K109$`MT2hOOJP)=RcqFy*E2!e~6IqT{VA)J%T zVOovUuLmc5biD)V*`QJ%Y8^w$s}Bwc!CO%^KIl8Diqku2;{)t$0&E0s&^k1D@f#Lh z4-gYs0z37+T?ba=rn4`!it$#0!uM{3allqi#5%7^0;kY#-O7XPY-dDdBrKdl;L3KF zbe3@^mCnEe3&5s*3JYIGz?3AF(1+d54k;;Z4f*(nJ3t<2kc8rPkIuTCfvfgjjOP=2 z6p0Hq&gj>SChl%wCf@_r-Ph|U5%P|p zdYM2v%rvt-C3?gyDZ4^k#A(rRNiuCV$Tb|fatzLd$_bdKGom#V2+TqLMmagF&7)6v z$#hzffBpL(TfM*fd{{^SQxh&cF6b%l9rw_ z_}~QN#jK6=_p1+-=70ZgrTBpUx1l2q0c^))E+2k zu*n5aYxC@|(-R>5ymwmv{$_rE`Rses&aS4SH@x*qa{4OKA(2#QFeu5{__QVFK`gw< zu}FTKZG!Dge?R+MSJ+OD!3TzA=9+9LD?_|Ejgp(c4A0dqHgr)a*|Cy>?( z{`>R)e67dNRC85${Wj-&7>~x~Ip*69;xp@B+$bpCe zPvAd(1Gg9O+i)>NvC821){Xz!lfG|}9`-;8v>$RR!b|QBoZIxk!$Y3%^5fXawHIMZ zhid}jQvDB>y}ut`^}%jJ+90SrAg9lW12_WQ5ycDu*`T51fnj!Z>3xwjh3E2c-t5`g z`f@YlUw8D^DtM3#=gB{Bm%lcd(}+}paBLzo1Xs{LsLykHkl8dD>H6ACVJBRzC)DDZW_+bBX5LZqin;%-lA z9zL17`luX1mj2nA_|N+kFIlYIol=Ar(i8hdfM;Mn*onj+{l=EQlX?3enDF^W_e?DF zD27(`%m7LY5F4kwIkGHfWzP5d&vn&4tnTe7o%nS4TLvhBmsC1 z$wyjt_+i5a^s=N*my8!10vNH=vdX18u0o`G?&YLwY4O*pB#F+VypC6JBq{MSE*Gu1k$<({80 zKu_A*&W@TwdVVN6+=GJ|e_JWT^zcKp(Vva%zpk>4#6zF8S&S{Kf+{bqXh%GyS-;*N zLH9mf4*PIUgL#x%-^g+F>79RgL^LT`)MJ|aGAg`^_4dpOcvVV*M)kpxN`>~H?nT2m zy2o<&x&K}YQl+lQ7r0W|lFmZr>`DLU%Q^Uv`}C1Yo*?}^LOxS>speY!n)}E9C-3<` zm#cGNX=$l%RFWF6Vf3k#(}{uQY0cKP5Ath{3ZeH4W{%$f_Jo&r)n{x0S#EOU#*OYk zYtTqojzfUhQ-P;RsyM(fjS)56DtXzI3c1+&BCu5d(Rj5wZXndYNGca*G{oW-4vg;b z!2=LS05+1a!$L7rZJ)O%vhmNp9?4-neg4s5<*NODl{*2hAV5xj z*!Ez}|K$2iPT;7r6P4$3sRTJ&B-mSiv7Jg<*R8hNt1TouZ=mYXT8f$cxj`!D6Xoa0h$aNxQxF3 
z4gx(h>Red{P{adNPqFaRd|cWmtfyVN8+qk2 z9Mn4hI@+-+|2n8^O47rYIhqCAKMHv80NJ1pa< z?z@(j)}!8a&c`?WLon00f~0IaAJ1=2-S0qb*oLI2gq&7f_+B(1{@2qgW?wtZ!?CbM zx>1WBQ&tvrwm@O=@RHnp@GvW2q7149>ZB*vsDFGn} zQ3lj~Etq=CtU}FQeY$2_+HuY2WxoRND7^iLhxyPb^??wY&n3jw7N;m_zX8E4#jGQt zZo)h3{IjiPSmq~41_5tl@Y?>@Oo{KzX$*faN9ihyY8Tw>8|bq<0^&ii0%B%?>#NvM z!!7hMaJ$Cp?T;G1fEVUuN_X7-pH7?FOO|0@`}?1Kv`Eo>#oOE07u0OoRbY{5OxLgiwk?cWI?PR_s6RU-}aqI_T;E$J86Qyvi#8aUku&TI29XaoG^6^W*6xhv%28!*`T z&xYEF>l(>VsxjX=fffqMQP#5`e1j3Z|Btyp5680Y+JNy(M44p@8I#PUka-AY$~=#m zGAmOekusJ!b5W+qJkOa@#t4yFL?IM1Bz)^|cR$blyzl#ae%troXWQ85*xa47T)ZQgMD8>ULZ64 zZ|~N=dUtz!8bX3dJkU14%=N8i54(`*U76DN3+~(CG5KFpur{0Oybtz3cuxoLMWG8Y zHWw7DAp;H^odo0xp%ds|Aw$^)V7CuOMU00{UfmG^a3DNB4#-3>;LWxg`44aM5J#h% zZ|_yu*v3Y~s%4L?avHPIj*u@JhG;?$pseTf${G-|1^E!-G{1cjsIdcK+JPBRRH*-Z z&OJ+O1MU)hsgp|m^926=X7$W1vZRN=QH4eZG@_sD8huNrOuutjr~Dt!aV}|(9r2OG z#ZN(Z2*PydI>3d9Mu-^4Pek1CD|*oJ3RoOMub}1o1M;(0fbIZo?^&0C9|BB5<1Wx2 z!Uh=|OwA5o-n8f~^kx6+tsKD{C&I;s>HvJu0-@K+*$aOJ1nb7;H-n9DBLkn8vtGQI zg`sSO;m27+n1*}N23uzR&J!PI5rV%6PZwV{Et zaM*$S4Rq>L7GinsUbW6rD{89`dPXU8@z&}oz{8G&D_H*YEp6+~<=#@vcn_^@6}vp1 zjSecQ7o|gI!GRoB)}_1;qbg~RKioJU`ho@YB>+C6+Nge6JxtRw_!g^_1@PlZfG2wX z{wV;T(EQ7+!nas=-o!C{WVPV4ZS z^M7(kNYKOTQ2=exKR5K_S*W=I|43Ope1a8fi0S}+W2cvDmD?h5S(4DCIwj#fnEmI8 zkiQKN8+tLrzoe=Klr*i_VPC*kRSj1G!a+uUK+gyUu$sLKI_#0CJb~%3qt4L%#)j?s z8irENK|N$VtOz`y+UO-fsMO1BR1q``EVj-Oa#$}LunvLW$r1U%UU1OfBQ>(jX3Sb* z$i4!S=uS(fbrqG(w}9!9%Cbko7TEA^ScAnW4Y=8ke$g;jS?<~*-maB|Z$I4kx?6l` z#ss)Y8-LbPqzn^q^r#&D=F&dz2Y#{!n zd_BA=1!|BYLvu#q%9W6{@zLFXt<4r#Lw$uNVsJ!`$^BWwN>4;kRzR)tEA%zV+N+RYreIi6GO3kW$G1fPd@2DHnFtA}&7-j*xmXjgmvNLO6S ze(?f@52@eZu>sJIn}e7GHGu+fY!t^9pj!p4dsLjq4)Lu_*oB5TPQ2L;_wwl-%bC$X zI=1V=cL=B*=983$5IK7cHQB&~Ad^uGZww2)@ngiq004?Twg%7%8j<|TKT;tsMZ)p> z427DN*k*I~4WE$i4uI(ZCAARB9c-KEsjuhy$aRvWpJBSQf~)(CV(?Vb|Aj1A2#%M5 zk=iV@^7|ITObM<->tq`!TO}54_@Kmh*!2h40U+<=7&J+Mx)AwNPNNrKAbc8X=!2M1V1x6#*k74Caa0En)yDPD*%Rfbt~*2E&u<3xFs&18{{G zur z$Woi75x)DI3xGqKq;l*YbW{EtxOW}WO$;)$1`^W@ydf{ZncK^W)p_kX1|* 
z%HD*At-ukJHGz|U>i<_ITH0-hP!%rW%RC|9t{eOh*uq591~4IX^Z;ZX&ubC}S-d5P z2yBBj+7D>fS-{sY%GUyga?i-46K8fqu{eF;BH=-H{I`pQ7iB|qMse7R-A~1+A%oD3 z2k?pj=Gh@(Db&Lb%6{ z_i#?L2%Gn}MUro{>h!I@q27PjgQ{Jn`ieA|YG^7@@tHnCnZV%$6L8A&5CX1k6#vRQ3zdgccd+d9<3BupjO7nnMdpCXA~g)u4u&~v{WfzSP{1xcT=Opm zJ^>F8)_hxfP5n*o^(qGs;!3T1+fV;g3vB-e;Re>YMW>w!V1y7ZJ-;tx`7~)Y3+aoM zC!9Z=0B8aNzjT&nl82OBV6;Sn&8=Cld6!jB>0g(m)2{9N#(XHX6$({({a3m5?_vZ> zh@iTHKHCeh@=66n?-TnJtqP893epnY@%8O_#hBjG-kBwDd~kk!E3_g>DtWCb)4{*; ziS?$MZd>{YtHA$;$=}kf1itR#%PHo~GS+0+t=Wy+-t-WqN6Em$TYOXwQcvjB&Tw!g zQ0=_q{^Iz%zMkM&$@zo39h?RiQsmey{sPd$M*iI&Xl;lZv`@`=))HEmayxdKGeSrG zs@B?hXUGXqsJ2vm8&=QwF5*Q@e4O&bJJ*R#uJ8Ls^%Vc#Z#V8&^i@^WHXxiEkM{7> zv?>P12~IP7qkMFkCNk<8`^c|;NSC24c`^1*cKhzRE$%ye;087`AA$d zT_$P+p}s(aYcQdT@rQ&NQSkwXeFIn#&-+Iwn$hjJIw^S^^YL-<@q$+lfkRW33JS+3 z2Q^*{Al03aI!e9Idx<_&l5r9SHJ@Q2LfQ9s2>$2Mz<6T{G7@M|_W?MGa9@y6LzjrV)!2Is!-Mopa)^}MiYI`!~_gkFY5n?lyd zmJ^x7SOZ)(l6QCXo*54GI}-gJNE9=n+hikEaTBP~^)SQ_B;qFE}|`j>@?scF1UkPa2K8hIb!1=)3lsBalPu^b7^B26kp7@y+xMc-$*q*r3JZsp z4CIEu6s&n&F>Qz3P@2(;o9P4m{g$j}zSJ^oMEOD@EXSy5p1`n#nJ$aW0Lp ziu|cd|FJx%Xe3rZ+73qg{zO|e$$-ccFtrdjHN+y#8(AFIrbo4tfpvU{r65i%9W3mvfTUQ~xh|8Q7rYTeW z-;pHAH^@g;HlXI#H4WPE?8E2=a|u9hJCoWITcvgcBXq{GCHsMDXSnuqX%B9d(%h5q zw1it4Hq~DQi=E5L_IlclYq&X;i37$ukJB1G&Ehi>3WbL2XLyJIY*9g51vcpXWS~4W z)H>im%m5ngnO76z&Y3TDj56`#I>Ih6U351|<(m^M-v8|ACl*fsZBX~_Omla4TvvMO zXQt|nv$nAId4C_N|IMY&v&Gfzcsm~G`m9Ib=%p_E&*vA(0Jb@8nT!swi?0aL(9j@f zSH+PheqNGC^l&U@T*#xh`N{5a9XV$;5CxFMxYN0-a!=IndXkJ2#VP6Z_wWh05gwb@ zQ8>Hnc%VB~A-m*abox^7YQS#_wuE@|4w)OdUFQ}&lI|}bEsitzRTIsVvQwf>!^-J9 z^jMkG2#|n(MC&BqT6-X?xzT5m%d&Ura8kk?MNNt(OSxf<&Greq3PZ5WZq9Tp6iX1P z^BHJUwbiYm!Ay_tIoLR*)g)R<^A`z-4)Kqf2?cg!ozY+8JPNNp@z-{_UC6e_y=M`{ zlX5$HhZEa~M_4;K|4f4RCWM54;iIZu=Xawep@Ux76-bJNS`l>_Ch@=2Mm(}g{-lg! 
zWz+xRLq*e1O^@6wO3Iq4d=fPjn7&-yvt<=MPrA=BB@Q?JnsI)(emC~`oQR5U%vk6B zEZk0&o~$f=S5o6e@(aQd*883X#I{{uYLa;pG-A-@{N%(=Qp3?d@6d53P6ANdf1D1` zXvzl2NaQZiO#^{zY6&bLG}a2~?7wvH)EG=#m$3FgCxn|yRPe9AfjwAjQPD1Uef$-U+#)dJh1(JK>N~aB!X|mEE8^iNC zydnd*BC-GDinvARR)CF^2pqw?yUW7J92!;GKpx~7dGJ?$4PXaM7rbn8v>}?*cFlP( z7U{y(MWO#+Z7lnigPo7%Q-rK~-_gr7(RY33 zxZT-$(OdDpXb(gBP1RdvD_s;jrdjIo|Kq40@{-nDH28JEzIdEftMR;YO1>UVyEjYI zsDUzX?zd|~{{hxVpQeWl7)UO32xo7G>GE0NVDcVHtSrHxdJ)X(5;I)zv~CMdaAU*PqDIc#haSvsE0J2+eh{P_icM|LiU*3igbZ4(!KyclJ;8-P{*c}(X;^( zQUdTKW)Tq@Xp|k+X{1L0J}@lMO%;8AW-h_5DOE&z8uQG7;(Mr+y2ME@j>>YQcssk3 zH|Dnd(!OY zJ%8v^k^hDM|C<^Sm>@{}Bql~PlkQ!z@5@1$#_Db$NEhH+dN?qD$}|-nFVBXgu*($_ zl=1{cl3=N@kA=CpW2u)4ZmmB(XLO?M-2unLSPMNEOEoNC^K8vl%hzMc z^2!gXaW-^VR9UrHZDd_FP(M=ITmCNne4>zLt4-s1o+jE(A_J~NM$11Biig;TiRfWZ z6LK?{8xRBWq?`!nHgvNI?~1tIC-3eMKaP8C zz9?mVGs2FxSv5AulX&>O3(H1mvjc}xqq3&vv~%XJZHQ&5r}r8JJB z$FX_0vv&>%3r&QC{4Gm6P_N#Y=U(juB_V%75HzH+YGoa<-;)3{8zdtu$u>Q;MI;)+Z@Uro4$>2T1RoE|WgDGdRFutn? zp(pb=PI}pM%7x>G=TjEP5LjoojxhSqr8c?pNt@`0PjhKZ+?0J?^B;6-R!<_nnjDG8JOd?_1@IqA9TfxdK%ERoBXWG*eLu~RP|QHJM$c0hvqrq z!~ahF^|gapIQ2NtB15Yty_L7BSQSr#bur>$12&!xP~yc~??U`Gc;tv~-Bz;_P>hL+ zi_~_v$&u>z8`d6ZH60?4^#wmos@$MfT2t^X%F2U#Tt! 
z_#e40Wo#%(cy@~R*_7H7k-C{&6_2{kbFOzS@rMO}_8D3-78-c(?JV9Is7*ZgCG>>b zlhGh9&2O3X+Uy-7zJfBb>9?A|>6 z@3~#JL0Z+o0cLLU#01RX;C-W)V>%IJ>vUtnwIzD#DKEE`R%g=MOQLZZA82{+H3)6> zH`r)XpPv`iu!#6_^s&Bzqb`fTYo&vs_)LDqIq#3PmpJr>rYy6`RBAKs+3Q?-r(EjT zty-!L|H*YL!{?q+z1gE%pjhUd!Di+7kV%DBwJO@ zYk_Z$v`Y<{RVG?jPhZV-GSb^}Zs+R6{+z=vyyT!!o6Rt)Jg?y*)wK})cAQnY4gPyw zk3Q%73hwu5R~QmGECtJl!G&9FejBN6l#r0P38h#vajT=8M*b3M$VNyy%r+82&sv5p zuk5Wr;?y(BQAUjI-i1Clg^4#>Q%UPs#W^GO<5WfA6<4@hI>tu1){Cy235(zKVC=Jh z{rk-(c)4A2`0>3tRWNJg*Ob^k!z+n{f=|H($+MgrYmY@kHL_%Hgj(nO%s<4>7Q3(S zuq-)}R81~MeoK~vy~O9c^94^kV>aO>xBvcbUd1tj-nE>9{jQp-l2QvuSwr*NYyz-< zv*5fu2i4$fLbR7APLqCs`GEVt06#C>kv^zxI!`&roX1aq`QEm`SG>5*iIJ9Cy&H5r z$5~HAq1z?m)d&%#cb*o7;5_FteqD90vz|C5hsTLo`(M@*$_iB@AFmUSiys%$XiRHS z2vZ1=q;cXGc4%OC%Enuxy>!ds7(RPKjgHgPijzfWa>MHwwhgWxhNS-tOVj(n>Ozy~ ziJ~5yXqFr3uE0bNKB&JeCpQhqRnY=}cTkmCR8~o8sVtD=8roH3Yw?*M>MqxFzbL-N zu@~dK?fS+#b3zRpQ|{(;-Am-dGUwZnou2o?J{_`r>N_9SDBp8UPSxnIxKn!Gv{cd) z_v8MslIJww-%U6A>0p!3@0aG;b2CDc*iF(YT%0SkJ}mNfksehm(+Nu)wHmu`zVQeP z_h>#Aill;c06~PdIh5&p_5CfuL7r}y(nSXwFChxJ2{QlAj}44G$Siy41#PPl(JL>hPMr5 zuC3k~y^WD32&PKh*;eV6xIm5Ro2+<}SDg3vhWC*Vd-%K(-{^DhI^nH#i_E-*RqE2&USBev(MPvf?z$bW*(EF)v(*-b{)Kf3) zsAJ&PcbfRAo+jCqd-s}iOqPXn%oz()^m*tNEg#gr@IE_vUIMkCQb?Acj%vK4ANCb+ zL0~9yD_ZhK{0b-Q-d$0QG%n3=;T=n*w;W$rCm6|ZsXb6CZ21xTc1gharCh~YX+Vpq zadIWo=nDhJ1{NQ>FG4yI6}6m8DHA8#0!CHXWjSza$js9Z0%lsdGNZtZ~oqd&5fNdUXK%} zJ-4i;Je{vpyOqK^I7bd6saU&sL6zBH+lf$yn=9 zXJsuj$gIk`c+1pIZgZOOq`4Ae`gBfTO3gf1ptdvPY@DfiHX&|v)WXNE>fO@8)9B2- ziNS5z#%9?YFFk!ue#jzH-z*H7@f1Hvof3CUdTd#3mC@UBvgdXbPHwkF_syz+_nPmd ziI|i)Y!8NVxSrIDXN#9_U;A? 
zwZFN~zC7j@%S{RYk*&wT=d#TY+wrDA0st96&|o8^qod0=ZJ-5e&l^Yv9K1gW2Iy7C zW|7I0L45$!AG?CI+zZ#K`R6<+HNOQvl=_NKKe;)nLzHB%h?dZb3YBCHDM6ZXPRa#u z*F7&U%LUn@vQC=dH>U!ajMH?SDEX@NN9}gCBYw3EYXq-f9Q!^|-XcXOv<>q%(F1BXw36I zIDM5Gn_*JEFCXv5UZRFxSOV`cy^loE_r$1YQj^wi><2jNo9t&e3X8Vs6(&5L7dW27-sr5 zT9K!;WZnFfJDfcX;&aZ=aqfwu-7jbJFqpmiT^+mI=vm~eb3edYrp$FRUM$=t6}Vcl z*8dUDLh}1$DuHc@`B@v6Pz5C$Nki_Bp=1u}DyiAL1$4#4kgqM?WqGThn+xP&&%l~r ze$;c9u65td$uT#aG{I{-dh6hNh5QeT6$Oe>&RUky*Qfd(@|{;a5X9qy6O~}1QetOZ zQN=TjP?6Iou&!iD*Fg?=2rpuAb!F`$FFH8AAG%Z!RQlNjXP9gMs#c65$+BSImhZQQ zb&@Yr{RC>iyfTh?C1TGlpUrx@A9_rSq1@;p;c{Nz&$oRR%~zi^q#7*z)~Q)~?zNLK zq(IY<8Wxd+J6rJed5cG8B?G@v_v5`QI(m9j=R-ML|CxH2-cW-xdb*ip*OiEvIEd*O z^zd!s5^7+=1<+zVG#?D4jJ_JT>(Dqv3J16^q`>Ix4YbhV?^&P$2=dfuE*AKG4r{fy zcaIv-LP&rv^N_c5{P0l~v0+ag!_?=6va}p@SQrX{qihvvnJJr!y=U`ig+9dC&t;X6 zyuM@J^}`C&w3(Pvz14NZSVvVluUzWhBP%DF#fQ=jcEqpkq_|>M=4g#mP06lLSu6|8 z+#NPDweHqE_Oj*IFvp$pi6HUq(0U_`%r&{G0ZK}hgMH)Ou@1ayw;$I|>5_y+NB+bM zXsu}F!M=B&i!-t={fJ-gDRrX}y*q(E9d#@*kqdko+SKfE^VwuOx8_z_$8Wo=268Sh z^|kCIa=~SPT@*WYJGSzW7I#tB*?KVeQs(w*#G^# zE#WjHac=|8V{r~`gM{~rNNG{ZMS9k^3SUQ z+LiQqrBCw&dLM*0E(8&{rhE;0xYai4mRsx8ceBGZHs0)%+7EFXkp&$*LZ^q<(ijS( zw`4o4A$)NxaY@=--J5@`v94Y2n99oR_A>9_-Xh&AzMh9rprvp4q@Wt@jxrFWfIri4 zQ&(4)o%5wPViQ1z@+B6$ z@^$vIdI9+-UtP)@1p{m6nD^`h&m~omY6t#=BXL8!FOEc$u|D1%M}Odw*d0DHuN_i; z+rZ7$AvJ7?>$bsql{w-oVfeqvt7Hnu-v8>E!1zy@-of9eH2Gmxn@2JIiE`@<r_z*RD;(Vm4zrKzZ0CG*Wo~)l=-e-|iVj6h%Xq zd<5{TqKXMKenkE<$fvCur~>AUYaRlQ%_m85B3 zYi@WahNt9pNA%Ldj`vyR-{Rl8y00tU3CiZExzk`LfBY@^Di#JdMO>&3?-lm;>cH8f z^47id+J zu2mQo2m{%a&!svi=ZZ`9Hr3Y5HN}`tLhL>|DtQeT{Q_ORi}~+uH-d;N?mKzAk7{dc zKcEwKewLGyqdP{CK!rzR^>t~48q$qRxHON|)5G29%9hU>1@e53nq8Pz%P1};b!~qR zQfis*-k&$Me7hUMMp?sH3e6e|jjpOzEXdjmSw%g2tXSOa`|&iRebR~h1C4>zx6TcM z*Y|C_OT1T#W9)`wW1{i6k{(2u5?hbD?|2Gfnv~d{SH$9cesPIMOZL%k%*Wkq&+6qU z{4xB?5!rup0VvXvkAITcx?EsfkoPKWP2k>d_mHSdd&6&=CWV>tU312W^ODBvZlADE z5`0}Ih1DgYQJzlux@A}T8gCd5*A)_c9(e&u)?-nl#6siQ0jDS^Z5bnY6$)kFzsef@ zRC#G*eK~uR_x$@W969Ty7}aRg6yB+KL(64v#~nGHWBX0(TySbU>s5}QJ7}hSE8q|k 
zg*6ne&UzfwfI#6F7at%0@IXp7_;LX$Jg+Y2{?k^ z&K&%d^@!(6-!c4Wx7FZvn5ZZpH}>tpX~2?=oy!yRNSBIEj6p zD{t-F&GnjUC;FBW7U6>*x4EJt|B5fUrc*tKLr&LOQed5cS-Ry5*$*$D7nQz4Yr(52 zoswxMoUboUUT<3tX^1{v!_++1K|6%MbNWY8id!S;&e74>MhD{8mUFyLk{gb5`C>yA zykGRM+r#tg9Gpufw4b=arRTsp-hqYD?>e(xJEk+rra=*XLy)F&EiBR_on1H4rFM4m zetqfS-s@GL$g3&%1O#XvA?L%Q_dxdOq?+E!e9N!q>v3tpz74ioCaW)I4({m-Tcj+O zss>5AExmN!$XR`{nDZmR$JUlT`GVaMkjt6qeH}`UuW}a!&W^>gp!FVB8W$ z^=Y4+1uO8(1*~ZR#Xmgk9W&aEdP6{6e((f{&dj`5eX@pu%tFdo)F3DcX#+^qVEloR z!7O6nbEXTi7-M3NU;yEO(sPt+2B`1XR*f+c*` zM#2#f8c&?%Ca3}#JNC30-HOtMI4JOKFVF(@@rA+|h0kg7yAdbmJ*eJay&@Z?MSaSy z0rO5IAv%ApwIsf1CM;d>m)DiVekzPVmMM!u73urtt`e1$8D87EVXqSU8?!AOPAP`3 z!mPr%qI~Es8}dnqxkciSx&Qclt$GXtOMKgMYVzgki^g&jrOb;S&17#sNv}Au)`5zgkqQ>RA>S z&DuMqxZ;z3T2HQ2<(>HH#;3PilYHkq<=aZ%BiviE7A@8$aB0S`zf!Rzi@v;ZW?!?- zDY=^HZNM$2ogLd}9>tzgA|CHoUQ~A7*gD|Fte*hN&Alg24&~{fW3KbAAzprT+3WmI zq4{lx#_@?)9$jCzw7!L_5IXVB9E{Zamp#8;@@l}_QG;0Cs-NifN4(P|H~C3-$s?S0 zaXQQ*@CE|~-5y~gF;*?iEaZ$?x2 z+DstP=Vn6r@@^w>(=vrq$aASf9XQ&f;(Yi(QqQodTC6hT%ZEO0I>JY;$vU!Uu&#$9tByi<>x%HwIOQ?xL*=*;b?iQrjArNx>od>Po;mQ%6I z*n44ng+S!pQ;~2t;WGXv<<~f7OhxM9AKkw2?4K9@Wx--oKFq*(DvO)YC0pX$!u6EM zFNO}09>XV!66btg2Y$8u?lzd3Q{y^+GI`8FDwbH`R8x(^D4ucS@Fi?T6K%T`3jCv` zI;#9tu|jXYJ=$<>yli}tuAnN-zw~WG$twZ4e5$&%ZQs@Rb|B~a7STv1Zur`2Ijz3< zNSCC4@>%mNgA#nx&vaFfhBWwHFee7Q=N$-QzpB#~eNqU0=ZADAzG_UVePF)wfoEj6 z6uO%&Fw}E7;*<=`<9N(|=}8F&8T(JbiQzaOshRs4GFX0?H=n2mN(>9Is7?(C zI6^@|@pb=QXJ}3ux2` zh7Xw&x|ZZq7H99;+d9Rt!ffhsas^0%ouU@+Nc(f`bMu`$(4Pc6Vrk7 zr(4Y{d}-xmA`U9IkLArg;jg$w;3%F<7@l)a1@rC!QGlk8OnIT|>PlFvp2jf0KofRX zLu9kf!I2!gs$gzg1}iBEk_wgM249P!b~d=HR9Am{Qklt{v>p(AZz;Xz^t@EJMTz&k zuhibUAcfBtS$8Mwb6<(rDRUx~qC-pI6sk9%GXs4(!BA z7NKUA8GAcD*4Sn-@ILS4cn5(Ci32w(yT&r(Ye1*-UmSCsZ|4XjO2mvAiN2YGD17RwNo!9A`krQ-j4O)}@`8Jv(P zWqYSrxlfpV_E-jK!*?Rde5hI%)6|%LMF!gR?G~3-eprdbE8*K+KINUZE&Pk%EaS@A z5>i?jtm6Xgj^(?AB%Aj*YPQ}L5^Qg_QxdLx4xNLSh?3Jc;^=mL^zqJb+!nv^Ko@ec z_BnjxD;*DHaVf$AHYNBJ*(r{;A3e^=MUzfV^1wK1V{@R@QbvoLeiDl_D{h7J(~M{S 
zM_-v+_4-0Bj6=HkD!&%lBTQ6-*^jGtvgXK3Kfs6`jVg*~l5HXxsLk-<5pQN!R%!L zvfR?IGFyzCZkaE9b&ewNHZ7li8h?S+l$chR15e_qQ>t>$ z+0FIOk~}zQ-1Pb}rgib@W8x$XMd%U70-Ptt7cfFYAEXop2d}SW=g8HUt)EJ+8MVAx z%s1~CK2%|+=7^7bYpq4Bb?nl$X_Vni?(k5VT{dYe#_&~tY%f``JrN|at@7(@B#z@n zZ=#GN;>C_uXZ1U*Nm)zAg*vPR?(FPp-0|k7$Nrt7Q{5ZCDsJZdibi~;!o zm&;URSean}0;G%k8a8vPa!Wssuywh_8Wt$n*krGK3K0CjMwfMp1!rT^E%HD;+^7L)(NolYj`OO3Nx`^z>uDZt*ly!WU$=aMPYH)e6cbZl(eYE(BT>hXT!`@}?lHz- zDwVh>Y|972NY&Yox5sHm$vjXQG(8zZaOSS4FSXRu$-IYU?qZ#e*X2ikU$B|db|3Fj z9+EWT*ln)9Q{!JION_zV8?DZWt*TT#fkRmgNx-w3P0G#E#pby>MPHFBxx4**zQaKHqun@$B^4*Fjd1P|63O6l z2HwVtTz(OXZM`IGLPFe`-9L(}RUxaE-x*<#A#BT;8VXzM`F@iR$My7>HsdS064iO9 zSXjQksnkVV8ZPK)9vD8aZvNG)h?HSOD2}@5TH7Jnf^C=Z^SS7Nue15u-*M?w1A7Hzz)I(H|NZOK+B^8qHSm zZ(GKVE6T-7KUYWdh#vH~(W#@IU1wTF%{%y=sM)K(5ocoVUlzv;p_R_YjEh-(3R%(5 z*{hZ|lM+_(eQ90ORi-5={^pB>7w7YQ55I=F=Gqy&jW}am%!N|jCMDBA5{Im(n_HII z1ykQt!dq2trZ|bkTf7_3w|^w~K`53(d6|4kp-+OHT9EF#DsSDkl_BHQ8})}Q6arXI zDMA5HD~_aOZ}1xd6tiQaL^6@EZpuYGZS%&r!A{i6ewLknN$ala^m6}^{=zHn8{QXH z7rZVFGqw^W%wvzH2%4WES7(*1H>hxbqBHVtKJkT@q|s98uB`Q$OIPo``8r2uSXowZ z;w<%1y|GQKF&#t}d0OjyKIu*Daevspanh6j58Wim9Il(2S z_*FT)wY~1_Pk9A&`*#YC+HQr6Ui$OndrE!!z3IS zqP^cknzunLnL#<6w?#lN6FlHC$Up`8pB(J1`oV0KDv^RBQFLxi4G8%)SX#qCDy-`7 zFk3xzBXck+F*yP5DAK+|4k|Dz{t0FxUPDR94~{>y5#`s&{#rH4EXR)A-I=3?VYJdD)dZ9c`_n4+{*O9(Vi5HlOfA4exH{-`Mi9C;L(ebaffE!J z#Gb}Yh=fyqN2hhcZDeKT+MBm+}T(f zCIN3kkUx9b_BH3Z02|-dw~_ns6Q5wj3ADm;;cuDqV2!}Kv;;jb@X&h@8L393a2bBX z?HU-$$~USahJvs5CMb_`Ahjjn#tbVY5*;bJQ4|0SAkdH0cQFP%e~`yDybNfHgrS^< zoTsPI0SC*&6ZG`qAp8PjV8w-(m|I{Q+?ea6 zy#iY($&=!^imUR zi35*;p=&EVoM*auf25O33NNMO@=8iFfXZnT(hv^}FdOc0MN1uua}gK@34$sdSr)^P zKnv(<-v`ahK@YD%X_Fe)qad3Wq?FZyv^HVh6=@41Qv`VXi=yHz7$omi;On_r8C0ixZ@In&3({-hfC10m$h6=0D5wvI(S+zr$HR16zc9K#*F) z#15bXkgS@p<@qUeng8LZwpFaZa1 ztT#-8Wr*})hk|x?7FQ)Ua&apeqBFZmEXZ#_(NF)#QD-Y z2fI@TNZblZ2|*moUAhrMctW1+=+UE)?FpL0JH6<$nLl|5(Xg-_fpPBG6&$kYtdP*q zPf}mBUwCh=goC;&nTUu8s5;{vs(CxF*zrX_w{!R5H~>!7Be0}F{18M| 
zfPI675fT#e);mq66P$Y+DM27d9}qn}iUH|o3mEox88%XExORbe*tO5%r`QO3<3Iw-JnDQ+uOqTu`m3vzRG>y^hDY6rS z)eRIv6XaBn1i@FO&z8OHJow_jc`X-RlGNK=1qi#aEs(WHG58voBnZIqh#Yf3quxNT zC&!O9X8-rP+HvWmG{&_o?Zq)?;zGXbMpN2ZF=TD!$R9aFJ4gSepqa=y64_3X(v3sgD_K$;7(mBhh!3-ksOV^ox6yD@$TWQ4 z1RT2dk@)D!S;Q`GxC>TCNOBvt z21v=i0g3Xcl$1Q51^{8e7nlZg$_Zf>u70CmnG^>Lq~Kn|yfE{zBS65yU|^aI8RbBF zh7C3#=zCyWzvo#(vU7iw+M5Oj$RUAvUoZxGW`lOGC3*3u9XJKRgrz=S)bEiz_Mu!Al2OGo_{yW00T& z8W%@ORG|I>J47D-sb-0DBC8wV=TDzMKL8#SqL?G`dJwFLj)_T6xeQu%peb7Gfd)Il z-{s^v3%QylltHHlh5wZ-r@Maq`0*LEwlU~{0_6Y;&=&x~UP7?R4MTE@a3*-K=Nnew z!x0UV4HuBr8CX(0gG=yQt{6rRP%tLV0!b$`GxH5k&o?j*j^?&!Aemw>%ZKtEsUUm= zZywWYyO0^nfdGV;t9+&Fao}gR&aT)jE4g0 z58hUhaf{?N_Z$e7B zuaY;Ig7(4oN9F`*d4TG-xwZ9C3>*~AAZ;8DjHOvHHab*mfPw+gcuadwPaDYLAdg0r zroyoS>C^-EFoo5r`g(D6OOQ-B==?-Ke*6IL!J;Wl+?_#67LFf_u^>xr@9b=W-y3c! ziFS;jG}198BU6M2(GNw#83>aa6j{)nR2~`{>W6p+f0GGhDJ76*h?x`ztD*$f*pAHK zzwape8OZeh%>{^I?Rfk4GA%sHBx|r{F)##b?#e2pJDRSZp7D@ZxzEB~;{eE$Lr_qM zIucs1+Un}hs<>e}+s(WVt!>oO)MUlY7yQx%c$&Bg*Sj}43P6Rb9~K|cDNu-#5zK)uBzhE;L2_}YCO>DoIxrOq@UyB`hGAs&8k zrOHTTdBp|vlm)@?G2!fCn9cfue^}V1zO@w$BLZOO2c@1VjN8W58Do%0n=_i1wjIi+fnjFC%8rlA zTK%v_P?m+n+(DD0Z|Ow#HvZNc_;Mf_1LXa6>eQ(|5c$Ti4`hdZ937p3b$nQ@zj2tGA}SRd)mv!1P$Gu-YdM`S&5)YDoQ%f$GPRk!xsNV8xY?r^-vD zt*f}CB$r>;vPw-6EZWe}AKGa^=C*`sY%qKyP>48Xu<#4j&L|3kPJSSG=|OP;(W|RG zmnCSFOXBMvr(VuKN1QjvoD#!L7uq%iZ><~9eRDMvMh4EsU<3Vmett1*5jI6shgEHG zMzWT+HU$J#q*!vKk7|YwMC)Oz&^0_lK4cIRkvwB_-gsWEFWhM4S9bvJ8E8UM$nm2; zumI8_V1V2+26C6K?%4SFAjpqr8V~mPAQ{7!-fhx#b9Hxr0P^fe{}5P)NBSl?Zh(;+ z7(pg_n4p`h?N_Y)m)7WM6ds`u*bGcYP&Gm*6)0V2v;+sle*v|r`w+B&+yKMcNOpW? 
z>R?|OiM%Wkzj*>;10|5JA_f6?MIaaOn}F}f#skAT1YQ)yQiyy~ptFK%4#;Zp{1?W~ zN=Y$-9{g(nB5p2^3W68pCnOU(Tu6_Z261~N*@R@(U|HdU9ZnOJJIMO!BA8S|>_i@`7#DN|+}Ul}Kv_ z97|F9*jIL44Q@3ye&7)Zui#YE3=rdcNNAB&N;902WtVg;GQX$WQ?EC~k4ubakV2%Z5BFLr!@(u+!0?0tD1)frC&JGmZ_%ft~`%pJ# ziK-$4-+qlj4-9%7L=$ka#ZXpJP2_#bD|*!e`DHk848gdOroFrSHF!7OL$Bep`Ar7k zK9k^_*t9f~!1VdXAQa))7J2a#Q_xL!;mN4VTOhnpUD0a^VP#eU4=p(RlOiP1*Hp!hPFc5+T|X% z2DGr}AW{n_)IB(Y;d+AMK{wa|Mu$pWq=y5#9rp_gc%WSW4hdkK-4v3e1HTjqEwo(C71=(ZDgqGQ7n4E|vd`(C0*pNX@FK{+urQFC!L>A+h!U(CHQM5+LX3Fnbxm>2pDpb0e@sm z3wd9FdB-sf+8<#283`v%)95HI3a)SvbaKxEwGLT7!D>fvGax(`U^Rg$H*tfBZ7g&g zdMvT8@`i)>8B}&)<(Av+05L3(>DV?5M_^$9=TDUI5i%^d#d2_P05lc{S*SzSsWM5N z{SC~OD$$#@1!XPBTBf0{0kSqW2KkAA7A}>4m)_D}5&Kx`2JsVx=E z0yNhQPY~T@z@@>>g@uz-$%R&H{ZrlQt4k2<%^e&FfQ5%#Icccz9dGlq5*r34#BZ?@$>o-VIZIDoP39M-Xs-5F!hcP9R_O z3DVXfRnU4_HDCMOfgF*Mu`EK{;RR6a0*&^KnX4eH2Y-GKLlSqsH{&4<90W{5*0q%V z-uA*tu!4r?hY~^b&t8CUMkggf`STIKN?BKz3R%YC96fps=B$np5Fp4D>6U?8F2)}y zMUb$C!YWDN@N$}ZMGqJV1esG9b5Oj3t#t|V7XT+Q)6m#!f)nsN%=ESl43xEa0p8R_ z@s(2m;7=%@pkA5|J)7DDQ-9%5znz4&;SVJ)dZQpHX9;8{AYNX`ornWHC_(@Zk&o%2 zi!gLivP>Kwe}NxFI|SG`1zou)?ZUv%;Xw+CEOn3s8c4yTBvzG`fCxYfS9ySM&&1;c*-q6M=2l^TGy@ zCBc5i(@p{oJTOMviUjP?w*ov42f+IRI697iF0DM=f8zdcdxe+J>aYV|4&cM!?!f^`-mS zUnsi(&{*ZEXoboaPgYiT_7Q)@)ersaW22)jP#oqg049PE32^X%ij2-9AOP}?cH6^( zGKm{PNOv_HgG!i8IBOJy3*koegOw4d@eo&0Jqyh*L$Kaq#)?8E!@Qsd1***8t}D6L z7<+?}{D!F%V5phu>>F?JaB<^9i=I7`UiROV~Q9FB*pQw(6bf=VcdNO+7Cw6ylFb#SeiD==Ne zQMS@Q5PnKT)79hu;p-~Es?56XMHFmoNg^v(p)<2ZH99xA4XT3pW61ef?1U27+ln)z*r^NMeMsija`Y5a=FvSOG)X4@b*k+MwZMCYz|7(~5MYsk1^D)SgpF`*Q&We1%F5u`J}rsHm6{ z<>;Y%Ch9L`4`su6ux^A8Y_9X008xEL2tTTreLrb^hFGK)i$fWwfAyuE#&l9yb+rhW z3d}n|I)$JQ)k`JpBTI{Yh92>{!F1CV+~>*8Uby5RzW${Tk+KqUb!9jL;& zil;u8IyoVwXUN;yyu=|40=$DdF2&jcAO?o&CZuhP%+_IMPu*ee##6H%aJd63MHKoW zM=9DLe|a1_m#qfk5DcK{0NuYN@)<}6L8*1jh)uIw)d%AF;Vs{{p}-1Dv6=sL z9+{T(a)VJTV2A{eG2Mh%iu$$yK3A5PNA-}v#I<=7|1&@o9Y9{wzgk5}O^q@FyQB!G zga*jQ`%Cq~NBrn5B!*>EgWTT01qn^xLGK#DbrNcVur7Sg{;Wvkg&Z9PKp^lSL@d+~ 
zhu$PQEZ_;G-rIu#Jotu)2mz|UArF8onL@WQj76UUI+8#Y0B0xh{4a3x*L-P!(*gO9 zuHfZy)S-b4m(^BAQ2@YlC^Mqq*cndcg7Cp42thyIz9EPY^ZvXTG+xaBwd=K$ zj^KMZi{&xy;YRjx@Oh{`vn6P6O-U@Dzm2!SMqYVtFXI_{_cw9R?eh(WMf8JpLXF~b z9c}A4FJ;SL;*d@2;0z4CAK)LLq+}{9o0f1dgty=QyPMmEGmk&khb^pY2Hhth333Up z#CQ0XMx5pr%w`@j8b&l&++^FhT>eJL^>of5OoY>xErQ7r3=gCPmFXN51Nq>vM#dk& zLZD}08h)i@K!*g{2W3TC9IVSWEYZt|5{;i z5`@;PVq*INd_kZwl=V>%L*Q2hD|Z1-EJuIRaTBM2nz^mgpMt-GfLnc2MtJdrXSZ?T zA;{+2BcUe4XJ-1oGUO+XeE%T0Jk3@GRGNXqbs0`I5lFeS9X9|jLms~VS7xn%dp23+ z3Q#Y%p~C(9XCcE8I4$5n0)0&;>m%=w9@XQU6%a}H#F5)X_VW0cUk~? zv&ffMOGgJKG>9Al_krx)@tw0%Ag}?@Sq^WK<>6-<6{wR@bc2**iZL~BBI+4s1q9Mp zknaI6{Q>maz@yT$wI0?$1OZv_hd@0Z zR1XZa+@z{Xl24v(m6g6CpY!(ZGfKO?h#rRq(;+Q^61kyZHZBFKdsG`EUn-!@xPa)} zedx=;3kwooHyv_|z;=E}rW*^_g{-60?8a#!32hrLDTO}3JrFQB#i;TeQrLoxEnf!f z+Ys>bY9kQ_@qu)wU*`{KpI8I?CTuPZ*5Drzg^z~aduO2LxCRH>l$beSe|T3D%hw>~ zzZ#%jv_4%JyJXV23|B@7Y(VQiY_6QWmLg-elP)X?0?2z%6+kf!XGjSNpG`WhhV{E~ zu)jff077+?URDI4ajxY)Z>Tj%5pBp*L^D{M9x;KSBzpkoACiZL8j1xjRAyKbg(h_C zK&Z-W9Flj3T^kRprf>lX%>;G_`}w&-Lf>r^2r^9c(g8xMb!N*r+@9M!xx)|pO89}v z_#DkD!~N_%bO=#p-lc^~#L2|UgWGG%`-ifH3*oR~(Qq1wUG!{Tu~!q@3WppSwF$49 zIZ z4xG$dz16IHmk_!EbO$gJM^`vqv>?R?-F#%IY&Jh6+GE+F3Um=<935``?FEP-o`vv? zaB+awUP7LVvO#!sw?Ot5lJe)<>!sV30M|{(^q~!k*gpsmhNgxqB$L!kOhm}!4W&Xo za2yX)1_HV7KAgtT^F*YI46RSc0BQHX(!A1qji?`auL2N+pk@%X|2B<-&Gi7dl|cen z9@ugO7_|7O^9Bzi)mUUPjnU|@czW->-cewb98yRUf6Ei#_uvk#-Dp?|sI@BqBnyl2&s8CxAaFp#3lWg6 z1EmQ$$AQ5w$ibszzFJnA|0y~~z4_+%LTX|Pc8o)#qsSZ$7ArdKp*sXO&7*7&Lh(J|DO3U} z2{kSNrGT14bJ-3u&#l!aC#BLr%)%8A?jVnFm>2!glYbm2=unxLAD8BHfeUy5eZUns z8h!}CDJjtIVXeIiq-7QahCcj)T+Eqp)&g1|k3eJaUM>aTr2s3vMDVV&ZFOmEm=v&? 
zpwe;!AnzP3Au8;m&!DR$1h}F7!DuI_1o->YdQ%Ci8Mh|*sHCa!U!kKD5wQ9Z)C24q zTc7~7Pw6yO+w3b!O`V9PtQ{SS<+PolQ;B#vQ5Q8-R*@0stY`*c?uT zItqY^+5+LHJ^@(w-kwoVP~8z8@-Q%LKJkQ08GnBJ0-2}QL3KGyNi6C+1Ta(`eCunR zU2zX!ORHe^e9@dLV5+jBLJKnwU8E^BxRwxfNBMAYW|nKD+fNM#st2^`XBR=+%76;P zy&)zK(8K_j@`i0alyGrg?668aN zE)#rwPE%4cI>ABo$k6azZwc7sGI@g_i*ZZz({~+{!?4&^A;k&+U|71aNlZfGHI$o- z_O`bE6M@vwsDB?32sJ}ju@%@duYyOeAC&eC>+9=v;7LLHP*Kia(U}+UWVhH_QXhL=C24Ez=9gt@+BibHeQ)9llhGE)$kbCK$t=K62y? zdW~CfH9x*NK^C}LvOetxsO*JFoLxR-p7W5QUNC`%f?Qu%!!@`Y^fVRII_jV!HV?@F zBb2H6J%bM(n%uaOjvI^duEh)ibRTFpg+2(}ABQ2T6^;T-$%Ow3%rLGM3UIF}mN?kv z8|9oPzbTkATA9hz-LwI)$uJ&1!AHn-0$>iNC*cmYEd;XI5^2G`<+sBC{HiR48hsjYA_Naa^_0@ydAR}`uOH}wiqo`ax z-HThkG^hD&UggvDgK{zbmc7(D5S5p!kDCWKG|}Jv(c}ujIJUxu)0^K70IV2TTF4{*k-0g4?}=OuZc+Ak zRX2YAJl!9UeCHYd{P1Dh_iyXLR0#Fm`Q|@T=Z!|#n@*vyz}Y4%`F~_5SaNO-a%3I1;Q2pn1n47#fQV<4V(rX4|I5cH>hnh77l!khk+e~Iv1#3fGPyiZZsu>4!dw3 zOJN{rLED|L_h4>6T8@67ocl5@7Wp6h2)*xLt1+U6uqPg*dcckixvWAeSOwYn zGep}0EFaZ2;3qc+2drHI>GkX`xHj}KVt;O-f4=WbJ`lEsSP7lO9oz(Y@Bk?d)xvPb zVKB&I3}vRkDYDNbmH(TiMt0~dtu_!BP=5q-72dMp*6wUyeYAJwAg4rZ_B{wja2fOAQ6aH%3c&0Zmx2w*e- zLV}jSS!f$#P@4?MCPlLff3qt|uu7ibPCth~eUei+Hi-p)8vHD8KwfXj_3UG<7HdBm z1!+z6>+Pk!-uB`@HkztJ8=iFvVVBlTUmEF|#|e!Kz=@ouJL3PJ_cFh7aJlR8?*?-E zTv57tCSTLb$BlrtGF~2#=iQ?B4nCxlWyb^^18(B`Q`!f)QjT2t&kaA?62qt8dZDln zV7Rtja__@56cBcH^;uvrtuog&+Zh zb6~Vym5?|Hx>3&1G7(DDks}qaeWm9qLJgdnV98@(Q2N(mM#1@4IG4`ATLx_pfPDk( zN}zfHcvd!vR6g$RC999m!=eG01~Q=}N~R%! 
z^_!zmG`ufRp95FjKTeu|c`7L~y!yQb9N?Gc2_4BnF!=#kbgEkuIxQUf{wnBG!1Do+ z6mUQO?@s~0)>ezZpCbFfdI#9#QYQA(Jy$>wug*2ajOdSx`U*|9{~4<+%CG=w>p_)_ zFcoM%1=#V!2|PD9XEWq;vl&o-=+RdBHJBavb%8Hm{_mF`__`B%-l#SKx2ltn-a(t& z1-h)6nVF{JpK00kTlY!}tkS4KKNgaCo6L*y6~7zuWQR7>YGX8pKKdZ9Iy9ZJQz8Os zloGpm2f2MXZOjouctK@z6k)d!=|Q&%@JFVAcnVPs{M!FBMp6<>TKarj;P`@M(i&HM8h|1AclLYndvEKmXO|M};) zGzIP%S_*9c-yYr|WPTp!)=#j19`Jw@7}b zN&i^9;{{2LrX6-~B(=MKQVJn~-HSZfp}>U=>B>+y)}9gyd%?$(ZXcBi|5NYWu-%xI zagLP^{BSb`3q6ip&+Acb^h@y)L+t-`f(1OF)mC%bv_yS8Zjc;AX5&^)MmYxqxone7;44N1U;+!u||B;CO^JKmVCah4O z0!8UMAU9A8!)%8Xnyq+tn4+S1`I`qM_Fz}~uuWcu?#d>an^O@!hS^+>-NAxvBYo!d z10MGBm6DVb281uf1GJ$e1bek_WY;Ue%oino|M-0!Z9z9=H#aukS$1E$b`6-^53rY^ z4UnkQ%FQdKMu6!#MfoKBJlk9?(X-k0@#I@|^~oavKDTWAeSaOuKXba@KD|~3%v<5K z`kHTk9-{$1bT-5O3doK|B8V9e08JSb8!fILqWiSAvZPF=OqFXm7O%&9OGPB+C;;*U zf#?4_bBwv(#*Z5zMTHR{u{rq_uSl)^8M}~)c!OBC+pv`Q(DrmaFhg0_nZZw}) zd4cH3J2*|Oeta#5h5SF_=Z+67nCqAAm?-BYK_?sg?dH(M|6e`*BP_s4Pn0L_ zyIa&3(W14qwC*_t((FUx(#Wk8Qg_}>Tj1j$D^cLh8Dga%OM}zyYq5Q9<+3XD5CF}% z4g{>};$^iBNY#g@N1@RIW@jP2-rDNUSKbA@=@Q^1^Rn{3%r-#5&q;C+{_UhAgSWOK zBaXzmS5wsY2c@z;>lHgg0Iewl+lZDDM;1j>?ovb2LGPc`zL>}v5ns@L9*jDmP8Eu2 zX$=u*t6q>JUVq!72mM%0BLkx`StCAa%M@v)lv9u_Ke?Nn!hucl8ck$e5(RTCAU&oh z9QySiVr5V|1I?-5KN+9_^PcC}W2w@oxsp^OlY}$Y_d;#e0$ww z02f*&cqV^1KpC2h9uT-SfdT~BDcF#fLn%%%JqpOgRiG}V2tw--;|#emrVOw~)F#A~ z0l0?nRt%J0gwT5h<5!p^fmKR~LF8A+`|&XF{!n8agAQIu-!RTllAVR3f5%x6=vEN2 zAVyZ|u}4Es7LIs~b5D;7aLe{N+xB-tcBAAiEiAkXg9bg|Gr@uuVlQBHcp3u4FhB$@ zm0(E=W0^XU`lgXmVcagzz6N$q>7P$S)HfZwZUrEB$FJvX0UaS&7_CV=&%I+!?%uuU zK+8UH>Qwu$Z=n%A29L`?K2m?sHYpif3nRUt8>j#CYjG@SB|*G?C;!JgcrhsAKPfA3 zW6{zIX!6%_BXyhpJ@GlfMYsuI9kR&;UwY-pNSJ>E34xc;Z-}XALN2?I;effQ2Q&yv z>y3~PZSJc{5QG9+tQ*`0bVR<|;jYKDeC3A)2i(E-8h}#tv%C5=>kN78G*j+^;IKCK zU{KnJuLAAk7O%8B*Y5e!1WC|M!YYK)ib8-hSfagC?&Vs!KwFpqoJNp|9f0cZDXgfI zI~Sjd`^5VF8!#xKP1NT8$3mp3V?~U8xab{`)L$z$rsX*p;R0W+Y|UPBW@m|JNJBD@ zrjtSau77Gu_0pOgd6TT_4_Rn#LK5AQrY;C-e@ZR!T{aODN|6{1AVG|Eq){!}VN-rC 
z@(gJa>KZSVDOq3(KwP<0te13?utzyte^`ZDjy_BP+m&bpXlMWAqz8a+8qm#XAFTn} zCS)z|)Jwl5Hral6Aw*>)FgvegW91d%Y5%IjG9coAjiJ?scNxBBW|dBX0a_>Glmm6Q zYAuXL6s8e8dOert;h7=!9yE0smtmYFuXj4yQxbhzp43end2f4f@ew zI+`NB`|h3#q^$#y+Z*TM$U;hAm@)(^L#p3fun9ecXRr(U0oX(geoAmEK!kii(_h!E z>i|Xp86Yj~)Bg#5D9Kdcg-5p2g}Rrq3xg(!;Hdi!fG}EgFtN@aC@=$U)hgi4qZ}7N z`8_bL$^tC+eVDw#m%P9(ehI@K$1tDoe^!K#>j}sJV z%try%t$^oa)}d;RYEw9J%HWVj{JY8h@1rwMkG*@x02R_*YxC#tXhbWJ-6gPSvz6vT z?>?_x)cQen;bUVN`SS9$_)|hVXdQT0{)FOwJ9RC>n>l)nkT)x8-t8#Uh{$<8vUM)0d=UTNb9$vT>Wl(TkYS8*$lAqy=oRz&WQV%K?aJ|l@rGn66c!Kgq z@PR>W04b@L#~KpY5uEd=@kOaQ%VOU?jPoGd%wx6W#1UY+{rqz!zPjGD6xXhNl$cm| zvzN6F3k@`&^5>!}^7|y$D|seUlQkI4gg_bSV{w*{%-_=|rL{SU{#(Q?Q7{$!mvakh zu@GR-qQ(bsk|8WQEVkT4UdEsi31K5ujxEV^_~N2;cW96wIiX1F@f7;^Lq-jQ55VC~@(-v@3KMj#s2sp_=f>fj?=r)>L zI&jSElU4=bWKcG^MFyUN8!rqAeRJp2Y==SAm-L*2FI>P2`6n*=8@}Q{+TzO!JZf`0 zZNRdAWXj(<#n*nTlKRo&05`` zJNiHUds!4EUTfH$a|pHW0Tnz7Cmr+#&^$H7Vrs{Yxtr+U;WPw@3a7=^Ber+|Uv3$K z0t{#E09-IhMqEhP|7E~*$U(}ID^~d5QtKV`5U_e<>;_Qr?n8EphRAUFXrc&^vXv&M zEu_$3n|Tp7>JXlY-;=5Hj0!tgHcHr@e1}Jn1H^VSXodmiRz1Cn5H4U!fxv-6H0jTG zb3HbEg_ro+b~w1L3oI~>Wxwm(nd`cKog)y6R71#Z`a$ZN3Ad4{T?acZ4lNrn3(*6J z5fqNVknwgW&p0TZei_!3I34$jT%IHF z=aMMQm%q-j&FiOs{%2YG7YgZLgJ(^g*v6+d{h_=)NjA9+TBC|y^>l>t25Ctn^A|0axW<4LvQF zQk)c0_x8YuhiMe9II2Uxr;|NtP=DTo9YQnrG;`KrPi2j;5x-9spDmDA*B*~&d@ z8=8?wD*jnh{8f6%MYMn6MaAWG$+|Hb41t^jJg_dIZYzw53MIE8Ar3V65+B;nSwuD@ zaN5w42%KF(iG?hYU4}*!4 zLP9qF^@%O4_&7offEgqz5LF9gYpNkJVgXuseFQ%_a1DdF&CWSEI3NJ|K6mdkwtKxX zbR7ek6mnj6y3{-peb0rd&2Rg0D?N)!%3p+KNckhJLpc79d_lM+UdIPhDaozIw_P4T z5na^I`PGWpxJ3mi37Y!=eVjRnC_*rq3jKsU7+K;kHt_yFMLb;xvz&OxHDwnpJ+O~V_32b$ zi6S&M=_bL(%1WYoonBi1Hul2X} zxL|Y08iTle0KsE0nGAza(0;6|%j%AU4zCz{Y%Ph@vf|F}i@FQU0AN(6PV2~tc?J91 z4{Oa0eovS%c&1US{49!YG?Q*1grqS`YvDs*e*376&*LO=I4IEX`uzm-q$DC^->X_+ zAp~=zuOcJ8fs96uzJ$XkWjp2KL4s?RTrMA9f_XfU#|*SMld*SGT|w!#RzihOYs=&+ z+tp-+Jf8vjzujNMTXO}x*>ih#4!512+^?Vt^=Ol+w4@|%k?}-q%icAoT7{P^fkhxd zfG26txaT0OO%YdMLP+))D!uk;(GT~Zd?O(doHflv$+?}Yt^S6YsM!XOEPR*OiO?yY 
zgTF7@HjeXw+1G7HAOBFyTHXjIryFvGb|P@&C)ChSv&&Xa3>?H})yYxZz?a z*!4$MpYe&z-(CQA`X_eKrR9zs{eoz}`8ClY_(MR1E>cnpM9k<}zURP|C5=}JV z{orB6Lk4USZ**we&AuzjEg6b1M(ZHx{Ufc&l0^SHqruVC7yW$VoJ6YJ1kS`4Zykhm z2L+Aa*S161CIEj@MjcS26Xky3e^Rs=O*Sd+{kQmuy|FB*L*R;sNSh!~74;>bb5bc| z0WOs?(qH-8uj`zVkM8mMrv8z1I(aDqA44K>Y$-0JPCKkPWF}+NKl`vNt?cA>=Ydf_ z$22*{8@Iodbm!5zEim*2<8X(jB1UiQPm+>f%?oQ}L#yth%BM@aF$Z5fa=ia~o*+1!)H$c&wOC3bT$@I>cR;BCUf4EO zon_m(fSLOKtu5`Xnd8wDw>zEcUhsK!{+{qO4c=UXx?5X-sg4!+8Bftn4OF(34_^4w z3~gUYi(C8LR`NX-pTkH;UmhK2yLabpl*~6q8a?#AXmFT%)D5ozADeF8RaVRKSxAIm z;8L#Mn{D?vmV#1TD3E3EZI#`E|85NAP^gRa7k;t-@OUcbq z_X(fPWN$QZ5B#Vi?Uv|0HleOb9Q}Ur8vgNJjoWE<{?=cLp?38rrY`|zn)mSU198lxAO|08XevklFf5V0W z)>{m*qVvY5nnH5wgHm7X*CuGc5sXT|>~I@%i?imVP;gL)(gEJ_$F^ns(HT?%_HW5C z$KT_mDw{*HV6LnLuf-wTG9LvT%403{H>SSUcUEC!uXt$<*(NU)Vk?PPD|IJ34 zd76J5#qndsyMdWd)0!xYy25<8R$0`VGEH9?K79Sm&=hf-oVt?F9wt?qiFfwWV4)5p zSP6BV78YBE;(qmgHa!K{PoHTRFE=Zyax{Aotlc!Qy8m8>c<3ajM>x-grbIg9bv`&n<8lOx) zH{ui$rSK|0Qqm!`Onh*778_7XD#&zbe!nD1OqYy4`KIMUe(Kmj2sVzftWVraub=osDy44Zkj)0j7VIJKT>-g9IuTKqOJ8t*1M!AQGB^ow=y`!O@W@^LJyNx&l^ z;R1~5e22>A1K{_{F@)U)wF9a09E?^(74cR5A6HL5>wbREE@tSRk*g?csfH|NWks++ zfd}uh{G+S2kiiHDz@p>famtAmt$6J4E{%(5UdS=ZmWq~mER59pgtNf`3;KbKY@6-& z-n~pKrU#1b^nCQ7`^%NdTJ^@_a;vD01^Y02?|y;}&(5m#)(y`X zwGpAE>9f5)CLf;((XY3&)b)9)#aAm75PdhHZ%lZ{O7wcB9k*M9@|O3n%pyi^BFm61 zwgbafNdCrGvJn?W#$7;N{n=Y;;tR&vzOC>(M=p#rzM5dBot8=ZVNK_Aeuum9x3t$= z{Tqzt?1SD0;tzvLA$N~4NZLdgyDPUoy(HY&$b2GtQDmQXW0#}uks<~*H;)U7lfAv` z^jFTrwk5DYVicG)KDME#!`Ejk&Ro)M-oCaWs~%jJMbq%Se6CcSsd*o!MY2lyZ4S@O z2WBymA7XvOU7_x$>~0ime7AF3HM@eVtf`t7TKd#`&qQ{PMgmJJYPV43o!83!A-1|) zeClZ*)hb27F}C-YYtElf!{8Tre=XJ_WhhNBF1#rtv}pb&tN^p zEw6G{G(+Z=WzaF}ix0EA#IU-@Y>)jT9fssC0=!}3t?y_yY~n~xZ8GhNDPXT^y+75w z?wT@o?9}tfh)Ly?siaSq&2?ef;g(4&FQQ$zf~uiCBT^WvLHPAsoGHzV@lIyll|NY zRL1#13klUlk-M1%9T zFjM5ZaRGQ*9HQ_|`NSQ+$v1&l4Az_J7o6}z6bTvQryVG_rk2Y`=Py}d*~LT{_=`@6 zAHIwS{j&|`tQm=e-!i@q-(a}hgXI%Hd`pP+{z9B2iGTyQML}&-Np|=P6UBG4&7MK5 zx!K`+`dV~e6=U_T#xBoXY_mOI-I6>a!+WyrPOj|x=H%?KdYpKf7lS$f+Oe$U%f#l< 
z@`)H{e3xtKB9RzO-n+H|uj#b=OC`2@KNFtqk9xvtt;;X)>@J>Q2vyPq*Pcv|8DnRG zvfog_=aly2a1aox_%S?4B`q#F+1=jr0>uQ zPDS$Z;=ZU+&@^b<&_auMlU z>5*x4)QrFQlHxjrd8PZYa`D4>mB+oij|;uyr<~2%XNQHJ<#;* zIsu9_5z)w##|;*NUy?uJ8rOb2iFOGk8ataj_BwZC>*aWv0_l^Da~ ze#Kn^2br=Q^M)&))z%SXW)1o;e{4UY-WrTsXT_9-gX6VLX+Y_5OrF7EX05nYv*d1< z#CSY^1M59w7CeZxT(PNXoRa8d7lX?`{)hy_)wB8Ct-jS#tjFh^K4+q_=FfiNZ>cgR z6~#IX#hPudSt6ALOuZ%c8DLWYT;gGXqmDRC(55W^B58I&(W>dOM-Ag}*QTQY@SD#R zDB_xqq&9|hP~Hz_aTv1y{&D%%!qOgLa52lFXgM>HZmTm{KwOz|RvRDleM-%wQhCxV zQfl*wkPwCQpy)``h4_FR>S=)pTEe&AnjN35`)++u#JM&PxxMMEP);vTK1o^1(@|7j z%Ocg#Xsu7@*rHp!?6h5Jzx*;?0IR<=D~5|zGPEPSwXxS`jQq@dwsdB)y;HmRvWjiU z{aSDy4BDqh-%6Mp(DoI7Z@$ z=7Z;TJd@U*DbTUWxgWEB`#ebUUx2T}5T3d&SCM1;wU>uBg9!Etk(m0gn zsx<16m#JpW>-^eR!f8%O)KNzHCm!{((kLlwB>U#;AQp$#4l2wU(h2;`{Io5*%S$R< zpoVa&=TPAn>%oR_lLs@g#oXrkdg;A!G+vj&1R|swU#da@_j$xKeT#2kozUt%mhc-H zsgtocA?E#UZH-%Urinsj^Nmh7*yd`SG77B@UZ7m5-B6c3YufJ6=;N+qLfg*pGBb7jimsSgqT&ecjMYlAo(PsYQpBn|`;fl!+&~JgZo7h+ z%WXht;7<2u?=Mi5-25RvLU_Zvq-^LzFX3>;Cy8$Frq}Bf;%(v|43GUImi*N} z)pfBhx8h&MU&_)n(v{)TKajQ|PGYt~dOfM4nR%P+&91oJITh0zH7pFP^4A}-lXOLs zMsJ0A2}<#e&uZ3%zA#}I<#w82*2w>M`$h1U@P&G|j%$R@Pi(KM!HyNmSrvXZeAwCJ zS^f59Tc4_y-d*aanwE0;IX#{+SlJUB`WFSoEK5Gyt#L2;{tD0PE}fEf?_yQ88ygcCuP{4Z+;^-s zZ=##bsExS!o_5Kr1>H>tjB~%({w3Sc?XAWkwe*q(yDhF_{Q3m%7lK~pW|6u_h?x(Q z7k%5UA$WWnU@^=4jV~H1n&(25=Q{jLRVFnp+jDb%@IF+XGbjCYRZXO`TbwU3I}NCJOG=KG9?=P9LYf zyY6~#^iJjQ#Qyk3*W-p+qW+^lZa6$T{4e>5vpujvfF+ATN`qCPXvGW@-IJ4}O_4XD ztIrRh6HBm!@oL!kvIRnUNSbq4wp!9o!j>-Ml3spPr^`Da=QVw7WXtX#z}K^%lkoF` zG}S!KTXp5BW=)lB*-9@dvUT1G_I~Nb``~V=qE%Y)*Qpy8Z5-*_&h!CY7yM#K+i3ky z2bJ3FrOPI^%C$Q>d9HqvW++*XS+72K-{s<2@)54RyD;x@g(sCrrkb{0(lmCbUmAUa z)8t*%IcwCo_lLdx^Xwdj&reTV+>Q^<#nICnJb2W9_wB?x6QjbJ`sl%q^h2{{flJ+; zDr3xX+b>j01V+>!Xx0-evihc^NlPd_P_tR6RbLu?EALb|Pt(4KN41#yey6v`Ok|fP z{Yy{YUD!KrAuF8a#4PG^#&hlTz_k8aXw8&LeB#`wo2^r%;gzSsW+Tvf*uP{$E|yvN zl`dR3?PEK0QTTYemiJTDi)1&g_nv4gqe|&~8uRV=o5=2pP2!OWZuD*#IxRX%M=j;O zik~xG_(u&mI_Jv@(~hTMEa?eEvhUEyy@@0UPs;P9XTuP$#ngn&*_$n`LnpD~?PKi^ 
zTbm!&mX16K4qp3GZqY+#Olc!v&yiM?oA<6q`Ey<9>Xd%?)VX2*RT9!G$Ec}qb_sUQ zCGHnAFONKszL7S3&t_EkI>+;h=Im=n+nD$Bhb}TKthQOFkiYjWY0oLU+#7Ot z_q!RB$d6dX5{xiWK9Qm?tGYJ#oN|-il#IJyKCDHyTIG$NDgVf#l0)kDskCP`p-rXZ z1QuMGuXOyzIiC`|5g*@kdt0p3jRf~_-6-(z#oXP`!fV5l=TF&Ro>R)#rZZf|Zq7tk z6LB!T*6;5-(L9k7$tK;s`+^cxzGM{MtkLcbBg#ci0`|&Oj~7)l*FF#w$?!z^k@M!qVQNh6y&_!Uvv8%`xvUmE6b~LeNS~>fpER%8 zW>jO~DM*|jK2NVK+n0L6D~5NsSZi4zZi`vCa)8x%sMXf=F>RoMtOj5LVMqNaH9PHiSVY82sB;j;AnRvRdLh5;!LxDlJ9-v9k-J>?DJ;0_ z>M?dk?81T)Sa|lhnm1%jHG@?=HEr7hc8&vfA(XA2wzSv1+Kh*~U$- z_ADHZWWsInnSUax?7aR%xxK01Dj@UZD~XX~7r7SB2zmKvMqKwkg~Q7}^~tMY_<99? z1&ca`KGvYML3pTwaPqMBnTc>O8^d;DjB||L!YhsAF-O&$H?LawTpP{uId89=i}R(i zFgg0TG*ddD3rwq@cxJJ!Tvo};imG5ep0mEhs6>7C(VJD0Xgv{g8$pr-kKRT4b*&0u zZs6MobT$RnyS6`U@F0xq?)i90;C#;FR?uws2lsWw!`+=lBHe}iYOjEw`wyQ#yD%Y-1g>T?dC|G=WPw~a{;>6i`@k06_6?izTvQsTq&j*Y zX5;jSGd|U>Bj9A;qD=_f7xkx))*uuh?;dsSWYtJD^WWmcr!qs{cbd= z@g2WP#YZkT;Aq#rVs)`&dU-){r0QBsXuiq)j{PR0xd(NWPBCzhIv?_Lo}%h9q`{v& zuUKaz_o0)OqR}|eAd2l>e>-f#q7*wO1nnj zYq6Pk35I)8iQD`IDG48T?V-`O722=Ar^tNI&F(`SGuu7)uj3r6*4mw(zN<>%3M^y& z*du9u3S-KCW;dp}_f`N zEYZVvltRI(NQ}W^4@E+G=BhhOm(`)&%(d9Gavy^sg zQu)#Tde;vDF2&p3i%w%J%S|>6>?AQJL4s*7TJ|!LR*ilTj(Ij0OU-TCS#q~HnU>i% zl}3@ufP>SiFlV!GJ^~l*HuaQ{%FDY~caI(6!0hp7Q-mRD5t$_=O9O{7S9`{}X~MVy zF_#t|dms2YVB-6wfp~4Yh?a!Hh@i9knVqqVp0)Y=>1(2?jy1jpVf;iVGqkekw(S{r zDKHJluKu&JQzKprT~;KKh!m=TGy-t+6PQ2%C9(Jct^>D{aVuvRf=(W{%X?ouY&BQC zWv9|u{198>O0FAs*G9ZRn|a~m@(7hzs!cj)!pT?KY9EhtnWt;CT)s#7UBP(ulrC|v zxwcu!am6ZJle9K5(F4v|<&Z_DhC{HtkACYhs zxb(chg5x7Ai9nLL?7Lfht`*VURoz@>ef6#TCu;)}JPu|D+LjgHF!^44u`sZ`ePPdS z$3I{6(4~>`&RJ(ui3y&4X~s@%(>VBIBdbo3h2uUw=8gR3U8!P0dYttFL7edSAI1xs z*#soTCsyW27n~=Dw7_q`V|*OOu|cZ3n%qZowz}GOWJ>u^g$rk_K_)(?sNPk<(XssM zl6+wxd(wahRgUZTZ%QVwTNx9=duK#GJy#7TY$#pX6Dt1kMEFhW+eIXaaf=<^y=F}8 z7&~gu(h?Wdvz)|ZV)X%nArB+&t6w;&v~1FGN`9{D9A?Smo4{R_fr+ti`B&#V#wb5i zC>;3VeN%B@V)g9wI^Kxgn2D~iZq}>mme-L}Ih(xOCW|Va$9Zd}pQJVBRx}YfR|a~B zw=JsomgY?t#BQH$Da(#_+5CRtG=J7U=L5JD4}npwg~l?kfzCzZ)KI}Jtm~3g_LrlO 
zy22nqect)~Gri14ZB&E0ne4F3Bxrjc1da;KH;5ol6G&bkCN9 zoc!!u=pAU=ob41^9mG@5m(azP8HKa8PM8f?tZCoH_Q>67tyE?4@qUt(0L!e6sF}f# zF21bPqWQ~>j0K~$0}eOSf^w(&iUgy{PBHBbk$-SIqYX zi_qS^WYU*Dfj@1PPjhVdubH^N^*Sl3OhJ5$!HtM1q6OqUG(f%(XpjnPY{1p_0xeYJ z^MLqgAgtmET0^h_U*}_iY=kI@P8)+%NCFqs!=FNr{p`hyiS<);vVP*VdHQSo6t6gS zcN4rf@^o4>c>4Yd^=!kJVtU7z11cr7{Ly;pYli1$7d+bIrCYO@`X0rW@!eB>)ZE-D zB&tBbgnd8K*?|pNauH<>G<~5@vS&Mbo(aA#e~~TbgE zU&=L1u~BRuAgAyhg!7Qy2Ob7A$%Ir?PQ#cF3((wUx}AUtmN%JNINN2AhKKp)w2cK- z8d7B30=jVnpG#~xJ{H5XFSfPTXg&5l{zX;OAv?lPJPX+|GTMrUI-dATSwVv`gYI<} zdKUg&hcVyAa0Kv=8*Uk2`gFSeAp?0e>#%Xa>XzUZ{^@Uc?1uOU z9geT`i!OM;5$PBDV~J@T-)!1}r$XsHa}y_%{PMRr9os8nYFg>?swsmm{h$Bh%p? zX?~^E&+k&tO3heny!L8Nr2iIq`Q|V^gHbKR$K99{3C)hO8K(0tHQQW!&p9iO7OxOO^{pK8}lh3KZ@jJ=;k z%X^B=Te3u(WJWvXf#V7NM-ZDCylg;o;!pC${~I!8tYbw20b{r-d?A?4LWGZv4R^xV zKqVV(9R*kU2S9(d*~E3EG$pp_gMtbPnloSEhyO+N`l$#H5S9V~g&|WxL_}{0t;&u} zQ)TNQ%T>9+e9~VXe88-{0aK9!$Hxq%6gQ<5tCg=B%%GldkIEoh3HI%zW|>1v#Zrb; zJj}JU*Z9(uYq2U)0nI(-?&NF+3?rids*SrgCU8R#V-%QS)kM`jci5si%4uOskGS=) zlHUC%M^o^XD&fc0<)k$u+LB!lAEQ~ZUBx?gCXuGlapmFW_dQG`_n++E=c49vo4EIg zOt1j0Hl5HR4$gRE5f%N@i~9Ule0lf+c9EG)U#eW06W_m5?%QLeM;s)lgrV0Hj=oQ} zUfgTTO6HZ9ZbB9E#7#K4O?XhANFFT9OUR})TP`G^FW8FzoS(>N?|f%!6&?3b21+1dP?cztNV}l;j*{5<4W-`UoCXV z9r?cyl4TR{QuXXBmNMbjbyDT*FzX8|Al;VSWO}ckl0Bu3`*d?=&|+y9`^clK`Uz1k z1Oy~+TnKI_pDnc{rSm7q{G_$K{NCo{fQ*y0FHQd9^5XW7UVD|7%iDyr9oWFe^A{;@ z%gKm7>e>60!EImY6aOzOjSr)nD_Mz2V`J$a5Z{!x(mBgWS`KS09wbbj*BS>!)-B(lB~1tnrw-AE>P z=bP=cB#-{$0_^9r4oeklmFj!jM~{Q9ViphymyJ8qz< zYhh`5=|fQMnkBu1bZH{%_C0EL_7muWK(u5&9?ZZ%W?w<^uFS)S4`H6g*~?46a1S{- zIm}U?S`BM$mGf#RbOyp-Vyi;(-OCtc9EiaH5LUCg(3BRz4}KRQUVab*Gmlq+l8X5b zmJ3K>*Ll>)2-LGM7?9+>C2sGSZD?r7z{(c6obU}$A`lwI!$9BDW^M7}{KCRf;I#sq zg%7(X*J&>RV|ez<0;Qj?02rTU?*j9uz$Ds|nzIMN>CF(%c5{`xor-FKL-FyW)qRMl*4PIPIikH{@I<1(%BS)op& z^owBeP1*2yv(5PCLYa2AG4~V?Su@ui`*2WjL*_igPBDF_PUHGshHf5ro_)r>(a8FC zgKM&?Sc;RUsgK(4GF_#dj(d`w{85UNhuBovf@&pLIv=oN-UK*aE{fb1XnYb;UXr># zn?2R>{DAv4&~z9W7|?iUaB!J^nnYuMgZx6np-}4n4+};4l@xHNyD$!i99r5ECKqxp 
zZ&Q5vd{{i=vjJ^;X%?lfaFa$(9iL6>1QBLTvxo4T-8bfTw(5*h??kO8p=k_erTzyS z+#sGNzvd5*&gEK`hrY#>1^lLTiw6r7W>Qc4D znf=lqsm$>}7LnHc<%NyNK@g!57odw+=8k(rA`6l;N;!r*0=H~&?khaw$@*kDDhfpW3G=Wt(nqVecs_%DYRKamP8;?Ci?H>Ons&5xX^7`9-oFp-_^(GC?~j1ffC2?JSdYPy*ilr&~Gv`H7$d6 zj(C&9nd#}wh>i{28~PSUaP!nKHh#IdXmL+Tsbp*4^VpgrBjtccCSOk{3Nwf@RfhVhrj&V~C1941cc#b{#fm!-;N6y}!mpd~-IP_#MGDl|^Kv{^a+_OPTp}Z>EG#=;JbMchJQsoV_SeJQ zdA9C5k2=fY(~~2@-8cTa(W9H)89VCte{ja{T>SpzH@TgE`_eftj~glEEG;d8^~qOQ z83vJApk0E*L)X^Uzz4D%L^f(mZYNo=H|W5zg$zuk8 zz^RT#zuZpBXvaR}1V=ccV2QXTzmO0msgttWvvi~NFwloAJWatgrKChaza!NPCT9b# zWy=NYg*1S`8nJC7u=yOX$V>Q_hla5M6W0T#U@AU-+zTqq2ylYQ`nssDTG!*#7_=SX zF9$#a{X)wc(67gO^4UPb%LD9ykgh5qN+Ux@{dpv_fOB85+1x;6g`UkRtf(Bb{o=lPidqZqS1`F~`fG3M00NQy-=w$=OkF-@T%5#S3 zWZj1ONf7dA1_^T3Bv1N6u7(kqbVR0D8;Rg$Mk;^Hza?}i6RK<^DC>Um5`tKe@)Ag@ zsgd3l_4kSERC%xGQ=2~zF)RK3`wus%uyWP@GTPA(83zXkL4Eu&jE43j(R(ntFoj7= z2hi`;R(Uvl?zr$?BjU5PU}#dhy~PUybL#nxFgK9_GB{!U_ILB&f&B$Cnt(u38zYhQ zM9jAVnQWGW%o~IxhQZo|%rESrPbWIkyunEVB2TO*eN62SN%PK_n?xk!4pAW)4H`1Ck`hX&j8OKjj0g=Q36<=Irx*)N=vDQ?CUV31%CU8;0ySzy+~DXa z(cOihsVn9>O@sFb*2CQEBSaO+;M2|vm97a1{KWiXE`09bfF}DBG7QE|J)8jZ0yn6n z5;<2S%sCD$_`@6Mm?rgp=3_c#$}(^<0CTR$N&^jG7e`#UFcF}4?p$nCsh8Kx20T#D z`}bKP<#_~G@(?pA87{NZB(cOffvH^MtS#|hiN_A~a9Ut$`mw4#qI=5*;{1vx714c! 
znb8@zT@MTo7s86ymypo2B;z0BwtADjn4eD}PoDVSnAB_oo&yp2ebjbF8VW`OuA(7N zCi&_F-T8Jw7@r8&Lu%!BZcD`feW_(CgX1hPnh8uGB8FX+agZ9>?T|Ys5VZnUmg5js z*#;F^Sj=sN1uWG-g;N6unzYkTJ$y4zXyvga9XgD1`cgNn3`~QP#I<*Mxe|3Hi0u+fq_iWO3Xq#romk!V{X4~Wre#GS zz0xn=Q3T+CJZ^68%}`PZ3JQW3O+N9aK|$9B2|pUjoJSH7E=HSF4+3gLy9rB?#2HgM zH$EQ4)TNTM2bEhc!Nt`dp%)qAtHQ#_`tBI-BNZlz$XPH;OC&8D?K(D~fSX5IPDiJa zc!0xx{k?BP2;Qz&ug=G5GFO!mg)RV%QGq|>T0d_Ih2b%5qxj0vqRx2@w zM%N+{7qE7E^!dSg!Mw@7-72``)1YzSbd`k&LJ=weSr`UbFWSTWC3I?bN)D6tk05*6 zuC`T!@1Ef$=#CcD)@rR;vu4SWBS%^~I=F!ZzEG3YIDXOcgQUw$HXP(wJUl!eA-tSH z%G!!-yj@pP=34MPrRw7rLf?((^dm9#?&ms?U&@*18xF_t*1wU=DBIG z8ZVjo!ik5eIf^TaJOCA&AX!otNG@DO?#NYm8SyNRs0hk&IVs^G6M}!(YzmN zP@~i{F(G=X@Y#)Kisw`$r2s_h5T~lG$vCF0x?AUk-yc2hd(AWA>I4%|pSN!hEoYZz z)rePrh5a#jfQo_FwRrl(&4^h4efl)$Ys_E2I%lnvlob5;E@aIKkjFRwHtqW4(%Icz zh$sekQ!>I#+&sY);bE77g4~I8GdhT{RK{!25^W50vIG2){H%xI$wAmLW4UaDy6D@p zB2eF*2PG_gwEY;YqckHt5Hm7tDV3)ub`phl1nyGPAN@`2z-#XAy2J%r3>7GfwoAhY z+YRDUckkZK|MZEGDB|Kak)dDmm9jEgD12y|@k>cPK1o;j^v7@r z;fveg=Zo45ZXw6Cj2~i!Er;?jPQPv2wzE0u{oC^pK_0j<0X0I!+6_$o zjI6A}qM`-3U1->N5A8x8vu5Xo084fxZn9Y8bJ-6&S*`K);w^Ktp(D$%Eo||KC`8nn z{3Nmxi=QLsiEAdAct%yI9=;vER69&+R-nctGQR+aNKhfd;#d+yi-aVZ(84B{Or-CT zgbkpz6>rK9r42Yd%3i+wytHT(r(Sbg+f`^H5b1t0TTik=NFESL3fL16jj0v$C<7y% zYQzhP49r6=RTz(yoFwA*BeA73Zi-3Q=6hb#friP{^5xMj^D?yrHo{ z$;a9vQhuyZDuBo2&9e6IjXArSmdRb$ZNduBhh{91R|dvo4%h*r<~kGOct}L+2zz5A z?9$=0j{4%{_4K)Ew)NI$rjq&3AseVC6DzquE8HJ0 zc)#*oTS2U%Q1C!7?KMZb=Iyp&5j99QllNnXoB3*-rb$m`4N$kRP>LY(fse9T9g$qol!As!wI$mS6A9(YGP7^7D8ZNM@I)Oa1{e3GRy6y z^V=Q8Yo;4>S+3WwFU3jrF;0yeL&dz*`1{x@aI(3AkT%2(ogH`-qLe8$g>>=Ai^*Ls zGm`5>O_pF`w2L5(<6&B+oo%a<`28okRLh0%037|qcMHV?rOIO#4uXp9`9d&kKHIPmP5fFz0qxy*gCY_cU zKFIF^rP>C|3Oz$Z4>-xpcdfhYt$+6H0wbdij^fFImi;6tM&hI?ofsK;l~`riPR<3; ziGk=i@*CONDl8OYoNI4y4@B29v2S)p`vc6*F~lbfRlpY=yTrwLW+z`C9!dWZZ_1o@ ziUy8|qooG!y&6RD4N22!NN=3`_<-hyuP;H246^P&N;{bmR$#hj-MZB$jSu^m`w%xP z_^}X06J&h{AlXBV?I0qaXYB&%3U`26&NCB61lWFf6c)^CrV6ilFJ!moncB`hp~eNj 
zYxtS6nwm9m|B#dZ0;?Iw%k%~M8y7S*Y`_DWi^+GoAWMSshg`#I1YQ)IC2xzj;DzA?EFuUcA_^+Q352I}O&~ z;EVb^gux=zK*C6Gks7u_AQ`E=!!xmkPJvjp1*jBo;={szE3rm|2tg|h500f>Fj~^X zWKf3lv;^#kEP85$5u@i*VE;f%l-G!hJC>mZOvB0J2aF$k;sqax@Bq@5V&m*Z!iMhh zr1j#sCD^`2&=@7+JXqX}5l*9PVPqE2v~e!7*suNlbmYk)1)(2ehxZX<&P1wDEn2}X zCnx6(6+f^KUaxGDwysEpT8Pv*z{WuY)8M`B`NZ}FIqPQZFDU-O^qRz-H&jS<&>}i` zKmh`gQV;ezXf{wDObkLeZZDn*f;jONL?!OWDg=E=VJp|)^d{##CYE0S56znBO;Pg&!xu?PL`k^mH_}* z59BV0mmi@Li=$GT8g>3X8TLp1Gd@0!g*gwWu~nSeEE4V|3=G6+>WEdAP=ZOnKj)Dk zJ%D6&f^E72*x^LB3m^lsDPk*%!hbU-XOwJt6QnO8v4gKIKuT1ZpvizBz2DH#@OpR? zMu?H>yJIbbQMLtK$8Jk&@y2FU|NY5BJRHE1a6 zCm5JewUZhb$O8qo=EUU+w5AJBRf%s9N%?T)IFDsP3AaFxHPDLKU3GR|2lnw0iSjl? zFW+@Lw?Q-X#UR?3O!(lySKQs)Xvi7_Act-bvx7DQum`VN^aDUfs8>7zP@-_~MHv^z zx(f9n1iAL~P9Q%+I-WmAA3@kBzlX?&#ur+M++tl?bbQzV45U!!zRBXbkApMdLb-rE z$_cc2h_)bfY+vEGjDCL2#0T(PKnKV* zGjrFX zf;EKm{1N(D1mvYOBi%?&OIt*N9+5_-KTEL{4iZPG`;(joH{~9NOz_lWd)iE!*oC?w z3yjU}?f3-w0&!0t8bkj1{6f{T)vH%;6BgbJ<;ek;*)f--T`6+A;bufghNPjUtIPi4 zLRA4gX$HQQtP1=AaWG-bR8&6ES_RA;74m*m#{nI_xSBcNQLM@UeJ6`nxMi&(r#av) zWV!x`vmCH-#UWQ}1<_Pu5zE5DVjw`Am*D#(FMx`5^MM=rzuGlRot&JYvdWI~QDuD- z)NX`f=0tK6T#Z#%<*S$oXK!?3@U-PE!zF=R;i;fDcse?TLI`&i1p~Y7(ATAaOyDIJ zZK=&JX-h$S|2gVdP*Nzau&RqyNMjL9eg1swr-|nF5^D-c@hJh{C*0BFzlpuw`UB4+ zcBqw^l=SMIxRuKhISycTs-zk!k|G;V#~s}ZBH)Mcg!Q6k^!}bZqE{r`JqXr`2WQdM z=XvT67XaFSM5+|PVD^Q+5q%F4B2)REwi-;*WiGAqjK`*Ze!87WX(&HYb@5=(StUxI`Eq~b z1|kRroh0?d{6@Hd;4yy6vbjjfhYSHxLqrfEs&En#62y@NNE4Bu%WrJVt*PeSxs&CA5%6XbMitrmGofVPgtC~bHL z6cHG75mGl5>=6jcWEK5jIe|(H_zgF!l)ZZGmq)-@DXz$V9$|+Pub&4A2|l>lXVB9C z?So9+x^`$5=Z!TwVDr3reE1G2pJ8Tj8h9jy_}7wLjC3;LREhSsJ%xziQ7ACQV1R7R z(Z~aI3ioy_ee>o`O@_s0)YN3Kv;Av3HCIMwH9W5Y9+2D)#eoMzVG4Up|sWhGOS)7S>+ zSzRY$_^{9SK&EedL%S6>1KQsmP z;s-bwh>Cb1pGGoT#>f$xi74yT#4rs7t?^>~i$aqRUC`{5AhZRbJ#g-_w5`g83w)G- z@dY*Ozg&d4Q{jigNB%fyXL5ZG<`)r+#xd7_=AJq6t_ zZ|qQ(tfxr+eICsedHeb@Q;LAoRI;h@Frmc8GUP(nrU)>R2Aj;KZ#RH@YiRYMS8_}j zp$VBVGdkGnf+ryjK-mbhsM*r#TzO~bTtLtoT0tmwL6ISL3V~&)BNtGfL6JDn3dyHa 
z3p`F!o|relqSp%zmt{D%8P}{iZn6RRC<_bsS&_d$@R@QCR-*`$r;aKmB7kTpNqyS4lc2Y;(dj7n5>V9aw z%nDMx5DjuH(RXUnTz}*t>KPvl3V6YlhcB3yX&E8Br~s;OPf53PVQR;1*#{f|c#e|6 z1ftC~2vflrg=OE8XV9Ca*;AkAk|#@b3VY<&i2_7@Gz1@*;st@i)mcxGu+ZXXU$mpf zV5Ag*169Sw@2H!om3n|+2m{S~1B;>ceUN4kL`(UDX;z@dCElN<27qf;Qi{;a3M@kf zaZOuLNQ5?NH+#T{GO2| zl12B4;23f#v64jI*nyOGBU!xa1(%w>pK)^&s^nmt%|Uf9!Ob;N&D z!{k?i7})3|_P(sFjBs}VhLWIwP8A<}Drg%rvr|`rpBrcUL+utV8~C0m^gb&{TZee) z4(JKt>1I$+^wcxdOh|#MPTa%A#Evk3si+`^E5uSSaQcLn)@dlBC)M{SV1e@_*=q1G z*$y<*pxH(GSb=Sy(Pjau1$&OJ)Bt|B*y9I|9=%yA+61Fo0wDs@+)uo_kzpaoF`q1k zUfW01Hjq?VjAN9?x`8F9CRFR-!NutGV8OuhLRBz5Ie8C`c6C3k!^NlW&!SPmmxsu* zuCucmF&x@fA3gQOv#uw`>rI1?8wSe=A^^Jvut!2dFN%M&Rk65;TR*8X56# z55X`Pz&`C<$F<<89qa2*Y+{!a-oAZ5o@lZKq+LUwm;pT`=2x&SSAUa^;*W>PJH4^r zr+5UJ4#dXyLsx0jwLSaxU9sE%92oku?D)Lv`t|X3^qwCMQE7zu9Cgj0Y74>BXaz ze8fr^cgI;vqy9LE;uE`RE~-J*5*RXHnjae1M9-1vZnM!U67~qXoB0_QZD?jSR42+n zm)jARPT#8%SYS8ii?sGMl-j_>FGlZjXXD%wa-8FQJd*DRWCvf-We9T%k-*?#zJgyo zuCw&;5z1Xr({&=-w*yl0#%64W=wLBw*dx#VK|4ZbQ~3GwP+&K?tNWHzK0V2T#F9^j zu=Vhe)xRWZrAF6n(L!db!9}?PmJz_}?_Ies?=+)qxCIy2Dd1r$(rJjQz)W}@EWs+& z$4%iYmIG@G(W?kSATTEipUFErTk^$HQ3~lIZ50r>1-V^3D|$S87Xwo)uYE-53nJY} zx{NCe9-8k4v(^+_6|k3#UG zJtvx}{<~e>-TEl*YH}RJ5S%|oE3QX$KUf?8)e0Of6ct)M62R`>U}r~K*8{Pj+0*Md zVEK(wHT;U)QKWx8&a}&xsf*mj&F$&c`8<3U`?3YO_L*0(%2@kQlfe5^6}M&}fK$Q!cavzz2dO@#wplO$P)aKBK0P zI79K}0CmEj=K*~_02~A~FZvJ#4}97Lg}N7Tm4OZ`={}Rp3SNo&P|PGav(>t;**l)*9M*&Zj0EfcOCe97MbN5o!@)00fNQ54Gxk1Pn;v zs%EuZKyLfYq*kP-F^BjmkOT(_^hdbAzL@6RvW176w*3CCtIf^L9NkC<*1~k!4ZWAm ze0*ImRs{(0cZ~;py?b87OmTF2$XN%pHc3*MOk_nyM%MW{=mww%gjHDh{yiP3OwrJI zfZKZsSqgEe|Q*SJ$juVLQ2dEVN^urb+F zn=M1URgig`)MqdvG86mKO~GY@uq@%N7T&Q#7Bc|^%LBZ58Ve8!;C!S{p4d>UskBoD zr~?4y%_nFWxGgYErbAMf;WR1w69!tA(rcvbcaVMnD$h;u4( zw3F74fy;0(_T`cGvCrb{_1Si-P@kx1L4j5pj+=qwl4$&{IE#=$Ocnr_fHUVt?x4)L zkGJ={fvImFunvz=XMJA4yubkkD>0;0P*gl3DM%5w>SB@}Z=P>!Z+`~o$ZO{%6B6!X zzD_6u)?d6zR*axh2F%4(qSEb#4PnDhZkL9j1Gu?}G*w?-fGY#1OfmwGRO$gv1w=tj zC7#@_E(IeaBhZ#wP;Mk;mp0ValCTq}cF>Nwn`&uo{f2NU5Gd5+RHNQn5eHS|-8?R^ 
z;>IOs0Jk97GG7~CynqtUVm%`xNS%m2AT__QuSO%!kQ@R@0p7Z~rNs^W&@-^E#h%D2 zRTOsXxZ_KdV?p)7;|kP-0k`6tQ};+K2#tvLexbAho>DA;Tj(Z|>Q%d8$}-{BK60E8 zZ4aWfjJMv834&+N)AoSm#{)A3-t9w531{rtI^pb2gb?81pQ4Vi5Lt9AwPNg7f`z_V z-&5Ld36DuOBA<$^lh|ptkqi|0ue`d=LpWYG1ED^iW4{yE6HtbQ>!aO-dxKz8h$F!q z>YW(IY9osh>-0pAEu5@TMb(}$w!l4$Q;Pw~$-&qEqzYo+X+y(fJL?)$A{;d*(@fluqeDO^Kup>B6Al4Pt?U3SK|yDrmnn`}>4W{STo8n`UPr`JH57hlGvR0f)oDCH3-kwS zVjVD}NzJ}$(Ap5(OCt&ne<|QM)P2&9RP$KcnC&fX!D94xyhxFSvIpwBLMdVIz_NmP zFWnPZzLMPz6o4eMUmDL6R~YO>!qz}_bxyJ`{b5PQ5wHX9ly;*(7-%=-5FEa%cdv*{Yk%iabQM zOhZL;Z5|BI>~`Nq)xwmO^vEx6#XJO6Rjqzh<)HY)m5N{wknrxC9{UUraTR{B0G}yw z^mt3I1L|nP?m&z@cCrIUlr5^VXZm&_+JXo5^fnT`cTh!GGIarD1up+NImu7m6gVQe zl!AnX6yC%O8e1a&!v}hdB&ld|10ln~;+c~pMchZRQ(Dk_A;!|B_GGUhZ}-Rh!ICsZ z)~Q+-q$zKN0|szAV3+(_={7efXPl!?I~L2^4 z2$b$8PVC9HgJ-U}yFWy3Z*F6wX(uTvx|+;<;YeF2X11JyNJu^yK}mL9`bMD;r(^)k zgiZv|P*q1jI~E>-rPrw`4>~hmv?I_G$^!}RU^AhOx)^D|cI>D`kzKjSptfqIEI^;o zGc|QPSg?c0)C%9dTaJ|>4dkE#{bD_&0t9)CisHm6-vWq|K%6+x_95jSDB+ICFDY3H z9@cA|=cLO;S|Rz=92xInz`}(nsIhcB{ru>${o;nkpM6`n_LlAFikj+fteMpb0 z)Rus|07Ho4FX*j%0p1ce7vYW(YbJv6EoT*9p?Gfx4g!W`2s2KBoq!yCll08EJ~>xs zVf>m*4zuy`2cSQ*2x=3dR~*k8U*9YH3==a#WDTTxAV@%8pC71cB<_MepxWq~#=|M4 zyc&R70f=j)aKR(oE2x?C#TXYvnym0o!C}_|Tm+D9GdzV05K3a-86v(D%M5aHzeZL% z0;vS0#A`g$??V721V50i2#SHu{mmg^a14R1?VWHyON(YtZ7G4A02l8=HxW0EB6-nm z6-cZgHm-JFbe99Or8RAjj`9@29YFx!>YkgMOSu1da5CQY2U~t(v!go3)>LOJD=l3~ z$N&h%@YGdqe+I_gl1yzuKh|B3Sm(QH&S1B)oU}Q9{4QlSsxK}R$1jMO zQ9uywc`@mSv{t1zX(+zVrl)2T3TJ@JZn3Ab((Xz*q#Xw)QksjND=#_|?(lj2h~SAUc*foYwE(I?_wk zq^!U}bHV?0QIQv-!%_zahlk+JfTJUai4gGjZkO6q(9GyJ9XCfV3`fc^`mJ-e}}|;f=h32*=+r#bdmnZ#TgTVh2G^ zklz&4)t$ii(t1-hw z{A0T8f*g~9i+@HfDz%}yjVja~3@a3-##4?;8No;KHJp6}#sL5I{I3r2+(+l&Gq|{$ z;azC-1usqRo0a%HlC}e`wObCK76i`k75n5ruS|wGs*fUXN;|S~Zaf7?7iJzXo`M_7 z7|1;H^v`rnyT5>eyqAChUf}mHKGC!QUZbg>3lIC^w&EPcXb9Zw9((J?{59Rc_xBfA z)wk}9_~8o}LHrOdRc3PgF`3FhIfrBrlRGd~#N^Ix^})YB_EwkwJyP;4N?})9%-$_K z@pAxWVD9&?11i-k3Jw7c6oe!EROgg}B^89qlc@rc?gh`@hYu1m%gC@(vSF 
z8|~EZ6)_CtYzKB;0K9Kv9N~ym;e?|{Dk=gCpG1+sW}PX5dDGN@CH7RM;WYm}fkU}ustQo{b{iaVSRpAuAS zBu7FggAf{V)x2q6f)=yspb3Rbx={)6`RJzPHAFWu%A|3A#zOa)F{n?!dc z8HQJAXr6)173nX5AD(R1AVIj;XcLda%AoA!@<+>hks;R!x*ekXwOpAXe?caT{xgQw zp78Euskns18Q`+EaBKrU%ZaFb8WfzgPa3#ZUe^v~((64JUoZuf` zI)=}W$00Y7m6%B8HD)wjwLfHi{Y5e%Tl`a=PDpydEign-3L>1)P{2+hhP1$vC=>vC zkj8TTDr0c^e+-KCns&IA85B&vkb8S}_TJJl;Mb(%Q77nmC=Lq+@ViHFaByk*L9gdf zk^Zk+L-Bo1@3kzmwUv)S5DfSs=*1_6q=PIqkAL`y_d^=rG&3}r{*RUXbJ-ZG=K$LV z2I=t!%lUbwzy>7`NDd}Qc!nQgWd6M?Xd?KSm;AMVd}Yn(gS)nEJKAmoY=%WBHn`+$ z>1>ca!`MEdFA7^00&hYY^Y6oONqLc-C((8*jX)$Uz*G(4o?#vc@B|qhMkc8bWX3@2 zKwRtlg@-QSSzz}U{A)}qy<9LY2ZZbm={lk-j)prT{U)%MNU4vuPHDo8^fy5wTXy0j zi#v{Q`}ft5zH|%#EV8>;0WmSLV$?C@=edG|shc+QF=lG}AtU(L(L|XL+h=k1YcEtp zgS50f+3@|knwlC4rLQr^2faW-y$2nO$6utehT0ew^yirH@_IbCTzVtPa2IHq3&9IP zhVJI$Lkrm4d~(zjE!aodrMAZ8EK$T`1Kae^S@I{6D*kN#!3zDkl|8hryL}so3kLC0->K$4=N3#0>(J`{K>v~iL(36Qs zF0`tV&i?11`Y$^_NrAf_nDR0k8yjG-i;>Q2I!qxc#H5F8gtXH~(x8lq2{zajbwxLC zV;puJ$Tcv~{kM?VsA`CYCULe#YvQzt$#P`>;Qw#jym>xGLz7cd_5qM43^P2700Z8O z?*AoVVrNjmVMCoHNREUXYlP<^GRuMDK^$vAQeE8SIPCkFo2nZS930cT(o6kodpn>& zoMXrt{wZt@M#pigkCz;P&Kr)l=cnjz-A0E9OCEn3@7Nv2@rMiWW8>}Rb0eewNeliO zDBU@hrkQc6Am{%$eD63O3GwzIH>sqotPpL>e_apkNyxAYltr=EkoYDduM5CwgW*u1 zkmx+7mCgCpUjA=UGWr`%{`<*)8?RJ(jV;xFriqGBZdX-6h5FL-Od>jt09ft>mer&&G7h=UE!j zsJxoDjE(0Z@4Tg9lud_Rud&T{3up*wVn4E)L%>u_SF}IIU-G_!wUJCj*qBm*jZyCI zt49Pguc0MRI@FN*c?XOl*)$Zrc`0=RuWOv?yUz+4+*+n4_*%U*`tG4jCNIa6~vTg_le9*UQS+GOgkb#>;LW*(VyMfaVmk1TuU{47R|3LQP4gVlqZP zAAwQxexA};5k=n3(J7|uld65Fy|_mst{%3ef`%9N2`jymMx9CD_{F0d)2r7 zYAA;052uNl)&+nNkY5f^8kZ$oaJw<;_Z$EIJc|FhD#LT`Ci&A(Qa=zdmohaome=uN zcmG#3AdE1~ec$Sf)0*B`!cw_?9IvE@i*aS+!E6 z5aBo!K4Z!b4c^S*54%IqkpfQG=FXx+pyY=d3qb+ zKvODmGEXpG&I<|(d(6G^?+-cmP=3AbvBss_Jskj2!x5?DbocP^Fo+3?v8A!1r<^xM z58CID%C43vl84;x|MIE7yZ0=&iK#fOqr(QmgZmn};1m+_ zN9Hovm5^llEyRdzFpZ!^u=+cdX$3Mo6VQ_J!5vXBh4}sJkZnSrc=GNgI|GN?gLM}L z#VCdzGm}^yQC^I){TzD8QE9utqU_6Pqi=*lGk~I!BSJYvK-9sxkTltor+`UfrZ-|7$O8xh>knyqPr_wikPCWkLZyFs zN5bc6*(eJeK20?1Aq^-Gox~8fs 
zu{HS)ey3hoMdxGmsxTNi8#_jY`UoI4{M~A8IMd^FY`yERrmd}j-e6`@O7T3G<7HRu zzo%4p3#4mXADH?S1dy{#Ao%@&TAB%U^mN&tOoOtNQhcH0@^gcTa_Ai%a*B;v6~i4< zQGy$5F3x(oLQT96X0vLtvI|(W&1nRK`AzJX6;G?PI9O(C1EJq|)L49k$!{_)@yA!| ztX&}GSY$Q){;8U|pVG12WKi%h#=KtH>KV6Z;VbgOnc9DUVW7a(8Mo|361jcXYdnqV zT2gDZ0L?=T*d*6cYoF;amX7F8-d+hj96^3}t5aS2#d47E##?=a2%RZ;s}CB2saV93 z(jQ=Me3tq4gOq=lgJ0Lel1`;_rIse?+I|f?d9(Zafw`4?gT~Nk1ld;RCzkVPJ zFUf%};FI5v>T`xlm3_S51Lt#H{7KoWEmZ8Fr_3)K0KXkNeE2kGcQ9_upX`iA1PEn5 zpn7#?PK8VwcCSobcEC_FYrQF+`P=`tcNeb{9y8J_%|mvs!@uqA1x=pOC| zxa;NZz0BwLJ<8S*0%_yELtmy_zw*f37hi|7(vwE+BS?&+wdXE3cel6w#RZSowfHBjWPs!_52zEqZ__nO~Y7 zKD-oFup{OrF-Ny0*%{Cmh6abh&QU+NTT|O&p;>w;(EE&i=}CPt72baWo|ji`{BC;I zN?rp%>ii=bpI%v0{EPPk?G6#vKcE+8phZ~8^=tc!TZbnIeGtvcr=vhdP*6822t~<}{EQtgD z7mkJhu52VXGlgahPH4V3*M>eH5fa9TG|c9_dl0#32Rg^N%2q*{oxy8Xb+A_r*5>IUke zf_W<{&f(zwb)^6AgV#gn?FV!J>(>df6fF)9oYTZO9h6Se-w+?Gzchi(+pE!Tpv-^dh4bLROCF_G}%)JZ><^ukP_TE(}jNzBqN|&@Sl+e}FScy#D!| zxk;BHGg!2$-eq=rs`sKxVfT+AfEB}8P1AnxpBe&K?ui$q{j`vVgV~TOin{dwvbvj^eU}34vlmig>`~^zi#`dJ#b=g znChF(%ZsF0f7mLx1k3%83lfWr379}zCf^}iqJO1u@o9P@`Z`%4zk3T5YOXr5v|Zd1Y;{1GkqM(0UW z4+eI&cdBz3+pqsz;fqx(W#vOMfm;dmFUzvK`ia(l&^*z*p_N;!5zTAxpKmV+{e?7c<T@(27=Ii>^VX$=c&dw~iDg$SLsJTK`s*EOer`aRqR6J2-f;t0@(- zVXAqLbsTiZ=H?e_sg+rdVRHh-~RUpIs&22N$wv@4?OkYy$lTf3$%iMO1n| zCT#Wi?Da>_pFdAKvVu3}`SSo|od3G?9=M$AfsaB`GG{e`SNCIw;N!j_La z4xVBChnGMBq|94^`PIT)P{cmaiPh(?znJ|BvA3kObeZ+qod?L-fL$_yGyGq7-yxYD zR+!PQzf>-T5;kGNbrv{p+?8XdSN2wl-s5^NB<##DcwG^UyTNA zBp5F87=_z$p=nOg{_N2@WA1l)Szev>eGkg%IaSa9^SQ%rK0Dbos{}}noatHs%GsSp z7(sklI$F5Mx0Ah(&&nU^E{cnP3qC^M)k+a|z@Zn1JEXr|7G$Cpr{1`Lcy@-5nuc0E zqrvRdx-I|wQ4;zBME2%|H$U`bTQ<7itJ2P3u@d8*e=6ybYojM{&Il(qJIBUHbg!}KgfNHzhNA>DXU`PhIi9r<4){&SJn5Y@f; zN0}WzAW{6yZ86&t$BR@^%s76<38oMB9@LV z?{0HL8)R`VkpMuvBn(#|Y*)df8o;pQV!T`=uR+lc)NN2s`1cc0nfDI5e<%kD2a-Z+ ze4O5wf{JQ&pJ5An?^BN$)0n~8o2a>Cd+;V=CnDLhzs3ytB(bqbL zGgQ^jKns)RxP$+Ut1NWN#|BEnYzhsbjiV+k% zgDWWWB0K0GNOGC>0jp;SWQf~$@4f|$J7317=UMSM9>qBDnJd`*|2w1;oFgfFXc_v> 
zt=LiRD2({WhQTR?7?RgtXG+ReBy`b0>-Ghfh2~|22)4aUKGWg-Kw8OnqSAY(mnXTVl&8BTu|ID!a&N5mb{piGiCA&}GIIIOTrQc(FL+hW&|+40$aem8%Huf`oO zOHDW1l)P#;h|C437I|R`1pD+GoiRXn-@Ypjg*0K30s}b_Rje`hfItps9x-uF z`!YL08+DA=Crx0UUPZX#rltcL>2}#}6YJJyh<$%j(Y5>lduPx1=5XN%sl}^v`gOyt zWlZk(>o(?W+LF1o`WF;Tnhxvg_1v3pZ?ONseU*=!j_gp&5l3IJ)!aVc& z634yqntr_M#qS<`3S4=*^ICb-H37hse=B_HscF4>(tE%f1~N&+RNbsV8N?ui9|jaO z&xc|%L=S|^>>poFj`Y;t3D&W{4!ZO0kdVV*W-w$;BLt9Q%np)yK;X8Kzzyx~9e`}2 zLB?aYf7j(KVVc}9)$smZgGlh;)#8e{y)hR|%eqr-4Zca}gtW{abTWDD(~dZ2C$x2WTw6wNtCsz-g!?g5Eq;^`%F z+0xA+%cL3~QpI--p0#G_H8lF9#j>+u5_M?|kah{?@1c#@6VF zgu#2a^@irdnwlbFTFfCD;Fz7It8}*U5>U znVjjpY+4x@MGgMhzx(^n@74wrZEwUF7G2m~xtPT%pH`P;SpU^Y+uP2I^t#@>pUO2@ z_P%$KW9qBfZ&!S;p0Z}8V|CalaXnaQiltxBA$YIpB8y3nK<#q|6`Z+r?9(kig@r1k z_8wQIWInGbUG%Z-UVfwG?4^(K0++&~ZZzF58+felt)I3?w9UePe6WjM>qpO?m#YQ7 zx_`MiT-qf*GacM_S?TbDa<%Q9w{Pcb&OZ6NXW~tS-n)K!-PtVe2V*jP&XV@stZz@+ z4>)LUsZU>8f6#9KW0S1+<<}HFJ6g_NXtPxfI}tu%z4PMK&DS?84eKv`iW{-F~_`JBdfZblaDXH{w07p`6CI=hE1eY^%ZM;YgK*fAQBh5?x*P>rJqLmdoX(%b>cfP z5;7>3k%NnMRi|rjhcGF9K$t{)o zBAm8iW#(cXpQk^^lX7Ff1+Ltblt&-`b}%|VuWUc(S(mnjAwxQL(MIsm{?j>n|B?2m zYnCZh8*rbtb*+J05&(3w+=*|N-F)$nA74Xw{*>o3n=gTm5<1i&GA=WW{o7q;rYzGJ zgHw{Pa*^Ta@0Nzp#(@KF149OgL+GA zXHmAP`O2nk)YcztYnYxt*(^Bem}^<)Ir_}8!c6*PcF2;L_xi!sUnlrqeu`0CnXdbB z^sW;VbMQ6?UzZ0kb4_Rfyp9+B@l2p1ugARQsY|Ja!?HT((vt@|Qx4aNC%eqfiwe$m zZ}`dDlRKsnAM?c2VL*Jk{8NdqKHu{Q`cr}e0tOgQ2?z>0mJ##rREP=RJ^xkQa+&*+ zhdf&OO=u@r*eqn;PZ%t{CELnxR%9B+zUlTM$_KOBAKxcC#me|?5{uk@(gscK68kS@ zn76!{b=zRt_2P7c=&bS9g7a;bHTOSn@v}NfYrbL4QNbVf^UK&jl)vdrKHro=VPLzz znBKrs@Ziep^F8zWRWIu8`kcBxCTqoFdKdPz?q-qZqP4bCfdVIAD-LXlUghRuKJsol z%B@{AN>sLo40Pc zvZQW8FNd4&;~iQ}z09*Nl8hfXSQA_KDED&}wY_=oKG|m*0L$i$l*6)n)u%4<*=}I7 z-aTcs)IN-D^o2y|mDYJLWiP+-4HaHGXggr8<1@_QB4v8Y*GaD_Kq4}Ey;Z)_?8m$t zv+pKUV`depJxi1y#H4Xbdy6&|704JRe-%1%X!erC`TZ3&&$%`vRWx zc<~)I&6O?BGn4i8y^qL;r1#|ymP*VBw^sKo-|Fep|5UwIQgpT8ES^9?-Aj?94u?PU zzB`c`b|~1XsIBn>SBKvGJC05~*_+q*Y5US0ls?dN{AzV)e^o(yf1E{e8(Ud*KTlC_ zvIV;>oyKH_=81XTGaXIrfs#Qp?5>w3OzL*r 
z;Hz%>G+Hre|I+N8^7CAqWk;NAt;Ta}Q{KAVI6lfN`RT^zm^!^cra-EgMuYOvnX#i9 zGk&qxn=-5YCvJ(3IloU)%v;pa@3mr7UA5~6JIj61G^dd~UjVacXaTHhv*+UzJUv>s zdDfSuGH>k4N3}1av(NI|p6|-Ecy~G{+)HZVWYaq2wtxiDvnq)G)%J_IeS0O*bg%BF z`_uC(^$X$hTYp}rsy94d#O$oS^1~YCDn`H@putpt{{lwVJYCX!!2AqSapgWbNcJ119$FnG4%+db4ce3g=!?1i}xrgDd~kzLN4NS z#k3J0fZQO=8Z5Fu@v>eh49g8KC@pXFl~H@n zAEPSu^ljbSrrkf!-COZGtzLk+PT6xbAS_IgRH9|foxB1@elpvmq|9F@w!a$fJ$K?F zgTVTJnO5f$A;Bf{uQROBsAH_2Ioqd{vEDU^k@sMea$ZSed|1NwwY7I--b~%{qm%e7 z7B6%)a`7G3I|^U!Z9VpMWbCq?u}@i@#P!Qr$8#K)cIn65a8+1=! zR!p&wGyRtIV@7_x&H8l*#J;ADQx920QMQSk-r{@T_uR^gYuR#km6s|blVv)e&X&f%_1dGnG?7Q}#><Vmz+&pk{fQE zyQq@wu>M2rduiq z4F~KaQ+cD9{0j?97`an-6};E&o(@zJK7}wRpj{juYe<=?zEk}s>*vX6Ylrno+r1xo z$>Mj^h-rI)jNL6}_iS@aZYUXi?VPz^Kis0-81`Le*s{BH;@$2E9m<(?;YD7ft3ytX zecY7jB@qu*Dq@IDtQ#mV~{ zR+$ZsRd#9}GaD@*o_X6TeeZQkf9K8W3Q5h2*&lco9eDcHrK#M5Teap+UiaH#ub%eD zVM8lqxo?HqN(|S@Ea`iT7#MtUCh(PjfJfS}j98U)pQ(0VlX94@p|V0*p}oWxRQTU? z&#zq^sg&+@Im^%X#__Op&+Y4KZ=Ay|4^<@w_| zN7-y9{aDP;b1U1LCpZVj)JE^H(uIX`s&|i7bX}gE5f|gUB1b!=nXNuz(|61M#B*EW zQMTFUZ(n|tF*GTMi_Z!_3U`r@59wv!?BYM`?HWJzL4c}^7j$9hEonOSG;eeKAGBX`O}NbV9S>HCbSdHYpo74XtB;hS{5dyk>+@g{o+irUq|lAVy&mEGDn^>^@Tpag2c;-#Y(Se zm#KeUaR;qSOQLe{t@|q!>rQnX%i=Ce8JKW%YqxY#a9A;>oOH%&t+&;uOBtIZ2U{dB zDDR~zr$$7Mcy^eJ$!vH@|9bZiw)$g?%Waok^k2_cUlFPMv^j4$H8a+DnLk74wKAGd zx5D-NV)h3+NwZ_%@iB<`_(02kjC`-J9=Dtp_g<#P!z=E4En1D|;k8Tawad3pU*wYb ztY#*2O0>lcrh(0sUM`=-<9Sn;9boKKF#H)IOkJUdW8CUsz~bjj2;&2opp)k_7& z3%|-xwlzPdnQ97|E%vEMU9-L1#p*Rmf4d%rQ?_>YTy4%%?tbrAmb`mkY*g4O(4D%) zh(%|;;4T-M)O3YcKEapnMHF69r1iRf#@}|}sDki&{m|5>=Ic2Fnu`weE3*d)=G>8{8!#KZ+@UvBAsSCW%S|H#}xKin>S9fF*$GfyQR-K z?wc?1dhE>173Up_UPgOjzD?Qm8W)LKR_RPF~LAyH))y{iD z$3;!}YKj&**bM6(meMYa8R)t|w! 
z78g29>vF$1Y$Go~pBtWQN5O(>&5wRw5n;|?zeuOoFFoVqti7dN+Y3{UCmi8%5k=ff z={oalSYk{~%O8!733q+&dn`RFWyb9|I(EtH=UE=^-c#4Auduzo_$hm@)XYSdVExZQ zj{EfEmmw-$EaDC)f2 zHnLi^uAMJhbe6?N>4c)Z>Q+0DsqjrW2DzXj+T&8N@)aHw9z!o2!KwkIpgykf75 zeZ@Z~PSqb4cxIhg+%SxQ(YTj@^Y{qrCm1VF_=|1BvNdHZ7-5L94w(;}%ibs2Q z%t~&)J{f({Ft7SmQK&rdAy8$h*t*Jf|v5U()$#Z>5PYp{{Zg;(*# zf(7SK?wlpD_owAyK^p~a`Gm)Za`PsqBKx=_$`4||39vbxNku@owC*^qjZ@lDMF`l^in<#kM${h|`R z4o9;I16V|`Pn^~7Hx;P6q{SQ>XmWrQ2W=%!f12aP)u?H_@m1*Ot(iQJ@l0r_9NgYS(Sk|sG7oq0U@?UoC_YHn~KT$hj?QFmXt=v}d1n`Di+f4;!IsAKUloS`mJMV13{Z&v5Nt+`U% zdv@YGjct8#e_^}j={ZtJG%g~gB)1-|Ol|t4^ zg+gPmtcey&*~t>xY*Ug@36*_`kUgBF$W|e{u^zIoku2wV-_0|>ncsEI{PjGptE;p5 zI^XTSKlgpV->>%~BdyW$nMsC_t3`j|H(Y;bliP&2wkXFH($lR=x4^I2LFY=5dp` z_Rjo##Rzpmp+M0_uX$Nbc8r)s5WO<#nF;;jHH%gAqmEZII>V@EPtP@LSn3^S5EplJ zlcG?U`Ekgs3R@|`|4Mdk)0T_`rvXbpD{ILob^Sed^O{O3zW!#%^o1!Ev1gTc%gwVa z+@^4Jcu3eczoYNA>ZXr1e%x52AQWTzb2O*B`TC_fryfjlRew>vuTkD$^T|zhMMTcK z`5ZTaYT9aF$+6duDpu8Ns_fS6GI?-_60!ESb+<>6eRi$K!x_UC`pB^O+_vhummJex zj~cj>XXLqU(%i1!?DO}s7ZiHu=hv0QW;DL0!zbYNJCBFDPe(f**UPyzVoXde3@`9e zd&I0pe=;X}lYf6z=1+=RGMfI?9ddHTR>3c7vRSsNX>@BSw}=I4u-yOk(3}&?wOWM| z_ppuwassy8e2vUMrFE@7IqvJR|5xfFbz-8-hxx-nVa+b_PN7+84!JOI{~5uwRx*iD z;3{ULw$-7_TKIbqg}awda+wOZItS{S5f|`sig?@{08US zd-ww8@(%R!h%tySDwU)MEV4%ZXDLA15b zckF*FGq_Q^{Z_bz4HFKY@$8bvDN2uLX2o2hyNmCs&N^GM1nseXct4QW**!15%Vw17 z{p#q;A;y?FDlK6w&Fy`xn=qeSL||zj&lRuJ?86Qy7{{B~mpL3i)B4ItaQxKbLfz&< z+tP`h{?>Y1URW{-hk51?F<%U)v|f@pEkjDWu1*NA**>=_#eGn&1o~# zy^wZ?*3TlRX<~a+B#7fmwJeKylF|lmk<`i-{|}BI_iprSjKA*Ty<~9ICw)Q7^7VbQ zD=Mc#6mL|#_VZ?;e9bXzir5}hK*L?rI5D%K-;t3eTl1`k;*A$^YjPLG&S~iFds1e> z*(v3+!K{{>((JtgqqQ9+=?kTmT&o25g6X`YA8E})LfzCfV~wMfAol$txfNHpg?q~K zU(U^wy;5KZG3x&Q=pc-x{idHrRF1qyld;IwlhQS+U;9Jf+2AtaQZWC0F!ktv4X8_# z3i4$4^RXvx!Xk3r-hY%9dIKJzo7=+d1_6;`!9p5eUXTjs;GLaVnOK{hVeSx{r9>vl zg7utcJg28F1lw26?QH7V@R%-eb`}4I$4BG5W7WIZYP@$lca?j1By&($V|&}qlJIE zy^_&^a<2wu_w{W zvSp}pV=Sg;q*rFMF;SbH_RKoEGh`WzaRsvTii%u#bj;pVCgy%ktR3%Szsiy)eTxf+ zTc$fx8`=va0y?ul*d$w}MP4{&^J%l(J*8}arTpktD;ajt#dh 
z*J4WI>*&TuA3`mqyBv3XJuTlnY5Ta_z#(9pcT*Rgsu$wUbYk!IZ5mWP+o3tFXCW5? z^ni>IpeK~QL&MeYxL%?r<{+_ige#% zc+CCvf^pO){`REsF!SXO^w&C{x>dEWhI$`UqQ6^}zFz)lJ>yo)#ftwm7c1Qf71o1Y zXP5vM>}#hlN)UO2Xu%X02{D2AZtMR24}n`l6jd>l)GNOA4fn%>2wl?8;5ZNlr3@5= zKrueRqPy;w-QYnXEYld4#m!szUB_}o*2|q^!~{2oTl+8dZ;I6yk{0gV<%|5#cNQ8V z@XAtJ^W91(#&mbAlXhaJ^Vn>)$R4JBO4Y52vtpKem1cW+ZQZH%)>ZNaD;nI`Tg1*Z zyIX4*ty?s^IbwJ=HIzfd+kr0q&0R$y$2FY(a=JY6y_I^VKBX!xB2}@-@V%>DdhXs# zo)d=kH{(|**qr0f4>0CWEuN4(`6hSQLDh3&>lVM&%dEX5*>lWCAw?i@Rq=DWAB9_` z+Uz8yCAwIpI=0U0KMs~)U2%5(^Eg^W1VSWR@X~IEsR6M9g$vUyRF)`Y$nCZkVmg3n zmf0^+0Z5kk8I%us>?v7IT|==9?dFx}s%#c{s@4KDbIc|B$G73mUrgvMBjK*cHl@hV zMH}B=Qz+t6PG4$0vGn8MXmFnE?E=G;+2nIsnzb!bPttQ`9-Wmn@{YLDA9=5TSx7*H zbLH#YKlW+e4gCjCV$*q89Tr$ytp|(tlrGh@;|VDhPxx6gL+II`J{O+T+j@9LGN0y+ zTEfF#Rr@gXf29i=K?CHCgrOkoBPyLx7-QNDfma`4l=qvqE37UFpAtm8wFk1!iS(M`S|scb^juoIdui&QtIpR zj`A8$*QE$#SnUaTV#B44e`zhaw4g}>O5kbDOf5e188}=|D7tj|eb=rCfrAy%TL%GO zi*tT2odPmb5QGToF8y$~d#qR%#(qGz@lpYrjAs=^;J5^Dojv9W@8F9LIWN z{=dGEfJ4Z#*1sJLb5DyYc!DkReOBMBpb;=ZPTM12#mfJjj(^Z8yySy~BGcf;pF&w5 zpE6&yzaLMZItI2J|G(b;ho{U?zU#YjjDv=9$%~N?|}=PLp?^LK}$T0h%&k1*nr|mKu#_K@-LDf{*(3& zha+Zo`{A>KXi))80Z5F!b($w=5V^CeXKI9M(Joi*!Q%MaVMmDN)|ae|NlCO6_xRSy z!pB~E#t009jfp1*t|jt``TP6N7SZSAK}|h^6?&))h`x4Xyw1SA@^h1jCztKooq_4s zKR+)&Kc)cB_EBI_pddGcv1$l;eM>&q^RKsNflaEv{y8t?3NAM z@ONOZ`fl-F`q&Xzms5T&BPZtp>VG^U5MXexBcUQnW1~;5w~0ibIifJ+k^5_ z5}OcRvl+(OvCrUR{h}@CpssB@8z;Zu3oy_a*j0cpzAxS2p#ov=^^-AmM}n(*vGl1w za`ItNkOCY9xm|ucuIVS#Z3(WTL90VH4y=hR7>veHV8B)433=S^vf%oiMCZ^fQoI?? 
zBapoj3v)N>_udHCku&A@gcG?zLlrHj%Y$ z+vW|tiAjq%*d8?8SG1WTy1P~ATAkKR1FEkex(3L@YzJZg5)u?7As0x%XwWbd5ZHFB zFTxdt_RIvXf`qK&oKKpzyyp3v6S|4=8X%?$?2%3W*S@?5Q4pB^ z5UG6mbRC8;0(*lxw-kq_1kx?T7Me1DCWvpu3xf;`0A)Tw>(2FTB{vMN?H z8e?oraGh>bpA<6<{OWAnzfX~z3sao%KYUHOFh}f=EqKfqws?)3Gw?3VJ^{|q0TI3B z-oB)DXNab*Ek6|bM1gQ_grYEbn3R&2m{oC4s=RN~2C|_^XzlFEh2j7#5ugDlSJ?Ur zSHsL!*aM(3SJ$*rex4@03M~^*MoNbcF)P$9C17X{8i$N;CD(IDhnX zT?5aRKdz_{Jrv*VfXSwwPEd|Fm#H7VoLHPNez#fp z+JM+H0qC1YUkI%-5aI6duv0R>wXD?(r!5`EP3jCBc&4VNB-aWt*zetE+6dki=&MC$ zYL!HLCDFJ;qsz0~WC>Q!KpB!F%}(a$Yc~?g&#j)Pe-X+bg(v!bSsAr^{38q|cN;`I zlhT{!NvQ!xNA_+w+@zwG_?lW-*Sb1&vt^Qg{ZihMeIi$$?PIBIfD} zqu}?k*cEg$w2^kK5keNrt|zKt5G;GC5We5Xuw~f~)cHbRKyDQwy z1C3tNpP;FTMt*$e=cC`!-og_J@D(YO(*paRrsulBWI6<-RTErfjsT#$H(!ivf-djY z@Z_v{9ODW-?OFG$@!k}dvJn(S7yN>yV$z$4-E0_twHeAbM>Mg+V7oWrL?nlIZL!p* zJ;;UPJRkNLMvy z76n$c6@1Wji*RdMUT%IVH1*bdx3|@x%a6c~Kimv~1Ge>!jDIVHIu9_9{O#$!|LfBx z-K4t=3@c5E7T~Y#{@Z2`63lP4)Sxgf2_;q4=fk!GH((`zl^2x&LkC(NDrWB@BaJG8 z3E4xnFO58JQ=l`hk_z*3O{#^CK=fpK0b1m&K{>p!@TJ28w9TMpLRqX2yHDs_fDcpbI|$0zbI{76rU)grt8 zm{2b0S<$3H(0;~P577Ib#H19kI&Q+S0oPYC3LE{D8C;7t_;Pz}dz6q9d;Ei$5g^Fcml6O%^HP9y5#L!;{U@kKl2P0 z@LS<{^(xIlPfQvggtK;6ks4Y4;%p z%jCGDpAd42Bt1D=$H0h)YGw`@(leoU>cW>t#TS}ClILGse28>e8X8=LpooC29uH{f z&$K++eRdg)m}H`bwk!)lkVi-*4YE3l=`K-^*;N22c0aO6IRgX3ORJU;48k$p%5db! 
z5u{9n!_@HtBdAMx{aZ;#^CLk!02B#Hus&u{3pm4K@7xop_zfOT2G~mcMe+!aegb1w zk~&isW%F93Us|J42`!ReRA`K_Rw+WcMw>lNFfL^c(D*9;@VP<#*jN`A<*_Vyr z@BAR*6edlY9GO%YE85veowG$_@@wf<4L%c9b{!aE7DW3JqhR_)=%^F9i*MjfqH42aW zFx+?IV+9R^nZV-ml#%wzr|fb+>mOGsGj19Stm4A?-1W>~gp}=G7qv-1GP(SL>F=-r zx)B~-q!AnQ%DPNRj&u{VFD_cNX1pgGK*xtU9EhM7DFCA?KF%({w2T)GwJX)iBmWY< z7z55a#eGU0_wyn2EWZB!XJJtV`EoqIW$6VxH0A|$4+O}^nri!L^p^a{yO{XD(=95& zY4^m(+r;~~m!c~6btcTd3F^O44l4?{J)h!Q{wzjX@ZkNfr_|%c#&^ACT*hBqe<1Rl zeR7ZAqPo>eg)ai88#i}u9Q<{pK~$pl`=a1?*$r`NqU|#?le3QtUww0HkNnN9dHB2A zj&-a9DBYyozrVx;%y??ZE?sjXa zuYZ}IE)e-4wD3}Pivvn4g9a7`1}dGy8^OX13`XHHS2Q}UPhy;ED-~W|wb184(x(Q7 zhU(zB$!>E(Lo+pcSz*d3_<6lf%Npf!oUQNeIapebI3ABd3CuoZN$;NaG#`Pd{&mYs;cTm7njp$Kx<1%ZZLUKIU*Yx8{=zgG!Vx4 zzTe&)V1GaxTY6sxsZ1_RWwLSL%Z-UGZ)Id)@J!`n_b#uf5Hp-~#^LKaQa|Y0X`8!O zPA&yJdHZ;Z|69|HrW)_4(?VH8P4+sjWlfzGnFn037y7NW>xh$!aD!C1rb{(gjOPm8(TYHKELU&$YC) zqH#=iPy&)-V~^H^iY~?ADH_hE$3w~2r=XTAwsZ022WjE+jGz^Eo^Mm1C>lm z6%JuP$I3V+te2S~3}uulSstlW_iH_zyOI$X@f_vMAv=}d;h6um<2Cb^Am!PGt1$McfIb&o+uv&U9MevSO#Xr+ zh*@~^ix)qkr2l#4&Bg11$HBo1EbUfzcP|(i7_jf4GUDit=2jyC<&|NoO05=%1i&PJ z9q(3Db~W0N-?7*b{~^ zg|ILLWb2y)!;Rp{Q3^X=Ts$ykjr)PYlQkqXbf2_z5?GzcB?S>uDEL^y^XJ1(9Bg>o zL-2Gt{$n_>Ogu~#!MxaGDVwevm`{s^tuc14k{qk{s2EoAA@Z}<*jxIKM|k|pk3T`4 zQkj;F{Q2@pPb&YPk5~Ql<7kto#*9o(f4(eypo4r#^5ePm$G!05@e2m8|BsE2@$gJb VXl|EK%_Cb?KB9Iw<&g2U{{fC5Q= 0); @@ -138,8 +138,8 @@ void compute_avgs_and_dirs_3_comp( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - int omitted_component, - partition_metrics pm[4] + unsigned int omitted_component, + partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { const float *texel_weights = ewb.texel_weight_rgb; @@ -279,11 +279,6 @@ void compute_avgs_and_dirs_3_comp( best_vector = sum_zp; } - if (dot3_s(best_vector, best_vector) < 1e-18f) - { - best_vector = vfloat3(1.0f, 1.0f, 1.0f); - } - pm[partition].dir = best_vector; } } @@ -293,9 +288,9 @@ void compute_avgs_and_dirs_2_comp( const partition_info& pt, const imageblock& blk, const 
error_weight_block& ewb, - int component1, - int component2, - partition_metrics pm[4] + unsigned int component1, + unsigned int component2, + partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { const float *texel_weights; @@ -338,10 +333,10 @@ void compute_avgs_and_dirs_2_comp( error_vg = ewb.texel_weight_b; } - int partition_count = pt.partition_count; + unsigned int partition_count = pt.partition_count; promise(partition_count > 0); - for (int partition = 0; partition < partition_count; partition++) + for (unsigned int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt.texels_of_partition[partition]; @@ -349,12 +344,12 @@ void compute_avgs_and_dirs_2_comp( vfloat4 base_sum = vfloat4::zero(); float partition_weight = 0.0f; - int texel_count = pt.partition_texel_count[partition]; + unsigned int texel_count = pt.partition_texel_count[partition]; promise(texel_count > 0); - for (int i = 0; i < texel_count; i++) + for (unsigned int i = 0; i < texel_count; i++) { - int iwt = weights[i]; + unsigned int iwt = weights[i]; float weight = texel_weights[iwt]; vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]) * weight; @@ -378,9 +373,9 @@ void compute_avgs_and_dirs_2_comp( vfloat4 sum_xp = vfloat4::zero(); vfloat4 sum_yp = vfloat4::zero(); - for (int i = 0; i < texel_count; i++) + for (unsigned int i = 0; i < texel_count; i++) { - int iwt = weights[i]; + unsigned int iwt = weights[i]; float weight = texel_weights[iwt]; vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]); texel_datum = (texel_datum - average) * weight; @@ -414,20 +409,20 @@ void compute_error_squared_rgba( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - const processed_line4 uncor_plines[4], - const processed_line4 samec_plines[4], - float uncor_lengths[4], - float samec_lengths[4], + const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS], + const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS], + float 
uncor_lengths[BLOCK_MAX_PARTITIONS], + float samec_lengths[BLOCK_MAX_PARTITIONS], float& uncor_error, float& samec_error ) { - int partition_count = pi.partition_count; + unsigned int partition_count = pi.partition_count; promise(partition_count > 0); uncor_error = 0.0f; samec_error = 0.0f; - for (int partition = 0; partition < partition_count; partition++) + for (unsigned int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pi.texels_of_partition[partition]; @@ -440,7 +435,7 @@ void compute_error_squared_rgba( processed_line4 l_uncor = uncor_plines[partition]; processed_line4 l_samec = samec_plines[partition]; - int texel_count = pi.partition_texel_count[partition]; + unsigned int texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); // Vectorize some useful scalar inputs @@ -483,7 +478,7 @@ void compute_error_squared_rgba( // to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. 
vint lane_ids = vint::lane_id(); - for (int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); vint texel_idxs(&(weights[i])); @@ -572,21 +567,21 @@ void compute_error_squared_rgb( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - partition_lines3 plines[4], + partition_lines3 plines[BLOCK_MAX_PARTITIONS], float& uncor_error, float& samec_error ) { - int partition_count = pi.partition_count; + unsigned int partition_count = pi.partition_count; promise(partition_count > 0); uncor_error = 0.0f; samec_error = 0.0f; - for (int partition = 0; partition < partition_count; partition++) + for (unsigned int partition = 0; partition < partition_count; partition++) { partition_lines3& pl = plines[partition]; const uint8_t *weights = pi.texels_of_partition[partition]; - int texel_count = pi.partition_texel_count[partition]; + unsigned int texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); float uncor_loparam = 1e10f; @@ -637,7 +632,7 @@ void compute_error_squared_rgb( // to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); vint texel_idxs(&(weights[i])); diff --git a/lib/astc-encoder/Source/astcenc_block_sizes.cpp b/lib/astc-encoder/Source/astcenc_block_sizes.cpp index 9459be9dfb..3c40eeaf17 100644 --- a/lib/astc-encoder/Source/astcenc_block_sizes.cpp +++ b/lib/astc-encoder/Source/astcenc_block_sizes.cpp @@ -33,16 +33,16 @@ * @return Returns true of valid mode, false otherwise. 
*/ static bool decode_block_mode_2d( - int block_mode, - int& x_weights, - int& y_weights, + unsigned int block_mode, + unsigned int& x_weights, + unsigned int& y_weights, bool& is_dual_plane, - int& quant_mode + unsigned int& quant_mode ) { - int base_quant_mode = (block_mode >> 4) & 1; - int H = (block_mode >> 9) & 1; - int D = (block_mode >> 10) & 1; - int A = (block_mode >> 5) & 0x3; + unsigned int base_quant_mode = (block_mode >> 4) & 1; + unsigned int H = (block_mode >> 9) & 1; + unsigned int D = (block_mode >> 10) & 1; + unsigned int A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -50,7 +50,7 @@ static bool decode_block_mode_2d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - int B = (block_mode >> 7) & 3; + unsigned int B = (block_mode >> 7) & 3; switch ((block_mode >> 2) & 3) { case 0: @@ -88,7 +88,7 @@ static bool decode_block_mode_2d( return false; } - int B = (block_mode >> 9) & 3; + unsigned int B = (block_mode >> 9) & 3; switch ((block_mode >> 7) & 3) { case 0: @@ -124,14 +124,14 @@ static bool decode_block_mode_2d( } } - int weight_count = x_weights * y_weights * (D + 1); + unsigned int weight_count = x_weights * y_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; - int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode); - return (weight_count <= MAX_WEIGHTS_PER_BLOCK && - weight_bits >= MIN_WEIGHT_BITS_PER_BLOCK && - weight_bits <= MAX_WEIGHT_BITS_PER_BLOCK); + unsigned int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode); + return (weight_count <= BLOCK_MAX_WEIGHTS && + weight_bits >= BLOCK_MIN_WEIGHT_BITS && + weight_bits <= BLOCK_MAX_WEIGHT_BITS); } /** @@ -146,18 +146,18 @@ static bool decode_block_mode_2d( * * @return Returns true of valid mode, false otherwise. 
*/ -static int decode_block_mode_3d( - int block_mode, - int& x_weights, - int& y_weights, - int& z_weights, +static bool decode_block_mode_3d( + unsigned int block_mode, + unsigned int& x_weights, + unsigned int& y_weights, + unsigned int& z_weights, bool& is_dual_plane, - int& quant_mode + unsigned int& quant_mode ) { - int base_quant_mode = (block_mode >> 4) & 1; - int H = (block_mode >> 9) & 1; - int D = (block_mode >> 10) & 1; - int A = (block_mode >> 5) & 0x3; + unsigned int base_quant_mode = (block_mode >> 4) & 1; + unsigned int H = (block_mode >> 9) & 1; + unsigned int D = (block_mode >> 10) & 1; + unsigned int A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -166,8 +166,8 @@ static int decode_block_mode_3d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - int B = (block_mode >> 7) & 3; - int C = (block_mode >> 2) & 0x3; + unsigned int B = (block_mode >> 7) & 3; + unsigned int C = (block_mode >> 2) & 0x3; x_weights = A + 2; y_weights = B + 2; z_weights = C + 2; @@ -225,83 +225,89 @@ static int decode_block_mode_3d( } } - int weight_count = x_weights * y_weights * z_weights * (D + 1); + unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; - int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode); - return (weight_count <= MAX_WEIGHTS_PER_BLOCK && - weight_bits >= MIN_WEIGHT_BITS_PER_BLOCK && - weight_bits <= MAX_WEIGHT_BITS_PER_BLOCK); + unsigned int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode); + return (weight_count <= BLOCK_MAX_WEIGHTS && + weight_bits >= BLOCK_MIN_WEIGHT_BITS && + weight_bits <= BLOCK_MAX_WEIGHT_BITS); } /** - * @brief Create a 2d decimation table for a block-size and weight-decimation pair. + * @brief Create a 2D decimation entry for a block-size and weight-decimation pair. * * @param x_texels The number of texels in the X dimension. 
* @param y_texels The number of texels in the Y dimension. * @param x_weights The number of weights in the X dimension. * @param y_weights The number of weights in the Y dimension. - * @param[out] dt The decimation table to populate. + * @param[out] di The decimation info structure to populate. */ -static void initialize_decimation_table_2d( - int x_texels, - int y_texels, - int x_weights, - int y_weights, - decimation_table& dt +static void init_decimation_info_2d( + unsigned int x_texels, + unsigned int y_texels, + unsigned int x_weights, + unsigned int y_weights, + decimation_info& di ) { - int texels_per_block = x_texels * y_texels; - int weights_per_block = x_weights * y_weights; + unsigned int texels_per_block = x_texels * y_texels; + unsigned int weights_per_block = x_weights * y_weights; - uint8_t weight_count_of_texel[MAX_TEXELS_PER_BLOCK]; - uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weight_count_of_texel[BLOCK_MAX_TEXELS]; + uint8_t grid_weights_of_texel[BLOCK_MAX_TEXELS][4]; + uint8_t weights_of_texel[BLOCK_MAX_TEXELS][4]; - uint8_t texel_count_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texel_count_of_weight[BLOCK_MAX_WEIGHTS]; uint8_t max_texel_count_of_weight = 0; - uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; - int texel_weights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t texels_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS]; + uint8_t texel_weights_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS]; + + promise(weights_per_block > 0); + promise(texels_per_block > 0); + promise(x_texels > 0); + promise(y_texels > 0); - for (int i = 0; i < weights_per_block; i++) + for (unsigned int i = 0; i < weights_per_block; i++) { texel_count_of_weight[i] = 0; } - for (int i = 0; i < texels_per_block; i++) + for (unsigned int i = 0; i < texels_per_block; i++) { weight_count_of_texel[i] = 0; } - for (int y = 0; y < y_texels; y++) + for (unsigned 
int y = 0; y < y_texels; y++) { - for (int x = 0; x < x_texels; x++) + for (unsigned int x = 0; x < x_texels; x++) { - int texel = y * x_texels + x; + unsigned int texel = y * x_texels + x; + + unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; + unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; - int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; - int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; + unsigned int x_weight_frac = x_weight & 0xF; + unsigned int y_weight_frac = y_weight & 0xF; + unsigned int x_weight_int = x_weight >> 4; + unsigned int y_weight_int = y_weight >> 4; - int x_weight_frac = x_weight & 0xF; - int y_weight_frac = y_weight & 0xF; - int x_weight_int = x_weight >> 4; - int y_weight_int = y_weight >> 4; - int qweight[4]; + unsigned int qweight[4]; qweight[0] = x_weight_int + y_weight_int * x_weights; qweight[1] = qweight[0] + 1; qweight[2] = qweight[0] + x_weights; qweight[3] = qweight[2] + 1; // Truncated-precision bilinear interpolation - int prod = x_weight_frac * y_weight_frac; + unsigned int prod = x_weight_frac * y_weight_frac; - int weight[4]; + unsigned int weight[4]; weight[3] = (prod + 8) >> 4; weight[1] = x_weight_frac - weight[3]; weight[2] = y_weight_frac - weight[3]; weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3]; - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -317,116 +323,116 @@ static void initialize_decimation_table_2d( } } - for (int i = 0; i < texels_per_block; i++) + for (unsigned int i = 0; i < texels_per_block; i++) { - dt.texel_weight_count[i] = weight_count_of_texel[i]; + di.texel_weight_count[i] = weight_count_of_texel[i]; - for (int j = 0; j < weight_count_of_texel[i]; j++) + for (unsigned int j = 0; j < weight_count_of_texel[i]; j++) { - dt.texel_weights_int_4t[j][i] = 
weights_of_texel[i][j]; - dt.texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt.texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; + di.texel_weights_int_4t[j][i] = weights_of_texel[i][j]; + di.texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); + di.texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } // Init all 4 entries so we can rely on zeros for vectorization - for (int j = weight_count_of_texel[i]; j < 4; j++) + for (unsigned int j = weight_count_of_texel[i]; j < 4; j++) { - dt.texel_weights_int_4t[j][i] = 0; - dt.texel_weights_float_4t[j][i] = 0.0f; - dt.texel_weights_4t[j][i] = 0; + di.texel_weights_int_4t[j][i] = 0; + di.texel_weights_float_4t[j][i] = 0.0f; + di.texel_weights_4t[j][i] = 0; } } - for (int i = 0; i < weights_per_block; i++) + for (unsigned int i = 0; i < weights_per_block; i++) { - int texel_count_wt = texel_count_of_weight[i]; - dt.weight_texel_count[i] = (uint8_t)texel_count_wt; + unsigned int texel_count_wt = texel_count_of_weight[i]; + di.weight_texel_count[i] = (uint8_t)texel_count_wt; - for (int j = 0; j < texel_count_wt; j++) + for (unsigned int j = 0; j < texel_count_wt; j++) { uint8_t texel = texels_of_weight[i][j]; // Create transposed versions of these for better vectorization - dt.weight_texel[j][i] = texel; - dt.weights_flt[j][i] = (float)texel_weights_of_weight[i][j]; + di.weight_texel[j][i] = texel; + di.weights_flt[j][i] = (float)texel_weights_of_weight[i][j]; // perform a layer of array unrolling. An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this // fact to reorder the indexes so that the first one is the identity index. 
int swap_idx = -1; - for (int k = 0; k < 4; k++) + for (unsigned int k = 0; k < 4; k++) { - uint8_t dttw = dt.texel_weights_4t[k][texel]; - float dttwf = dt.texel_weights_float_4t[k][texel]; + uint8_t dttw = di.texel_weights_4t[k][texel]; + float dttwf = di.texel_weights_float_4t[k][texel]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt.texel_weights_texel[i][j][k] = dttw; - dt.texel_weights_float_texel[i][j][k] = dttwf; + di.texel_weights_texel[i][j][k] = dttw; + di.texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - uint8_t vi = dt.texel_weights_texel[i][j][0]; - float vf = dt.texel_weights_float_texel[i][j][0]; - dt.texel_weights_texel[i][j][0] = dt.texel_weights_texel[i][j][swap_idx]; - dt.texel_weights_float_texel[i][j][0] = dt.texel_weights_float_texel[i][j][swap_idx]; - dt.texel_weights_texel[i][j][swap_idx] = vi; - dt.texel_weights_float_texel[i][j][swap_idx] = vf; + uint8_t vi = di.texel_weights_texel[i][j][0]; + float vf = di.texel_weights_float_texel[i][j][0]; + di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx]; + di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx]; + di.texel_weights_texel[i][j][swap_idx] = vi; + di.texel_weights_float_texel[i][j][swap_idx] = vf; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers - uint8_t last_texel = dt.weight_texel[texel_count_wt - 1][i]; - for (int j = texel_count_wt; j < max_texel_count_of_weight; j++) + uint8_t last_texel = di.weight_texel[texel_count_wt - 1][i]; + for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) { - dt.weight_texel[j][i] = last_texel; - dt.weights_flt[j][i] = 0.0f; + di.weight_texel[j][i] = last_texel; + di.weights_flt[j][i] = 0.0f; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - int texels_per_block_simd = 
round_up_to_simd_multiple_vla(texels_per_block); - for (int i = texels_per_block; i < texels_per_block_simd; i++) + unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) { - dt.texel_weight_count[i] = 0; + di.texel_weight_count[i] = 0; - for (int j = 0; j < 4; j++) + for (unsigned int j = 0; j < 4; j++) { - dt.texel_weights_float_4t[j][i] = 0; - dt.texel_weights_4t[j][i] = 0; - dt.texel_weights_int_4t[j][i] = 0; + di.texel_weights_float_4t[j][i] = 0; + di.texel_weights_4t[j][i] = 0; + di.texel_weights_int_4t[j][i] = 0; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers - int last_texel_count_wt = texel_count_of_weight[weights_per_block - 1]; - uint8_t last_texel = dt.weight_texel[last_texel_count_wt - 1][weights_per_block - 1]; + unsigned int last_texel_count_wt = texel_count_of_weight[weights_per_block - 1]; + uint8_t last_texel = di.weight_texel[last_texel_count_wt - 1][weights_per_block - 1]; - int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (int i = weights_per_block; i < weights_per_block_simd; i++) + unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) { - dt.weight_texel_count[i] = 0; + di.weight_texel_count[i] = 0; - for (int j = 0; j < max_texel_count_of_weight; j++) + for (unsigned int j = 0; j < max_texel_count_of_weight; j++) { - dt.weight_texel[j][i] = last_texel; - dt.weights_flt[j][i] = 0.0f; + di.weight_texel[j][i] = last_texel; + di.weights_flt[j][i] = 0.0f; } } - dt.texel_count = texels_per_block; - dt.weight_count = weights_per_block; - dt.weight_x = x_weights; - dt.weight_y = y_weights; - dt.weight_z = 1; + di.texel_count = texels_per_block; + di.weight_count = weights_per_block; + di.weight_x 
= x_weights; + di.weight_y = y_weights; + di.weight_z = 1; } /** - * @brief Create a 2d decimation table for a block-size and weight-decimation pair. + * @brief Create a 3D decimation entry for a block-size and weight-decimation pair. * * @param x_texels The number of texels in the X dimension. * @param y_texels The number of texels in the Y dimension. @@ -434,44 +440,47 @@ static void initialize_decimation_table_2d( * @param x_weights The number of weights in the X dimension. * @param y_weights The number of weights in the Y dimension. * @param z_weights The number of weights in the Z dimension. - * @param[out] dt The decimation table to populate. + * @param[out] di The decimation info structure to populate. */ -static void initialize_decimation_table_3d( - int x_texels, - int y_texels, - int z_texels, - int x_weights, - int y_weights, - int z_weights, - decimation_table& dt +static void init_decimation_info_3d( + unsigned int x_texels, + unsigned int y_texels, + unsigned int z_texels, + unsigned int x_weights, + unsigned int y_weights, + unsigned int z_weights, + decimation_info& di ) { - int texels_per_block = x_texels * y_texels * z_texels; - int weights_per_block = x_weights * y_weights * z_weights; + unsigned int texels_per_block = x_texels * y_texels * z_texels; + unsigned int weights_per_block = x_weights * y_weights * z_weights; - uint8_t weight_count_of_texel[MAX_TEXELS_PER_BLOCK]; - uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weight_count_of_texel[BLOCK_MAX_TEXELS]; + uint8_t grid_weights_of_texel[BLOCK_MAX_TEXELS][4]; + uint8_t weights_of_texel[BLOCK_MAX_TEXELS][4]; - uint8_t texel_count_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texel_count_of_weight[BLOCK_MAX_WEIGHTS]; uint8_t max_texel_count_of_weight = 0; - uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; - int texel_weights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t 
texels_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS]; + uint8_t texel_weights_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS]; - for (int i = 0; i < weights_per_block; i++) + promise(weights_per_block > 0); + promise(texels_per_block > 0); + + for (unsigned int i = 0; i < weights_per_block; i++) { texel_count_of_weight[i] = 0; } - for (int i = 0; i < texels_per_block; i++) + for (unsigned int i = 0; i < texels_per_block; i++) { weight_count_of_texel[i] = 0; } - for (int z = 0; z < z_texels; z++) + for (unsigned int z = 0; z < z_texels; z++) { - for (int y = 0; y < y_texels; y++) + for (unsigned int y = 0; y < y_texels; y++) { - for (int x = 0; x < x_texels; x++) + for (unsigned int x = 0; x < x_texels; x++) { int texel = (z * y_texels + y) * x_texels + x; @@ -567,7 +576,7 @@ static void initialize_decimation_table_3d( weight[2] = w2; weight[3] = w3; - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -584,118 +593,118 @@ static void initialize_decimation_table_3d( } } - for (int i = 0; i < texels_per_block; i++) + for (unsigned int i = 0; i < texels_per_block; i++) { - dt.texel_weight_count[i] = weight_count_of_texel[i]; + di.texel_weight_count[i] = weight_count_of_texel[i]; // Init all 4 entries so we can rely on zeros for vectorization - for (int j = 0; j < 4; j++) + for (unsigned int j = 0; j < 4; j++) { - dt.texel_weights_int_4t[j][i] = 0; - dt.texel_weights_float_4t[j][i] = 0.0f; - dt.texel_weights_4t[j][i] = 0; + di.texel_weights_int_4t[j][i] = 0; + di.texel_weights_float_4t[j][i] = 0.0f; + di.texel_weights_4t[j][i] = 0; } - for (int j = 0; j < weight_count_of_texel[i]; j++) + for (unsigned int j = 0; j < weight_count_of_texel[i]; j++) { - dt.texel_weights_int_4t[j][i] = weights_of_texel[i][j]; - dt.texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt.texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; + di.texel_weights_int_4t[j][i] = weights_of_texel[i][j]; + 
di.texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); + di.texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } } - for (int i = 0; i < weights_per_block; i++) + for (unsigned int i = 0; i < weights_per_block; i++) { - int texel_count_wt = texel_count_of_weight[i]; - dt.weight_texel_count[i] = (uint8_t)texel_count_wt; + unsigned int texel_count_wt = texel_count_of_weight[i]; + di.weight_texel_count[i] = (uint8_t)texel_count_wt; - for (int j = 0; j < texel_count_wt; j++) + for (unsigned int j = 0; j < texel_count_wt; j++) { - int texel = texels_of_weight[i][j]; + unsigned int texel = texels_of_weight[i][j]; // Create transposed versions of these for better vectorization - dt.weight_texel[j][i] = texel; - dt.weights_flt[j][i] = (float)texel_weights_of_weight[i][j]; + di.weight_texel[j][i] = texel; + di.weights_flt[j][i] = (float)texel_weights_of_weight[i][j]; // perform a layer of array unrolling. An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this // fact to reorder the indexes so that the first one is the identity index. 
int swap_idx = -1; - for (int k = 0; k < 4; k++) + for (unsigned int k = 0; k < 4; k++) { - uint8_t dttw = dt.texel_weights_4t[k][texel]; - float dttwf = dt.texel_weights_float_4t[k][texel]; + uint8_t dttw = di.texel_weights_4t[k][texel]; + float dttwf = di.texel_weights_float_4t[k][texel]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt.texel_weights_texel[i][j][k] = dttw; - dt.texel_weights_float_texel[i][j][k] = dttwf; + di.texel_weights_texel[i][j][k] = dttw; + di.texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - uint8_t vi = dt.texel_weights_texel[i][j][0]; - float vf = dt.texel_weights_float_texel[i][j][0]; - dt.texel_weights_texel[i][j][0] = dt.texel_weights_texel[i][j][swap_idx]; - dt.texel_weights_float_texel[i][j][0] = dt.texel_weights_float_texel[i][j][swap_idx]; - dt.texel_weights_texel[i][j][swap_idx] = vi; - dt.texel_weights_float_texel[i][j][swap_idx] = vf; + uint8_t vi = di.texel_weights_texel[i][j][0]; + float vf = di.texel_weights_float_texel[i][j][0]; + di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx]; + di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx]; + di.texel_weights_texel[i][j][swap_idx] = vi; + di.texel_weights_float_texel[i][j][swap_idx] = vf; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers - uint8_t last_texel = dt.weight_texel[texel_count_wt - 1][i]; - for (int j = texel_count_wt; j < max_texel_count_of_weight; j++) + uint8_t last_texel = di.weight_texel[texel_count_wt - 1][i]; + for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) { - dt.weight_texel[j][i] = last_texel; - dt.weights_flt[j][i] = 0.0f; + di.weight_texel[j][i] = last_texel; + di.weights_flt[j][i] = 0.0f; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - int texels_per_block_simd = 
round_up_to_simd_multiple_vla(texels_per_block); - for (int i = texels_per_block; i < texels_per_block_simd; i++) + unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) { - dt.texel_weight_count[i] = 0; + di.texel_weight_count[i] = 0; - for (int j = 0; j < 4; j++) + for (unsigned int j = 0; j < 4; j++) { - dt.texel_weights_float_4t[j][i] = 0; - dt.texel_weights_4t[j][i] = 0; - dt.texel_weights_int_4t[j][i] = 0; + di.texel_weights_float_4t[j][i] = 0; + di.texel_weights_4t[j][i] = 0; + di.texel_weights_int_4t[j][i] = 0; } } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers int last_texel_count_wt = texel_count_of_weight[weights_per_block - 1]; - uint8_t last_texel = dt.weight_texel[last_texel_count_wt - 1][weights_per_block - 1]; + uint8_t last_texel = di.weight_texel[last_texel_count_wt - 1][weights_per_block - 1]; - int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (int i = weights_per_block; i < weights_per_block_simd; i++) + unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) { - dt.weight_texel_count[i] = 0; + di.weight_texel_count[i] = 0; for (int j = 0; j < max_texel_count_of_weight; j++) { - dt.weight_texel[j][i] = last_texel; - dt.weights_flt[j][i] = 0.0f; + di.weight_texel[j][i] = last_texel; + di.weights_flt[j][i] = 0.0f; } } - dt.texel_count = texels_per_block; - dt.weight_count = weights_per_block; - dt.weight_x = x_weights; - dt.weight_y = y_weights; - dt.weight_z = z_weights; + di.texel_count = texels_per_block; + di.weight_count = weights_per_block; + di.weight_x = x_weights; + di.weight_y = y_weights; + di.weight_z = z_weights; } /** * @brief Assign the texels to use for kmeans clustering. 
* - * The max limit is @c MAX_KMEANS_TEXELS; above this a random selection is used. + * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used. * The @c bsd.texel_count is an input and must be populated beforehand. * * @param[in,out] bsd The block size descriptor to populate. @@ -704,31 +713,30 @@ static void assign_kmeans_texels( block_size_descriptor& bsd ) { // Use all texels for kmeans on a small block - if (bsd.texel_count <= MAX_KMEANS_TEXELS) + if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS) { - for (int i = 0; i < bsd.texel_count; i++) + for (unsigned int i = 0; i < bsd.texel_count; i++) { bsd.kmeans_texels[i] = i; } - bsd.kmeans_texel_count = bsd.texel_count; return; } - // Select a random subset of MAX_KMEANS_TEXELS for kmeans on a large block + // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block uint64_t rng_state[2]; astc::rand_init(rng_state); // Initialize array used for tracking used indices - bool seen[MAX_TEXELS_PER_BLOCK]; - for (int i = 0; i < bsd.texel_count; i++) + bool seen[BLOCK_MAX_TEXELS]; + for (unsigned int i = 0; i < bsd.texel_count; i++) { seen[i] = false; } // Assign 64 random indices, retrying if we see repeats - int arr_elements_set = 0; - while (arr_elements_set < MAX_KMEANS_TEXELS) + unsigned int arr_elements_set = 0; + while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS) { unsigned int texel = (unsigned int)astc::rand(rng_state); texel = texel % bsd.texel_count; @@ -738,8 +746,6 @@ static void assign_kmeans_texels( seen[texel] = true; } } - - bsd.kmeans_texel_count = MAX_KMEANS_TEXELS; } /** @@ -753,35 +759,35 @@ static void assign_kmeans_texels( * @return The new entry's index in the compacted decimation table array. 
*/ static int construct_dt_entry_2d( - int x_texels, - int y_texels, - int x_weights, - int y_weights, + unsigned int x_texels, + unsigned int y_texels, + unsigned int x_weights, + unsigned int y_weights, block_size_descriptor& bsd ) { - int dm_index = bsd.decimation_mode_count; - int weight_count = x_weights * y_weights; - assert(weight_count <= MAX_WEIGHTS_PER_BLOCK); + unsigned int dm_index = bsd.decimation_mode_count; + unsigned int weight_count = x_weights * y_weights; + assert(weight_count <= BLOCK_MAX_WEIGHTS); - bool try_2planes = (2 * weight_count) <= MAX_WEIGHTS_PER_BLOCK; + bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS; - decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); - initialize_decimation_table_2d(x_texels, y_texels, x_weights, y_weights, *dt); + decimation_info *di = aligned_malloc(sizeof(decimation_info), ASTCENC_VECALIGN); + init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, *di); int maxprec_1plane = -1; int maxprec_2planes = -1; for (int i = 0; i < 12; i++) { - int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); - if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) + unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; } if (try_2planes) { - int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); - if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) + unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); + if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; } @@ -797,7 +803,7 @@ static int construct_dt_entry_2d( bsd.decimation_modes[dm_index].percentile_hit = false; bsd.decimation_modes[dm_index].percentile_always = false; - 
bsd.decimation_tables[dm_index] = dt; + bsd.decimation_tables[dm_index] = di; bsd.decimation_mode_count++; return dm_index; @@ -813,15 +819,15 @@ static int construct_dt_entry_2d( * @param[out] bsd The block size descriptor to populate. */ static void construct_block_size_descriptor_2d( - int x_texels, - int y_texels, + unsigned int x_texels, + unsigned int y_texels, bool can_omit_modes, float mode_cutoff, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Y * 16 + X] and max size for each axis is 12. - static const int MAX_DMI = 12 * 16 + 12; + static const unsigned int MAX_DMI = 12 * 16 + 12; int decimation_mode_index[MAX_DMI]; bsd.xdim = x_texels; @@ -830,7 +836,7 @@ static void construct_block_size_descriptor_2d( bsd.texel_count = x_texels * y_texels; bsd.decimation_mode_count = 0; - for (int i = 0; i < MAX_DMI; i++) + for (unsigned int i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } @@ -845,13 +851,14 @@ static void construct_block_size_descriptor_2d( #endif // Construct the list of block formats referencing the decimation tables - int packed_idx = 0; - for (int i = 0; i < MAX_WEIGHT_MODES; i++) + unsigned int packed_idx = 0; + for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { - int x_weights, y_weights; + unsigned int x_weights, y_weights; bool is_dual_plane; + // TODO: Make this an enum? It's been validated. - int quant_mode; + unsigned int quant_mode; bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode); @@ -872,11 +879,11 @@ static void construct_block_size_descriptor_2d( // Skip modes that are invalid, too large, or not selected by heuristic if (!valid || !selected || (x_weights > x_texels) || (y_weights > y_texels)) { - bsd.block_mode_packed_index[i] = -1; + bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; continue; } - // Allocate and initialize the DT entry if we've not used it yet. 
+ // Allocate and initialize the decimation table entry if we've not used it yet int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; if (decimation_mode == -1) { @@ -923,7 +930,7 @@ static void construct_block_size_descriptor_2d( #endif // Ensure the end of the array contains valid data (should never get read) - for (int i = bsd.decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + for (unsigned int i = bsd.decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -948,63 +955,62 @@ static void construct_block_size_descriptor_2d( * @param[out] bsd The block size descriptor to populate. */ static void construct_block_size_descriptor_3d( - int x_texels, - int y_texels, - int z_texels, + unsigned int x_texels, + unsigned int y_texels, + unsigned int z_texels, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6. 
- static const int MAX_DMI = 6 * 64 + 6 * 8 + 6; + static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6; int decimation_mode_index[MAX_DMI]; - int decimation_mode_count = 0; + unsigned int decimation_mode_count = 0; bsd.xdim = x_texels; bsd.ydim = y_texels; bsd.zdim = z_texels; bsd.texel_count = x_texels * y_texels * z_texels; - for (int i = 0; i < MAX_DMI; i++) + for (unsigned int i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } // gather all the infill-modes that can be used with the current block size - for (int x_weights = 2; x_weights <= x_texels; x_weights++) + for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++) { - for (int y_weights = 2; y_weights <= y_texels; y_weights++) + for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++) { - for (int z_weights = 2; z_weights <= z_texels; z_weights++) + for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++) { - int weight_count = x_weights * y_weights * z_weights; - if (weight_count > MAX_WEIGHTS_PER_BLOCK) + unsigned int weight_count = x_weights * y_weights * z_weights; + if (weight_count > BLOCK_MAX_WEIGHTS) { continue; } - decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); + decimation_info *di = aligned_malloc(sizeof(decimation_info), ASTCENC_VECALIGN); decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count; - initialize_decimation_table_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, *dt); + init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, *di); int maxprec_1plane = -1; int maxprec_2planes = -1; - for (int i = 0; i < 12; i++) + for (unsigned int i = 0; i < 12; i++) { - int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); - int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); - - if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) + unsigned 
int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; } - if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) + unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); + if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; } } - if ((2 * weight_count) > MAX_WEIGHTS_PER_BLOCK) + if ((2 * weight_count) > BLOCK_MAX_WEIGHTS) { maxprec_2planes = -1; } @@ -1013,14 +1019,14 @@ static void construct_block_size_descriptor_3d( bsd.decimation_modes[decimation_mode_count].maxprec_2planes = maxprec_2planes; bsd.decimation_modes[decimation_mode_count].percentile_hit = false; bsd.decimation_modes[decimation_mode_count].percentile_always = false; - bsd.decimation_tables[decimation_mode_count] = dt; + bsd.decimation_tables[decimation_mode_count] = di; decimation_mode_count++; } } } // Ensure the end of the array contains valid data (should never get read) - for (int i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -1032,29 +1038,29 @@ static void construct_block_size_descriptor_3d( bsd.decimation_mode_count = decimation_mode_count; // Construct the list of block formats - int packed_idx = 0; - for (int i = 0; i < MAX_WEIGHT_MODES; i++) + unsigned int packed_idx = 0; + for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { - int x_weights, y_weights, z_weights; + unsigned int x_weights, y_weights, z_weights; bool is_dual_plane; - int quant_mode; - int permit_encode = 1; + unsigned int quant_mode; + bool permit_encode = true; if (decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode)) { if (x_weights > x_texels 
|| y_weights > y_texels || z_weights > z_texels) { - permit_encode = 0; + permit_encode = false; } } else { - permit_encode = 0; + permit_encode = false; } - bsd.block_mode_packed_index[i] = -1; if (!permit_encode) { + bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; continue; } @@ -1082,9 +1088,9 @@ static void construct_block_size_descriptor_3d( /* See header for documentation. */ void init_block_size_descriptor( - int x_texels, - int y_texels, - int z_texels, + unsigned int x_texels, + unsigned int y_texels, + unsigned int z_texels, bool can_omit_modes, float mode_cutoff, block_size_descriptor& bsd @@ -1105,8 +1111,8 @@ void init_block_size_descriptor( void term_block_size_descriptor( block_size_descriptor& bsd ) { - for (int i = 0; i < bsd.decimation_mode_count; i++) + for (unsigned int i = 0; i < bsd.decimation_mode_count; i++) { - aligned_free(bsd.decimation_tables[i]); + aligned_free(bsd.decimation_tables[i]); } } diff --git a/lib/astc-encoder/Source/astcenc_color_quantize.cpp b/lib/astc-encoder/Source/astcenc_color_quantize.cpp index dc26e1d732..52634f5bf7 100644 --- a/lib/astc-encoder/Source/astcenc_color_quantize.cpp +++ b/lib/astc-encoder/Source/astcenc_color_quantize.cpp @@ -19,6 +19,20 @@ /** * @brief Functions for color quantization. + * + * The design of the color quantization functionality requires the caller to use higher level error + * analysis to determine the base encoding that should be used. This earlier analysis will select + * the basic type of the endpoint that should be used: + * + * * Mode: LDR or HDR + * * Quantization level + * * Channel count: L, LA, RGB, or RGBA + * * Endpoint 2 type: Direct color encode, or scaled from endpoint 1. + * + * However, this leaves a number of decisions about exactly how to pack the endpoints open. In + * particular we need to determine if blue contraction can be used, and/or if delta encoding can be + * used.
If they can be applied these will allow us to maintain higher precision in the endpoints + * without needing additional storage. */ #include @@ -36,7 +50,7 @@ * @return The encoded quantized value. These are not necessarily in the order; the compressor * scrambles the values slightly to make hardware implementation easier. */ -static inline int cqt_lookup( +static inline int quant_color( quant_method quant_level, int value ) { @@ -59,7 +73,7 @@ static inline int cqt_lookup( static void quantize_rgb( vfloat4 color0, vfloat4 color1, - int output[6], + uint8_t output[6], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -79,12 +93,12 @@ static void quantize_rgb( int iters = 0; do { - ri0 = cqt_lookup(quant_level, astc::flt2int_rd(r0 + rgb0_addon)); - gi0 = cqt_lookup(quant_level, astc::flt2int_rd(g0 + rgb0_addon)); - bi0 = cqt_lookup(quant_level, astc::flt2int_rd(b0 + rgb0_addon)); - ri1 = cqt_lookup(quant_level, astc::flt2int_rd(r1 + rgb1_addon)); - gi1 = cqt_lookup(quant_level, astc::flt2int_rd(g1 + rgb1_addon)); - bi1 = cqt_lookup(quant_level, astc::flt2int_rd(b1 + rgb1_addon)); + ri0 = quant_color(quant_level, astc::flt2int_rd(r0 + rgb0_addon)); + gi0 = quant_color(quant_level, astc::flt2int_rd(g0 + rgb0_addon)); + bi0 = quant_color(quant_level, astc::flt2int_rd(b0 + rgb0_addon)); + ri1 = quant_color(quant_level, astc::flt2int_rd(r1 + rgb1_addon)); + gi1 = quant_color(quant_level, astc::flt2int_rd(g1 + rgb1_addon)); + bi1 = quant_color(quant_level, astc::flt2int_rd(b1 + rgb1_addon)); ri0b = color_unquant_tables[quant_level][ri0]; gi0b = color_unquant_tables[quant_level][gi0]; @@ -122,7 +136,7 @@ static void quantize_rgb( static void quantize_rgba( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -154,7 +168,7 @@ static void quantize_rgba( static bool try_quantize_rgb_blue_contract( vfloat4 color0, vfloat4 color1, - int output[6], + uint8_t output[6], quant_method 
quant_level ) { float scale = 1.0f / 257.0f; @@ -229,7 +243,7 @@ static bool try_quantize_rgb_blue_contract( static int try_quantize_rgba_blue_contract( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -260,7 +274,7 @@ static int try_quantize_rgba_blue_contract( static bool try_quantize_rgb_delta( vfloat4 color0, vfloat4 color1, - int output[6], + uint8_t output[6], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -390,7 +404,7 @@ static bool try_quantize_rgb_delta( static bool try_quantize_rgb_delta_blue_contract( vfloat4 color0, vfloat4 color1, - int output[6], + uint8_t output[6], quant_method quant_level ) { // Note: Switch around endpoint colors already at start @@ -551,7 +565,7 @@ static bool try_quantize_rgb_delta_blue_contract( static bool try_quantize_alpha_delta( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -614,7 +628,7 @@ static bool try_quantize_alpha_delta( static bool try_quantize_luminance_alpha_delta( vfloat4 color0, vfloat4 color1, - int output[4], + uint8_t output[4], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -715,7 +729,7 @@ static bool try_quantize_luminance_alpha_delta( static bool try_quantize_rgba_delta( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { return try_quantize_rgb_delta(color0, color1, output, quant_level) && @@ -742,7 +756,7 @@ static bool try_quantize_rgba_delta( static bool try_quantize_rgba_delta_blue_contract( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { // Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract @@ -759,7 +773,7 @@ static bool try_quantize_rgba_delta_blue_contract( */ static void quantize_rgbs( vfloat4 color, - int output[4], + uint8_t output[4], quant_method quant_level ) { float 
scale = 1.0f / 257.0f; @@ -800,7 +814,7 @@ static void quantize_rgbs_alpha( vfloat4 color0, vfloat4 color1, vfloat4 color, - int output[6], + uint8_t output[6], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -828,7 +842,7 @@ static void quantize_rgbs_alpha( static void quantize_luminance( vfloat4 color0, vfloat4 color1, - int output[2], + uint8_t output[2], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -861,7 +875,7 @@ static void quantize_luminance( static void quantize_luminance_alpha( vfloat4 color0, vfloat4 color1, - int output[4], + uint8_t output[4], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -927,13 +941,13 @@ static void quantize_luminance_alpha( */ static inline void quantize_and_unquantize_retain_top_two_bits( quant_method quant_level, - int value, - int& quant_value, - int& unquant_value + uint8_t value, + uint8_t& quant_value, + uint8_t& unquant_value ) { int perform_loop; - int quantval; - int uquantval; + uint8_t quantval; + uint8_t uquantval; do { @@ -971,13 +985,13 @@ static inline void quantize_and_unquantize_retain_top_two_bits( */ static inline void quantize_and_unquantize_retain_top_four_bits( quant_method quant_level, - int value, - int& quant_value, - int& unquant_value + uint8_t value, + uint8_t& quant_value, + uint8_t& unquant_value ) { - int perform_loop; - int quantval; - int uquantval; + uint8_t perform_loop; + uint8_t quantval; + uint8_t uquantval; do { @@ -1014,7 +1028,7 @@ static inline void quantize_and_unquantize_retain_top_four_bits( */ static void quantize_hdr_rgbo( vfloat4 color, - int output[4], + uint8_t output[4], quant_method quant_level ) { color.set_lane<0>(color.lane<0>() + color.lane<3>()); @@ -1105,8 +1119,8 @@ static void quantize_hdr_rgbo( r_lowbits |= (mode_enc & 3) << 6; - int r_quantval; - int r_uquantval; + uint8_t r_quantval; + uint8_t r_uquantval; quantize_and_unquantize_retain_top_two_bits(quant_level, r_lowbits, r_quantval, r_uquantval); r_intval = (r_intval & 
~0x3f) | (r_uquantval & 0x3f); @@ -1203,10 +1217,10 @@ static void quantize_hdr_rgbo( b_lowbits |= bit2 << 6; b_lowbits |= bit3 << 5; - int g_quantval; - int b_quantval; - int g_uquantval; - int b_uquantval; + uint8_t g_quantval; + uint8_t b_quantval; + uint8_t g_uquantval; + uint8_t b_uquantval; quantize_and_unquantize_retain_top_four_bits(quant_level, g_lowbits, g_quantval, g_uquantval); quantize_and_unquantize_retain_top_four_bits(quant_level, b_lowbits, b_quantval, b_uquantval); @@ -1275,8 +1289,8 @@ static void quantize_hdr_rgbo( s_lowbits |= bit5 << 6; s_lowbits |= bit4 << 7; - int s_quantval; - int s_uquantval; + uint8_t s_quantval; + uint8_t s_uquantval; quantize_and_unquantize_retain_top_four_bits(quant_level, s_lowbits, s_quantval, s_uquantval); output[0] = r_quantval; @@ -1315,9 +1329,9 @@ static void quantize_hdr_rgbo( encvals[2] = (ivals[2] & 0x7f) | 0x80; encvals[3] = (ivals[3] & 0x7f) | ((ivals[0] & 0x40) << 1); - for (int i = 0; i < 4; i++) + for (uint8_t i = 0; i < 4; i++) { - int dummy; + uint8_t dummy; quantize_and_unquantize_retain_top_four_bits(quant_level, encvals[i], output[i], dummy); } @@ -1335,7 +1349,7 @@ static void quantize_hdr_rgbo( static void quantize_hdr_rgb( vfloat4 color0, vfloat4 color1, - int output[6], + uint8_t output[6], quant_method quant_level ) { // Note: color*.lane<3> is not used so we can ignore it @@ -1478,8 +1492,8 @@ static void quantize_hdr_rgb( c_lowbits |= (mode & 1) << 7; c_lowbits |= (a_intval & 0x100) >> 2; - int c_quantval; - int c_uquantval; + uint8_t c_quantval; + uint8_t c_uquantval; quantize_and_unquantize_retain_top_two_bits(quant_level, c_lowbits, c_quantval, c_uquantval); c_intval = (c_intval & ~0x3F) | (c_uquantval & 0x3F); c_fval = static_cast(c_intval) * mode_rscale; @@ -1543,10 +1557,10 @@ static void quantize_hdr_rgb( b0_lowbits |= ((mode >> 1) & 1) << 7; b1_lowbits |= ((mode >> 2) & 1) << 7; - int b0_quantval; - int b1_quantval; - int b0_uquantval; - int b1_uquantval; + uint8_t b0_quantval; + 
uint8_t b1_quantval; + uint8_t b0_uquantval; + uint8_t b1_uquantval; quantize_and_unquantize_retain_top_two_bits(quant_level, b0_lowbits, b0_quantval, b0_uquantval); quantize_and_unquantize_retain_top_two_bits(quant_level, b1_lowbits, b1_quantval, b1_uquantval); @@ -1638,10 +1652,10 @@ static void quantize_hdr_rgb( d0_lowbits |= (majcomp & 1) << 7; d1_lowbits |= ((majcomp >> 1) & 1) << 7; - int d0_quantval; - int d1_quantval; - int d0_uquantval; - int d1_uquantval; + uint8_t d0_quantval; + uint8_t d1_quantval; + uint8_t d0_uquantval; + uint8_t d1_uquantval; quantize_and_unquantize_retain_top_four_bits(quant_level, d0_lowbits, d0_quantval, d0_uquantval); quantize_and_unquantize_retain_top_four_bits(quant_level, d1_lowbits, d1_quantval, d1_uquantval); @@ -1680,9 +1694,9 @@ static void quantize_hdr_rgb( for (int i = 4; i < 6; i++) { - int dummy; + uint8_t dummy; int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128; - quantize_and_unquantize_retain_top_two_bits(quant_level, idx, (output[i]), dummy); + quantize_and_unquantize_retain_top_two_bits(quant_level, idx, output[i], dummy); } return; @@ -1699,7 +1713,7 @@ static void quantize_hdr_rgb( static void quantize_hdr_rgb_ldr_alpha( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { float scale = 1.0f / 257.0f; @@ -1727,7 +1741,7 @@ static void quantize_hdr_rgb_ldr_alpha( static void quantize_hdr_luminance_large_range( vfloat4 color0, vfloat4 color1, - int output[2], + uint8_t output[2], quant_method quant_level ) { float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); @@ -1801,7 +1815,7 @@ static void quantize_hdr_luminance_large_range( static bool try_quantize_hdr_luminance_small_range( vfloat4 color0, vfloat4 color1, - int output[2], + uint8_t output[2], quant_method quant_level ) { float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); @@ -1903,7 +1917,7 @@ static bool try_quantize_hdr_luminance_small_range( static void quantize_hdr_alpha( float alpha0, float alpha1, - int 
output[2], + uint8_t output[2], quant_method quant_level ) { alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f); @@ -1983,7 +1997,7 @@ static void quantize_hdr_alpha( static void quantize_hdr_rgb_alpha( vfloat4 color0, vfloat4 color1, - int output[8], + uint8_t output[8], quant_method quant_level ) { quantize_hdr_rgb(color0, color1, output, quant_level); @@ -1997,7 +2011,7 @@ int pack_color_endpoints( vfloat4 rgbs_color, vfloat4 rgbo_color, int format, - int* output, + uint8_t* output, quant_method quant_level ) { assert(quant_level < 21); diff --git a/lib/astc-encoder/Source/astcenc_color_unquantize.cpp b/lib/astc-encoder/Source/astcenc_color_unquantize.cpp index 8fa345ca70..a1c2eeb28b 100644 --- a/lib/astc-encoder/Source/astcenc_color_unquantize.cpp +++ b/lib/astc-encoder/Source/astcenc_color_unquantize.cpp @@ -35,7 +35,7 @@ * @return The unquantized color. */ static ASTCENC_SIMD_INLINE vint4 unquant_color( - int quant_level, + quant_method quant_level, vint4 inputq ) { const uint8_t* unq = color_unquant_tables[quant_level]; @@ -72,7 +72,7 @@ static ASTCENC_SIMD_INLINE vint4 uncontract_color( static void rgba_delta_unpack( vint4 input0q, vint4 input1q, - int quant_level, + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -118,7 +118,7 @@ static void rgba_delta_unpack( static void rgb_delta_unpack( vint4 input0q, vint4 input1q, - int quant_level, + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -139,7 +139,7 @@ static void rgb_delta_unpack( static void rgba_unpack( vint4 input0q, vint4 input1q, - int quant_level, + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -173,7 +173,7 @@ static void rgba_unpack( static void rgb_unpack( vint4 input0q, vint4 input1q, - int quant_level, + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -196,16 +196,16 @@ static void rgb_unpack( */ static void rgb_scale_alpha_unpack( vint4 input0q, - int alpha1q, - int scaleq, - int quant_level, + uint8_t alpha1q, + uint8_t scaleq, + 
quant_method quant_level, vint4& output0, vint4& output1 ) { // Unquantize color endpoints vint4 input = unquant_color(quant_level, input0q); - int alpha1 = color_unquant_tables[quant_level][alpha1q]; - int scale = color_unquant_tables[quant_level][scaleq]; + uint8_t alpha1 = color_unquant_tables[quant_level][alpha1q]; + uint8_t scale = color_unquant_tables[quant_level][scaleq]; output1 = input; output1.set_lane<3>(alpha1); @@ -228,7 +228,7 @@ static void rgb_scale_alpha_unpack( static void rgb_scale_unpack( vint4 input0q, int scaleq, - int quant_level, + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -253,8 +253,8 @@ static void rgb_scale_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void luminance_unpack( - const int input[2], - int quant_level, + const uint8_t input[2], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -275,8 +275,8 @@ static void luminance_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void luminance_delta_unpack( - const int input[2], - int quant_level, + const uint8_t input[2], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -300,8 +300,8 @@ static void luminance_delta_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void luminance_alpha_unpack( - const int input[4], - int quant_level, + const uint8_t input[4], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -322,8 +322,8 @@ static void luminance_alpha_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void luminance_alpha_delta_unpack( - const int input[4], - int quant_level, + const uint8_t input[4], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -364,8 +364,8 @@ static void luminance_alpha_delta_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. 
*/ static void hdr_rgbo_unpack( - const int input[4], - int quant_level, + const uint8_t input[4], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -516,8 +516,8 @@ static void hdr_rgbo_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void hdr_rgb_unpack( - const int input[6], - int quant_level, + const uint8_t input[6], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -684,8 +684,8 @@ static void hdr_rgb_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void hdr_rgb_ldr_alpha_unpack( - const int input[8], - int quant_level, + const uint8_t input[8], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -706,8 +706,8 @@ static void hdr_rgb_ldr_alpha_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void hdr_luminance_small_range_unpack( - const int input[2], - int quant_level, + const uint8_t input[2], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -743,8 +743,8 @@ static void hdr_luminance_small_range_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void hdr_luminance_large_range_unpack( - const int input[2], - int quant_level, + const uint8_t input[2], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -776,8 +776,8 @@ static void hdr_luminance_large_range_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. */ static void hdr_alpha_unpack( - const int input[2], - int quant_level, + const uint8_t input[2], + quant_method quant_level, int& output0, int& output1 ) { @@ -825,8 +825,8 @@ static void hdr_alpha_unpack( * @param[out] output1 The unpacked and unquantized endpoint 1 color. 
*/ static void hdr_rgb_hdr_alpha_unpack( - const int input[8], - int quant_level, + const uint8_t input[8], + quant_method quant_level, vint4& output0, vint4& output1 ) { @@ -843,8 +843,8 @@ static void hdr_rgb_hdr_alpha_unpack( void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quant_level, - const int* input, + quant_method quant_level, + const uint8_t* input, bool& rgb_hdr, bool& alpha_hdr, vint4& output0, @@ -889,7 +889,7 @@ void unpack_color_endpoints( case FMT_RGB_SCALE: { vint4 input0q(input[0], input[1], input[2], 0); - int scale = input[3]; + uint8_t scale = input[3]; rgb_scale_unpack(input0q, scale, quant_level, output0, output1); } break; @@ -897,8 +897,8 @@ void unpack_color_endpoints( case FMT_RGB_SCALE_ALPHA: { vint4 input0q(input[0], input[1], input[2], input[4]); - int alpha1q = input[5]; - int scaleq = input[3]; + uint8_t alpha1q = input[5]; + uint8_t scaleq = input[3]; rgb_scale_alpha_unpack(input0q, alpha1q, scaleq, quant_level, output0, output1); } break; diff --git a/lib/astc-encoder/Source/astcenc_compress_symbolic.cpp b/lib/astc-encoder/Source/astcenc_compress_symbolic.cpp index e97a63bdf3..e2506260ee 100644 --- a/lib/astc-encoder/Source/astcenc_compress_symbolic.cpp +++ b/lib/astc-encoder/Source/astcenc_compress_symbolic.cpp @@ -37,15 +37,15 @@ static void merge_endpoints( const endpoints& ep_plane1, const endpoints& ep_plane2, - int component_plane2, + unsigned int component_plane2, endpoints& result ) { - int partition_count = ep_plane1.partition_count; + unsigned int partition_count = ep_plane1.partition_count; vmask4 sep_mask = vint4::lane_id() == vint4(component_plane2); result.partition_count = partition_count; promise(partition_count > 0); - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { result.endpt0[i] = select(ep_plane1.endpt0[i], ep_plane2.endpt0[i], sep_mask); result.endpt1[i] = select(ep_plane1.endpt1[i], ep_plane2.endpt1[i], sep_mask); @@ -55,9 +55,9 @@ 
static void merge_endpoints( /** * @brief Attempt to improve weights given a chosen configuration. * - * Given a fixed weight grid decimation and weight value quantization, iterate - * over all weights (per partition and per plane) and attempt to improve image - * quality by moving each weight up by one or down by one quantization step. + * Given a fixed weight grid decimation and weight value quantization, iterate over all weights (per + * partition and per plane) and attempt to improve image quality by moving each weight up by one or + * down by one quantization step. * * @param decode_mode The decode mode (LDR, HDR). * @param bsd The block size information. @@ -77,56 +77,52 @@ static bool realign_weights( uint8_t* weight_set8_plane2 ) { // Get the partition descriptor - int partition_count = scb.partition_count; - const partition_info *pt = get_partition_table(&bsd, partition_count); - pt += scb.partition_index; + unsigned int partition_count = scb.partition_count; + const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the quantization table - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - int weight_quant_level = bm.quant_mode; + const block_mode& bm = bsd.get_block_mode(scb.block_mode); + unsigned int weight_quant_level = bm.quant_mode; const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_level]); // Get the decimation table - const decimation_table& dt = *(bsd.decimation_tables[bm.decimation_mode]); - int weight_count = dt.weight_count; + const decimation_info& di = *(bsd.decimation_tables[bm.decimation_mode]); + unsigned int weight_count = di.weight_count; - int max_plane = bm.is_dual_plane; + unsigned int max_plane = bm.is_dual_plane; int plane2_component = bm.is_dual_plane ? 
scb.plane2_component : -1; vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); // Decode the color endpoints bool rgb_hdr; bool alpha_hdr; - vint4 endpnt0[4]; - vint4 endpnt1[4]; - vfloat4 endpnt0f[4]; - vfloat4 offset[4]; + vint4 endpnt0[BLOCK_MAX_PARTITIONS]; + vint4 endpnt1[BLOCK_MAX_PARTITIONS]; + vfloat4 endpnt0f[BLOCK_MAX_PARTITIONS]; + vfloat4 offset[BLOCK_MAX_PARTITIONS]; promise(partition_count > 0); promise(weight_count > 0); - promise(max_plane >= 0); - for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb.color_formats[pa_idx], - scb.color_quant_level, + scb.get_color_quant_mode(), scb.color_values[pa_idx], rgb_hdr, alpha_hdr, endpnt0[pa_idx], endpnt1[pa_idx]); } - uint8_t uq_pl_weights[MAX_WEIGHTS_PER_BLOCK]; + uint8_t uq_pl_weights[BLOCK_MAX_WEIGHTS]; uint8_t* weight_set8 = weight_set8_plane1; bool adjustments = false; // For each plane and partition ... 
- for (int pl_idx = 0; pl_idx <= max_plane; pl_idx++) + for (unsigned int pl_idx = 0; pl_idx <= max_plane; pl_idx++) { - for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all components in current plane vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; @@ -137,19 +133,19 @@ static bool realign_weights( } // Create an unquantized weight grid for this decimation level - for (int we_idx = 0; we_idx < weight_count; we_idx++) + for (unsigned int we_idx = 0; we_idx < weight_count; we_idx++) { uq_pl_weights[we_idx] = qat->unquantized_value[weight_set8[we_idx]]; } // For each weight compute previous, current, and next errors - for (int we_idx = 0; we_idx < weight_count; we_idx++) + for (unsigned int we_idx = 0; we_idx < weight_count; we_idx++) { - int uqw = uq_pl_weights[we_idx]; + unsigned int uqw = uq_pl_weights[we_idx]; uint32_t prev_and_next = qat->prev_next_values[uqw]; - int prev_wt_uq = prev_and_next & 0xFF; - int next_wt_uq = (prev_and_next >> 8) & 0xFF; + unsigned int prev_wt_uq = prev_and_next & 0xFF; + unsigned int next_wt_uq = (prev_and_next >> 8) & 0xFF; int uqw_next_dif = next_wt_uq - uqw; int uqw_prev_dif = prev_wt_uq - uqw; @@ -159,13 +155,13 @@ static bool realign_weights( float down_error = 0.0f; // Interpolate the colors to create the diffs - int texels_to_evaluate = dt.weight_texel_count[we_idx]; + unsigned int texels_to_evaluate = di.weight_texel_count[we_idx]; promise(texels_to_evaluate > 0); - for (int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) + for (unsigned int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) { - int texel = dt.weight_texel[te_idx][we_idx]; - const uint8_t *texel_weights = dt.texel_weights_texel[we_idx][te_idx]; - const float *texel_weights_float = dt.texel_weights_float_texel[we_idx][te_idx]; + unsigned int texel = di.weight_texel[te_idx][we_idx]; + const uint8_t *texel_weights = 
di.texel_weights_texel[we_idx][te_idx]; + const float *texel_weights_float = di.texel_weights_float_texel[we_idx][te_idx]; float twf0 = texel_weights_float[0]; float weight_base = ((static_cast(uqw) * twf0 @@ -173,7 +169,7 @@ static bool realign_weights( + (static_cast(uq_pl_weights[texel_weights[2]]) * texel_weights_float[2] + static_cast(uq_pl_weights[texel_weights[3]]) * texel_weights_float[3])); - int partition = pt->partition_of_texel[texel]; + unsigned int partition = pi.partition_of_texel[texel]; weight_base = weight_base + 0.5f; float plane_weight = astc::flt_rd(weight_base); @@ -233,15 +229,15 @@ static bool realign_weights( * @param[out] scb The symbolic compressed block output. * @param[out] tmpbuf The quantized weights for plane 1. */ -static float compress_symbolic_block_fixed_partition_1plane( +static float compress_symbolic_block_for_partition_1plane( const astcenc_config& config, const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, bool only_always, float tune_errorval_threshold, - int partition_count, - int partition_index, + unsigned int partition_count, + unsigned int partition_index, symbolic_compressed_block& scb, compress_fixed_partition_buffers& tmpbuf ) { @@ -251,49 +247,45 @@ static float compress_symbolic_block_fixed_partition_1plane( promise(bsd.decimation_mode_count > 0); static const int free_bits_for_partition_count[5] = { - 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS + 0, 115 - 4, 111 - 4 - PARTITION_INDEX_BITS, 108 - 4 - PARTITION_INDEX_BITS, 105 - 4 - PARTITION_INDEX_BITS }; - const partition_info *pt = get_partition_table(&bsd, partition_count); - pt += partition_index; + const auto& pi = bsd.get_partition_info(partition_count, partition_index); - // first, compute ideal weights and endpoint colors, under the assumption that - // there is no quantization or decimation going on. 
+ // Compute ideal weights and endpoint colors, with no quantization or decimation endpoints_and_weights& ei = tmpbuf.ei1; endpoints_and_weights *eix = tmpbuf.eix1; - compute_endpoints_and_ideal_weights_1plane(bsd, blk, ewb, *pt, ei); - - // next, compute ideal weights and endpoint colors for every decimation. - const decimation_table *const *dts = bsd.decimation_tables; + compute_ideal_colors_and_weights_1plane(bsd, blk, ewb, pi, ei); + // Compute ideal weights and endpoint colors for every decimation float *decimated_quantized_weights = tmpbuf.decimated_quantized_weights; float *decimated_weights = tmpbuf.decimated_weights; float *flt_quantized_decimated_quantized_weights = tmpbuf.flt_quantized_decimated_quantized_weights; uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf.u8_quantized_decimated_quantized_weights; - // for each decimation mode, compute an ideal set of weights - // (that is, weights computed with the assumption that they are not quantized) - for (int i = 0; i < bsd.decimation_mode_count; i++) + // For each decimation mode, compute an ideal set of weights with no quantization + for (unsigned int i = 0; i < bsd.decimation_mode_count; i++) { - const decimation_mode& dm = bsd.decimation_modes[i]; + const auto& dm = bsd.get_decimation_mode(i); if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - compute_ideal_weights_for_decimation_table( + const auto& di = bsd.get_decimation_info(i); + + compute_ideal_weights_for_decimation( ei, eix[i], - *(dts[i]), - decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + i * MAX_WEIGHTS_PER_BLOCK); + di, + decimated_quantized_weights + i * BLOCK_MAX_WEIGHTS, + decimated_weights + i * BLOCK_MAX_WEIGHTS); } - // compute maximum colors for the endpoints and ideal weights. - // for each endpoint-and-ideal-weight pair, compute the smallest weight value - // that will result in a color value greater than 1. 
+ // Compute maximum colors for the endpoints and ideal weights, then for each endpoint and ideal + // weight pair, compute the smallest weight that will result in a color value greater than 1 vfloat4 min_ep(10.0f); - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { vfloat4 ep = (vfloat4(1.0f) - ei.ep.endpt0[i]) / (ei.ep.endpt1[i] - ei.ep.endpt0[i]); @@ -303,23 +295,24 @@ static float compress_symbolic_block_fixed_partition_1plane( float min_wt_cutoff = hmin_s(min_ep); - // for each mode, use the angular method to compute a shift. - float weight_low_value[MAX_WEIGHT_MODES]; - float weight_high_value[MAX_WEIGHT_MODES]; + // For each mode, use the angular method to compute a shift + float weight_low_value[WEIGHTS_MAX_BLOCK_MODES]; + float weight_high_value[WEIGHTS_MAX_BLOCK_MODES]; compute_angular_endpoints_1plane( only_always, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value); - // for each mode (which specifies a decimation and a quantization): - // * compute number of bits needed for the quantized weights. - // * generate an optimized set of quantized weights. - // * compute quantization errors for the mode. 
- int qwt_bitcounts[MAX_WEIGHT_MODES]; - float qwt_errors[MAX_WEIGHT_MODES]; + // For each mode (which specifies a decimation and a quantization): + // * Compute number of bits needed for the quantized weights + // * Generate an optimized set of quantized weights + // * Compute quantization errors for the mode - for (int i = 0; i < bsd.block_mode_count; ++i) + int qwt_bitcounts[WEIGHTS_MAX_BLOCK_MODES]; + float qwt_errors[WEIGHTS_MAX_BLOCK_MODES]; + + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { const block_mode& bm = bsd.block_modes[i]; if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) @@ -334,11 +327,13 @@ static float compress_symbolic_block_fixed_partition_1plane( } int decimation_mode = bm.decimation_mode; + const auto& di = bsd.get_decimation_info(decimation_mode); + + // Compute weight bitcount for the mode + unsigned int bits_used_by_weights = get_ise_sequence_bitcount( + di.weight_count, + bm.get_weight_quant_mode()); - // compute weight bitcount for the mode - int bits_used_by_weights = get_ise_sequence_bitcount( - dts[decimation_mode]->weight_count, - (quant_method)bm.quant_mode); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -347,85 +342,83 @@ static float compress_symbolic_block_fixed_partition_1plane( } qwt_bitcounts[i] = bitcount; - // then, generate the optimized set of weights for the weight mode. 
- compute_quantized_weights_for_decimation_table( - *dts[decimation_mode], + // Generate the optimized set of weights for the weight mode + compute_quantized_weights_for_decimation( + di, weight_low_value[i], weight_high_value[i], - decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode, - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - bm.quant_mode); + decimated_quantized_weights + BLOCK_MAX_WEIGHTS * decimation_mode, + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * i, + u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * i, + bm.get_weight_quant_mode()); - // then, compute weight-errors for the weight mode. + // Compute weight quantization errors for the block mode qwt_errors[i] = compute_error_of_weight_set_1plane( eix[decimation_mode], - *dts[decimation_mode], - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i); + di, + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * i); } - // for each weighting mode, determine the optimal combination of color endpoint encodings - // and weight encodings; return results for the 4 best-looking modes. - - int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; + // Decide the optimal combination of color endpoint encodings and weight encodings + int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS]; int block_mode_index[TUNE_MAX_TRIAL_CANDIDATES]; - // TODO: Make these enums? 
- int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + quant_method color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + quant_method color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; - determine_optimal_set_of_endpoint_formats_to_use( - bsd, *pt, blk, ewb, ei.ep, qwt_bitcounts, qwt_errors, + unsigned int candidate_count = compute_ideal_endpoint_formats( + bsd, pi, blk, ewb, ei.ep, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, partition_format_specifiers, block_mode_index, color_quant_level, color_quant_level_mod); - // then iterate over the tune_candidate_limit believed-to-be-best modes to - // find out which one is actually best. + // Iterate over the N believed-to-be-best modes to find out which one is actually best float best_errorval_in_mode = 1e30f; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < config.tune_candidate_limit; i++) + for (unsigned int i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); - uint8_t *u8_weight_src; - const int bm_packed_index = block_mode_index[i]; - if (bm_packed_index < 0) - { - trace_add_data("failed", "error_block"); - continue; - } - - assert(bm_packed_index >= 0 && bm_packed_index < bsd.block_mode_count); + assert(bm_packed_index >= 0 && bm_packed_index < (int)bsd.block_mode_count); const block_mode& qw_bm = bsd.block_modes[bm_packed_index]; int decimation_mode = qw_bm.decimation_mode; int weight_quant_mode = qw_bm.quant_mode; - const decimation_table& dt = *dts[decimation_mode]; - promise(dt.weight_count > 0); - u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * bm_packed_index; + const auto& di = bsd.get_decimation_info(decimation_mode); + promise(di.weight_count > 0); - trace_add_data("weight_x", dt.weight_x); - trace_add_data("weight_y", dt.weight_y); - trace_add_data("weight_z", dt.weight_z); + trace_add_data("weight_x", di.weight_x); + trace_add_data("weight_y", di.weight_y); + 
trace_add_data("weight_z", di.weight_z); trace_add_data("weight_quant", weight_quant_mode); - // recompute the ideal color endpoints before storing them. - vfloat4 rgbs_colors[4]; - vfloat4 rgbo_colors[4]; + // Recompute the ideal color endpoints before storing them + vfloat4 rgbs_colors[BLOCK_MAX_PARTITIONS]; + vfloat4 rgbo_colors[BLOCK_MAX_PARTITIONS]; symbolic_compressed_block workscb; + + for (unsigned int j = 0; j < BLOCK_MAX_COMPONENTS; j++) + { + workscb.constant_color[j] = 0; + } + + uint8_t* u8_weight_src = u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * bm_packed_index; + + for (unsigned int j = 0; j < di.weight_count; j++) + { + workscb.weights[j] = u8_weight_src[j]; + } + for (unsigned int l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_1plane( - blk, ewb, *pt, dt, - weight_quant_mode, u8_weight_src, + blk, ewb, pi, di, + weight_quant_mode, workscb.weights, eix[decimation_mode].ep, rgbs_colors, rgbo_colors); - // quantize the chosen color - - // store the colors for the block - for (int j = 0; j < partition_count; j++) + // Quantize the chosen color + for (unsigned int j = 0; j < partition_count; j++) { workscb.color_formats[j] = pack_color_endpoints( eix[decimation_mode].ep.endpt0[j], @@ -437,10 +430,9 @@ static float compress_symbolic_block_fixed_partition_1plane( (quant_method)color_quant_level[i]); } - // if all the color endpoint modes are the same, we get a few more - // bits to store colors; let's see if we can take advantage of this: - // requantize all the colors and see if the endpoint modes remain the same; - // if they do, then exploit it. + // If all the color endpoint modes are the same, we get a few more bits to store colors; + // let's see if we can take advantage of this: requantize all the colors and see if the + // endpoint modes remain the same. 
workscb.color_formats_matched = 0; if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] @@ -448,9 +440,9 @@ static float compress_symbolic_block_fixed_partition_1plane( && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { - int colorvals[4][12]; - int color_formats_mod[4] { 0 }; - for (int j = 0; j < partition_count; j++) + uint8_t colorvals[BLOCK_MAX_PARTITIONS][12]; + int color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 }; + for (unsigned int j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( eix[decimation_mode].ep.endpt0[j], @@ -467,9 +459,9 @@ static float compress_symbolic_block_fixed_partition_1plane( && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { workscb.color_formats_matched = 1; - for (int j = 0; j < 4; j++) + for (unsigned int j = 0; j < BLOCK_MAX_PARTITIONS; j++) { - for (int k = 0; k < 12; k++) + for (unsigned int k = 0; k < 8; k++) { workscb.color_values[j][k] = colorvals[j][k]; } @@ -479,43 +471,36 @@ static float compress_symbolic_block_fixed_partition_1plane( } } - // store header fields + // Store header fields workscb.partition_count = partition_count; workscb.partition_index = partition_index; - workscb.color_quant_level = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; + workscb.quant_mode = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; workscb.block_mode = qw_bm.mode_index; - workscb.error_block = 0; + workscb.block_type = SYM_BTYPE_NONCONST; - if (workscb.color_quant_level < 4) + if (workscb.quant_mode < QUANT_6) { - workscb.error_block = 1; // should never happen, but cannot prove it impossible. 
+ workscb.block_type = SYM_BTYPE_ERROR; } // Pre-realign test if (l == 0) { - for (int j = 0; j < dt.weight_count; j++) - { - workscb.weights[j] = u8_weight_src[j]; - } - float errorval = compute_symbolic_block_difference(config, bsd, workscb, blk, ewb); if (errorval == -1e30f) { errorval = -errorval; - workscb.error_block = 1; + workscb.block_type = SYM_BTYPE_ERROR; } - trace_add_data("error_prerealign", errorval); best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); - // Average refinement improvement is 3.5% per iteration - // (allow 5%), but the first iteration can help more so we give - // it a extra 10% leeway. Use this knowledge to drive a - // heuristic to skip blocks that are unlikely to catch up with - // the best block we have already. - int iters_remaining = config.tune_refinement_limit - l; + // Average refinement improvement is 3.5% per iteration (allow 5%), but the first + // iteration can help more so we give it a extra 10% leeway. Use this knowledge to + // drive a heuristic to skip blocks that are unlikely to catch up with the best + // block we have already. + unsigned int iters_remaining = config.tune_refinement_limit - l; float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -535,31 +520,26 @@ static float compress_symbolic_block_fixed_partition_1plane( } } - // perform a final pass over the weights to try to improve them. + // Perform a final pass over the weights to try to improve them. 
bool adjustments = realign_weights( config.profile, bsd, blk, ewb, workscb, - u8_weight_src, nullptr); + workscb.weights, nullptr); // Post-realign test - for (int j = 0; j < dt.weight_count; j++) - { - workscb.weights[j] = u8_weight_src[j]; - } - float errorval = compute_symbolic_block_difference(config, bsd, workscb, blk, ewb); if (errorval == -1e30f) { errorval = -errorval; - workscb.error_block = 1; + workscb.block_type = SYM_BTYPE_ERROR; } trace_add_data("error_postrealign", errorval); best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); - // Average refinement improvement is 3.5% per iteration, so skip - // blocks that are unlikely to catch up with the best block we - // have already. Assume a 5% per step to give benefit of the doubt - int iters_remaining = config.tune_refinement_limit - 1 - l; + // Average refinement improvement is 3.5% per iteration, so skip blocks that are + // unlikely to catch up with the best block we have already. Assume a 5% per step to + // give benefit of the doubt ... + unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -602,15 +582,15 @@ static float compress_symbolic_block_fixed_partition_1plane( * @param[out] scb The symbolic compressed block output. * @param[out] tmpbuf The quantized weights for plane 1. 
*/ -static float compress_symbolic_block_fixed_partition_2planes( +static float compress_symbolic_block_for_partition_2planes( const astcenc_config& config, const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, float tune_errorval_threshold, - int partition_count, - int partition_index, - int plane2_component, + unsigned int partition_count, + unsigned int partition_index, + unsigned int plane2_component, symbolic_compressed_block& scb, compress_fixed_partition_buffers& tmpbuf ) { @@ -620,58 +600,55 @@ static float compress_symbolic_block_fixed_partition_2planes( promise(bsd.decimation_mode_count > 0); static const int free_bits_for_partition_count[5] = { - 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS + 0, 113 - 4, 109 - 4 - PARTITION_INDEX_BITS, 106 - 4 - PARTITION_INDEX_BITS, 103 - 4 - PARTITION_INDEX_BITS }; - const partition_info *pt = get_partition_table(&bsd, partition_count); - pt += partition_index; + const auto& pi = bsd.get_partition_info(partition_count, partition_index); - // first, compute ideal weights and endpoint colors + // Compute ideal weights and endpoint colors, with no quantization or decimation endpoints_and_weights& ei1 = tmpbuf.ei1; endpoints_and_weights& ei2 = tmpbuf.ei2; endpoints_and_weights* eix1 = tmpbuf.eix1; endpoints_and_weights* eix2 = tmpbuf.eix2; - compute_endpoints_and_ideal_weights_2planes(bsd, blk, ewb, *pt, plane2_component, ei1, ei2); - - // next, compute ideal weights and endpoint colors for every decimation. 
- const decimation_table *const *dts = bsd.decimation_tables; + compute_ideal_colors_and_weights_2planes(bsd, blk, ewb, pi, plane2_component, ei1, ei2); + // Compute ideal weights and endpoint colors for every decimation float *decimated_quantized_weights = tmpbuf.decimated_quantized_weights; float *decimated_weights = tmpbuf.decimated_weights; float *flt_quantized_decimated_quantized_weights = tmpbuf.flt_quantized_decimated_quantized_weights; uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf.u8_quantized_decimated_quantized_weights; - // for each decimation mode, compute an ideal set of weights - for (int i = 0; i < bsd.decimation_mode_count; i++) + // For each decimation mode, compute an ideal set of weights with no quantization + for (unsigned int i = 0; i < bsd.decimation_mode_count; i++) { - const decimation_mode& dm = bsd.decimation_modes[i]; + const auto& dm = bsd.get_decimation_mode(i); if (dm.maxprec_2planes < 0 || !dm.percentile_hit) { continue; } - compute_ideal_weights_for_decimation_table( + const auto& di = bsd.get_decimation_info(i); + + compute_ideal_weights_for_decimation( ei1, eix1[i], - *(dts[i]), - decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK); + di, + decimated_quantized_weights + (2 * i) * BLOCK_MAX_WEIGHTS, + decimated_weights + (2 * i) * BLOCK_MAX_WEIGHTS); - compute_ideal_weights_for_decimation_table( + compute_ideal_weights_for_decimation( ei2, eix2[i], - *(dts[i]), - decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK); + di, + decimated_quantized_weights + (2 * i + 1) * BLOCK_MAX_WEIGHTS, + decimated_weights + (2 * i + 1) * BLOCK_MAX_WEIGHTS); } - // compute maximum colors for the endpoints and ideal weights. - // for each endpoint-and-ideal-weight pair, compute the smallest weight value - // that will result in a color value greater than 1. 
- + // Compute maximum colors for the endpoints and ideal weights, then for each endpoint and ideal + // weight pair, compute the smallest weight that will result in a color value greater than 1 vfloat4 min_ep1(10.0f); vfloat4 min_ep2(10.0f); - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { vfloat4 ep1 = (vfloat4(1.0f) - ei1.ep.endpt0[i]) / (ei1.ep.endpt1[i] - ei1.ep.endpt0[i]); vmask4 use_ep1 = (ep1 > vfloat4(0.5f)) & (ep1 < min_ep1); @@ -693,24 +670,24 @@ static float compress_symbolic_block_fixed_partition_2planes( // Set the minwt2 to the plane2 component min in ep2 float min_wt_cutoff2 = hmin_s(select(err_max, min_ep2, err_mask)); - float weight_low_value1[MAX_WEIGHT_MODES]; - float weight_high_value1[MAX_WEIGHT_MODES]; - float weight_low_value2[MAX_WEIGHT_MODES]; - float weight_high_value2[MAX_WEIGHT_MODES]; + float weight_low_value1[WEIGHTS_MAX_BLOCK_MODES]; + float weight_high_value1[WEIGHTS_MAX_BLOCK_MODES]; + float weight_low_value2[WEIGHTS_MAX_BLOCK_MODES]; + float weight_high_value2[WEIGHTS_MAX_BLOCK_MODES]; compute_angular_endpoints_2planes( bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2); - // for each mode (which specifies a decimation and a quantization): - // * generate an optimized set of quantized weights. - // * compute quantization errors for each mode - // * compute number of bits needed for the quantized weights. 
+ // For each mode (which specifies a decimation and a quantization): + // * Compute number of bits needed for the quantized weights + // * Generate an optimized set of quantized weights + // * Compute quantization errors for the mode - int qwt_bitcounts[MAX_WEIGHT_MODES]; - float qwt_errors[MAX_WEIGHT_MODES]; - for (int i = 0; i < bsd.block_mode_count; ++i) + int qwt_bitcounts[WEIGHTS_MAX_BLOCK_MODES]; + float qwt_errors[WEIGHTS_MAX_BLOCK_MODES]; + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { const block_mode& bm = bsd.block_modes[i]; if (!bm.is_dual_plane || !bm.percentile_hit) @@ -719,7 +696,8 @@ static float compress_symbolic_block_fixed_partition_2planes( continue; } - int decimation_mode = bm.decimation_mode; + unsigned int decimation_mode = bm.decimation_mode; + const auto& di = bsd.get_decimation_info(decimation_mode); if (weight_high_value1[i] > 1.02f * min_wt_cutoff1) { @@ -731,10 +709,10 @@ static float compress_symbolic_block_fixed_partition_2planes( weight_high_value2[i] = 1.0f; } - // compute weight bitcount for the mode - int bits_used_by_weights = get_ise_sequence_bitcount( - 2 * dts[decimation_mode]->weight_count, - (quant_method)bm.quant_mode); + // Compute weight bitcount for the mode + unsigned int bits_used_by_weights = get_ise_sequence_bitcount( + 2 * di.weight_count, + bm.get_weight_quant_mode()); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -743,97 +721,100 @@ static float compress_symbolic_block_fixed_partition_2planes( } qwt_bitcounts[i] = bitcount; - // then, generate the optimized set of weights for the mode. 
- compute_quantized_weights_for_decimation_table( - *dts[decimation_mode], + // Generate the optimized set of weights for the mode + compute_quantized_weights_for_decimation( + di, weight_low_value1[i], weight_high_value1[i], - decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode), - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bm.quant_mode); + decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * decimation_mode), + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i), + u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i), bm.get_weight_quant_mode()); - compute_quantized_weights_for_decimation_table( - *dts[decimation_mode], + compute_quantized_weights_for_decimation( + di, weight_low_value2[i], weight_high_value2[i], - decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1), - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bm.quant_mode); + decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * decimation_mode + 1), + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i + 1), + u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i + 1), bm.get_weight_quant_mode()); - // then, compute quantization errors for the block mode. 
+ // Compute weight quantization errors for the block mode qwt_errors[i] = compute_error_of_weight_set_2planes( eix1[decimation_mode], eix2[decimation_mode], - *dts[decimation_mode], - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1)); + di, + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i), + flt_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * i + 1)); } - // decide the optimal combination of color endpoint encodings and weight encodings. - int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; + // Decide the optimal combination of color endpoint encodings and weight encodings + int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS]; int block_mode_index[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + + quant_method color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + quant_method color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; endpoints epm; merge_endpoints(ei1.ep, ei2.ep, plane2_component, epm); - determine_optimal_set_of_endpoint_formats_to_use( - bsd, *pt, blk, ewb, epm, qwt_bitcounts, qwt_errors, + unsigned int candidate_count = compute_ideal_endpoint_formats( + bsd, pi, blk, ewb, epm, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, partition_format_specifiers, block_mode_index, color_quant_level, color_quant_level_mod); - // then iterate over the tune_candidate_limit believed-to-be-best modes to - // find out which one is actually best. 
+ // Iterate over the N believed-to-be-best modes to find out which one is actually best float best_errorval_in_mode = 1e30f; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < config.tune_candidate_limit; i++) + for (unsigned int i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); const int bm_packed_index = block_mode_index[i]; - if (bm_packed_index < 0) - { - trace_add_data("failed", "error_block"); - continue; - } - - uint8_t *u8_weight1_src; - uint8_t *u8_weight2_src; - - assert(bm_packed_index >= 0 && bm_packed_index < bsd.block_mode_count); + assert(bm_packed_index >= 0 && bm_packed_index < (int)bsd.block_mode_count); const block_mode& qw_bm = bsd.block_modes[bm_packed_index]; int decimation_mode = qw_bm.decimation_mode; int weight_quant_mode = qw_bm.quant_mode; - const decimation_table& dt = *dts[decimation_mode]; - promise(dt.weight_count > 0); + const auto& di = bsd.get_decimation_info(decimation_mode); + promise(di.weight_count > 0); - u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * bm_packed_index); - u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * bm_packed_index + 1); - - trace_add_data("weight_x", dt.weight_x); - trace_add_data("weight_y", dt.weight_y); - trace_add_data("weight_z", dt.weight_z); + trace_add_data("weight_x", di.weight_x); + trace_add_data("weight_y", di.weight_y); + trace_add_data("weight_z", di.weight_z); trace_add_data("weight_quant", weight_quant_mode); - // recompute the ideal color endpoints before storing them. + // Recompute the ideal color endpoints before storing them. 
merge_endpoints(eix1[decimation_mode].ep, eix2[decimation_mode].ep, plane2_component, epm); - vfloat4 rgbs_colors[4]; - vfloat4 rgbo_colors[4]; + vfloat4 rgbs_colors[BLOCK_MAX_PARTITIONS]; + vfloat4 rgbo_colors[BLOCK_MAX_PARTITIONS]; symbolic_compressed_block workscb; + + for (unsigned int j = 0; j < BLOCK_MAX_COMPONENTS; j++) + { + workscb.constant_color[j] = 0; + } + + uint8_t* u8_weight1_src = u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * bm_packed_index); + uint8_t* u8_weight2_src = u8_quantized_decimated_quantized_weights + BLOCK_MAX_WEIGHTS * (2 * bm_packed_index + 1); + + for (int j = 0; j < di.weight_count; j++) + { + workscb.weights[j] = u8_weight1_src[j]; + workscb.weights[j + WEIGHTS_PLANE2_OFFSET] = u8_weight2_src[j]; + } + for (unsigned int l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_2planes( - blk, ewb, *pt, dt, - weight_quant_mode, u8_weight1_src, u8_weight2_src, + blk, ewb, pi, di, + weight_quant_mode, workscb.weights, workscb.weights + WEIGHTS_PLANE2_OFFSET, epm, rgbs_colors, rgbo_colors, plane2_component); - // store the colors for the block - for (int j = 0; j < partition_count; j++) + // Quantize the chosen color + for (unsigned int j = 0; j < partition_count; j++) { workscb.color_formats[j] = pack_color_endpoints( epm.endpt0[j], @@ -844,6 +825,9 @@ static float compress_symbolic_block_fixed_partition_2planes( (quant_method)color_quant_level[i]); } + // If all the color endpoint modes are the same, we get a few more bits to store colors; + // let's see if we can take advantage of this: requantize all the colors and see if the + // endpoint modes remain the same. 
workscb.color_formats_matched = 0; if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] @@ -851,9 +835,9 @@ static float compress_symbolic_block_fixed_partition_2planes( && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { - int colorvals[4][12]; - int color_formats_mod[4] { 0 }; - for (int j = 0; j < partition_count; j++) + uint8_t colorvals[BLOCK_MAX_PARTITIONS][12]; + int color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 }; + for (unsigned int j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( epm.endpt0[j], @@ -870,9 +854,9 @@ static float compress_symbolic_block_fixed_partition_2planes( && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { workscb.color_formats_matched = 1; - for (int j = 0; j < 4; j++) + for (unsigned int j = 0; j < BLOCK_MAX_PARTITIONS; j++) { - for (int k = 0; k < 12; k++) + for (unsigned int k = 0; k < 8; k++) { workscb.color_values[j][k] = colorvals[j][k]; } @@ -882,45 +866,37 @@ static float compress_symbolic_block_fixed_partition_2planes( } } - // store header fields + // Store header fields workscb.partition_count = partition_count; workscb.partition_index = partition_index; - workscb.color_quant_level = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; + workscb.quant_mode = workscb.color_formats_matched ? 
color_quant_level_mod[i] : color_quant_level[i]; workscb.block_mode = qw_bm.mode_index; workscb.plane2_component = plane2_component; - workscb.error_block = 0; + workscb.block_type = SYM_BTYPE_NONCONST; - if (workscb.color_quant_level < 4) + if (workscb.quant_mode < 4) { - workscb.error_block = 1; // should never happen, but cannot prove it impossible + workscb.block_type = SYM_BTYPE_ERROR; } // Pre-realign test if (l == 0) { - for (int j = 0; j < dt.weight_count; j++) - { - workscb.weights[j] = u8_weight1_src[j]; - workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; - } - float errorval = compute_symbolic_block_difference(config, bsd, workscb, blk, ewb); if (errorval == -1e30f) { errorval = -errorval; - workscb.error_block = 1; + workscb.block_type = SYM_BTYPE_ERROR; } - trace_add_data("error_prerealign", errorval); best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); - // Average refinement improvement is 3.5% per iteration - // (allow 5%), but the first iteration can help more so we give - // it a extra 10% leeway. Use this knowledge to drive a - // heuristic to skip blocks that are unlikely to catch up with - // the best block we have already. - int iters_remaining = config.tune_refinement_limit - l; + // Average refinement improvement is 3.5% per iteration (allow 5%), but the first + // iteration can help more so we give it a extra 10% leeway. Use this knowledge to + // drive a heuristic to skip blocks that are unlikely to catch up with the best + // block we have already. + unsigned int iters_remaining = config.tune_refinement_limit - l; float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -940,32 +916,26 @@ static float compress_symbolic_block_fixed_partition_2planes( } } - // perform a final pass over the weights to try to improve them. 
+ // Perform a final pass over the weights to try to improve them bool adjustments = realign_weights( config.profile, bsd, blk, ewb, workscb, - u8_weight1_src, u8_weight2_src); + workscb.weights, workscb.weights + WEIGHTS_PLANE2_OFFSET); // Post-realign test - for (int j = 0; j < dt.weight_count; j++) - { - workscb.weights[j] = u8_weight1_src[j]; - workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; - } - float errorval = compute_symbolic_block_difference(config, bsd, workscb, blk, ewb); if (errorval == -1e30f) { errorval = -errorval; - workscb.error_block = 1; + workscb.block_type = SYM_BTYPE_ERROR; } trace_add_data("error_postrealign", errorval); best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); - // Average refinement improvement is 3.5% per iteration, so skip - // blocks that are unlikely to catch up with the best block we - // have already. Assume a 5% per step to give benefit of the doubt - int iters_remaining = config.tune_refinement_limit - 1 - l; + // Average refinement improvement is 3.5% per iteration, so skip blocks that are + // unlikely to catch up with the best block we have already. Assume a 5% per step to + // give benefit of the doubt ... + unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -997,9 +967,9 @@ static float compress_symbolic_block_fixed_partition_2planes( /** * @brief Create a per-texel expansion of the error weights for deblocking. * - * Deblockign works by assigning a higher error weight to blocks the closer - * they are the edge of the block. The encourages the compressor to keep the - * periphery colors more accurate, which can help with compressing gradients. + * Deblockign works by assigning a higher error weight to blocks the closer they are the edge of the + * block. 
This encourages the compressor to keep the periphery colors more accurate, which can help + * reduce block artifacts when compressing gradients. * * @param[in,out] ctx The context containing both deblog memory and config. */ @@ -1037,13 +1007,11 @@ void expand_deblock_weights( /** * @brief Create a per-texel and per-channel expansion of the error weights. * - * This approach creates relatively large error block tables, but it allows a - * very flexible level of control over how specific texels and channels are - * prioritized by the compressor. + * This approach creates relatively large error block tables, but it allows a very flexible level of + * control over how specific texels and channels are prioritized by the compressor. * - * TODO: Inline the expand_deblock_weights here? The computation is cheap, and - * it would remove some memory allocation from the context, at the expense of - * recomputing per block. + * TODO: Inline the expand_deblock_weights here? The computation is cheap, and it would remove some + * memory allocation from the context, at the expense of recomputing per block. * * @param ctx The compressor context and configuration. * @param image The input image information.
@@ -1060,10 +1028,10 @@ static float prepare_error_weight_block( const imageblock& blk, error_weight_block& ewb ) { - int idx = 0; - int any_mean_stdev_weight = - ctx.config.v_rgb_mean != 0.0f || ctx.config.v_rgb_stdev != 0.0f || \ - ctx.config.v_a_mean != 0.0f || ctx.config.v_a_stdev != 0.0f; + unsigned int idx = 0; + bool any_mean_stdev_weight = + ctx.config.v_rgb_mean != 0.0f || ctx.config.v_rgb_stdev != 0.0f || \ + ctx.config.v_a_mean != 0.0f || ctx.config.v_a_stdev != 0.0f; vfloat4 color_weights(ctx.config.cw_r_weight, ctx.config.cw_g_weight, @@ -1071,8 +1039,8 @@ static float prepare_error_weight_block( ctx.config.cw_a_weight); // This works because HDR is imposed globally at compression time - int rgb_lns = blk.rgb_lns[0]; - int a_lns = blk.alpha_lns[0]; + unsigned int rgb_lns = blk.rgb_lns[0]; + unsigned int a_lns = blk.alpha_lns[0]; vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns); vmask4 lns_mask = use_lns != vint4::zero(); @@ -1080,11 +1048,11 @@ static float prepare_error_weight_block( promise(bsd.ydim > 0); promise(bsd.zdim > 0); - for (int z = 0; z < bsd.zdim; z++) + for (unsigned int z = 0; z < bsd.zdim; z++) { - for (int y = 0; y < bsd.ydim; y++) + for (unsigned int y = 0; y < bsd.ydim; y++) { - for (int x = 0; x < bsd.xdim; x++) + for (unsigned int x = 0; x < bsd.xdim; x++) { unsigned int xpos = x + blk.xpos; unsigned int ypos = y + blk.ypos; @@ -1129,8 +1097,8 @@ static float prepare_error_weight_block( ctx.config.v_rgb_base, ctx.config.v_a_base); - int ydt = image.dim_x; - int zdt = image.dim_x * image.dim_y; + unsigned int ydt = image.dim_x; + unsigned int zdt = image.dim_x * image.dim_y; if (any_mean_stdev_weight) { @@ -1201,13 +1169,12 @@ static float prepare_error_weight_block( error_weight = error_weight * color_weights; error_weight = error_weight * ctx.deblock_weights[idx]; - // when we loaded the block to begin with, we applied a transfer function - // and computed the derivative of the transfer function. 
However, the - // error-weight computation so far is based on the original color values, - // not the transfer-function values. As such, we must multiply the - // error weights by the derivative of the inverse of the transfer function, - // which is equivalent to dividing by the derivative of the transfer - // function. + // When we loaded the block to begin with, we applied a transfer function and + // computed the derivative of the transfer function. However, the error-weight + // computation so far is based on the original color values, not the + // transfer-function values. As such, we must multiply the error weights by the + // derivative of the inverse of the transfer function, which is equivalent to + // dividing by the derivative of the transfer function. error_weight = error_weight / (derv * derv * 1e-10f); ewb.error_weights[idx] = error_weight; @@ -1236,7 +1203,6 @@ static float prepare_error_weight_block( ewb.texel_weight_rg[i] = (wr + wg) * 0.5f; ewb.texel_weight_rb[i] = (wr + wb) * 0.5f; ewb.texel_weight_gb[i] = (wg + wb) * 0.5f; - ewb.texel_weight_ra[i] = (wr + wa) * 0.5f; ewb.texel_weight_gba[i] = (wg + wb + wa) * 0.333333f; ewb.texel_weight_rba[i] = (wr + wb + wa) * 0.333333f; @@ -1410,14 +1376,12 @@ void compress_block( trace_add_data("pos_y", blk->ypos); trace_add_data("pos_z", blk->zpos); - // Set stricter block targets for luminance data as we have more bits to - // play with - fewer endpoints and never need a second weight plane - bool block_is_l = imageblock_is_lum(&blk); + // Set stricter block targets for luminance data as we have more bits to play with + bool block_is_l = blk.is_luminance(); float block_is_l_scale = block_is_l ? 
1.0f / 1.5f : 1.0f; - // Set slightly stricter block targets for lumalpha data as we have more - // bits to play with - fewer endpoints but may use a second weight plane - bool block_is_la = imageblock_is_lumalp(&blk); + // Set slightly stricter block targets for lumalpha data as we have more bits to play with + bool block_is_la = blk.is_luminancealpha(); float block_is_la_scale = block_is_la ? 1.0f / 1.05f : 1.0f; bool block_skip_two_plane = false; @@ -1444,27 +1408,27 @@ void compress_block( trace_add_data("tune_error_threshold", error_threshold); #endif + // Detected a constant-color block if (all(blk.data_min == blk.data_max)) { TRACE_NODE(node1, "pass"); trace_add_data("partition_count", 0); trace_add_data("plane_count", 1); - // detected a constant-color block. Encode as FP16 if using HDR - scb.error_block = 0; scb.partition_count = 0; + // Encode as FP16 if using HDR if ((decode_mode == ASTCENC_PRF_HDR) || (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A)) { - scb.block_mode = -1; + scb.block_type = SYM_BTYPE_CONST_F16; vint4 color_f16 = float_to_float16(blk.origin_texel); store(color_f16, scb.constant_color); } + // Encode as UNORM16 if NOT using HDR else { - // Encode as UNORM16 if NOT using HDR. - scb.block_mode = -2; + scb.block_type = SYM_BTYPE_CONST_U16; vfloat4 color_f32 = clamp(0.0f, 1.0f, blk.origin_texel) * 65535.0f; vint4 color_u16 = float_to_int_rtn(color_f32); store(color_u16, scb.constant_color); @@ -1486,7 +1450,7 @@ void compress_block( // Set SCB and mode errors to a very high error value scb.errorval = 1e30f; - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; float best_errorvals_in_modes[MODE_COUNT]; for (int i = 0; i < MODE_COUNT; i++) @@ -1494,14 +1458,13 @@ void compress_block( best_errorvals_in_modes[i] = 1e30f; } - int uses_alpha = imageblock_uses_alpha(&blk); + bool uses_alpha = blk.is_using_alpha(); // Trial using 1 plane of weights and 1 partition. 
- // Most of the time we test it twice, first with a mode cutoff of 0 and - // then with the specified mode cutoff. This causes an early-out that - // speeds up encoding of easy blocks. However, this optimization is - // disabled for 4x4 and 5x4 blocks where it nearly always slows down the + // Most of the time we test it twice, first with a mode cutoff of 0 and then with the specified + // mode cutoff. This causes an early-out that speeds up encoding of easy blocks. However, this + // optimization is disabled for 4x4 and 5x4 blocks where it nearly always slows down the // compression and slightly reduces image quality. float errorval_mult[2] = { @@ -1519,7 +1482,7 @@ void compress_block( trace_add_data("plane_count", 1); trace_add_data("search_mode", i); - float errorval = compress_symbolic_block_fixed_partition_1plane( + float errorval = compress_symbolic_block_for_partition_1plane( ctx.config, *bsd, blk, ewb, i == 0, error_threshold * errorval_mult[i] * errorval_overshoot, 1, 0, scb, tmpbuf.planes); @@ -1539,8 +1502,8 @@ void compress_block( block_skip_two_plane = lowest_correl > ctx.config.tune_2_plane_early_out_limit_correlation; - // next, test the four possible 1-partition, 2-planes modes - for (int i = 0; i < 4; i++) + // Test the four possible 1-partition, 2-planes modes + for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) { TRACE_NODE(node1, "pass"); trace_add_data("partition_count", 1); @@ -1565,7 +1528,7 @@ void compress_block( continue; } - float errorval = compress_symbolic_block_fixed_partition_2planes( + float errorval = compress_symbolic_block_for_partition_2planes( ctx.config, *bsd, blk, ewb, error_threshold * errorval_overshoot, 1, // partition count @@ -1583,13 +1546,13 @@ void compress_block( } } - // find best blocks for 2, 3 and 4 partitions + // Find best blocks for 2, 3 and 4 partitions for (int partition_count = 2; partition_count <= max_partitions; partition_count++) { - int partition_indices_1plane[2] { 0, 0 }; - int 
partition_index_2planes = 0; + unsigned int partition_indices_1plane[2] { 0, 0 }; + unsigned int partition_index_2planes { 0 }; - find_best_partitionings(*bsd, blk, ewb, partition_count, + find_best_partition_candidates(*bsd, blk, ewb, partition_count, ctx.config.tune_partition_index_limit, partition_indices_1plane[0], partition_indices_1plane[1], @@ -1603,7 +1566,7 @@ void compress_block( trace_add_data("plane_count", 1); trace_add_data("search_mode", i); - float errorval = compress_symbolic_block_fixed_partition_1plane( + float errorval = compress_symbolic_block_for_partition_1plane( ctx.config, *bsd, blk, ewb, false, error_threshold * errorval_overshoot, partition_count, partition_indices_1plane[i], @@ -1680,16 +1643,16 @@ void compress_block( TRACE_NODE(node1, "pass"); trace_add_data("partition_count", partition_count); - trace_add_data("partition_index", partition_index_2planes & (PARTITION_COUNT - 1)); + trace_add_data("partition_index", partition_index_2planes & (BLOCK_MAX_PARTITIONINGS - 1)); trace_add_data("plane_count", 2); - trace_add_data("plane_component", partition_index_2planes >> PARTITION_BITS); + trace_add_data("plane_component", partition_index_2planes >> PARTITION_INDEX_BITS); - float errorval = compress_symbolic_block_fixed_partition_2planes( + float errorval = compress_symbolic_block_for_partition_2planes( ctx.config, *bsd, blk, ewb, error_threshold * errorval_overshoot, partition_count, - partition_index_2planes & (PARTITION_COUNT - 1), - partition_index_2planes >> PARTITION_BITS, + partition_index_2planes & (BLOCK_MAX_PARTITIONINGS - 1), + partition_index_2planes >> PARTITION_INDEX_BITS, scb, tmpbuf.planes); // Modes 7, 10 (13 is unreachable) diff --git a/lib/astc-encoder/Source/astcenc_compute_variance.cpp b/lib/astc-encoder/Source/astcenc_compute_variance.cpp index 1b8b92fee6..61c1481073 100644 --- a/lib/astc-encoder/Source/astcenc_compute_variance.cpp +++ b/lib/astc-encoder/Source/astcenc_compute_variance.cpp @@ -20,14 +20,13 @@ /** * 
@brief Functions to calculate variance per component in a NxN footprint. * - * We need N to be parametric, so the routine below uses summed area tables in - * order to execute in O(1) time independent of how big N is. + * We need N to be parametric, so the routine below uses summed area tables in order to execute in + * O(1) time independent of how big N is. * - * The addition uses a Brent-Kung-based parallel prefix adder. This uses the - * prefix tree to first perform a binary reduction, and then distributes the - * results. This method means that there is no serial dependency between a - * given element and the next one, and also significantly improves numerical - * stability allowing us to use floats rather than doubles. + * The addition uses a Brent-Kung-based parallel prefix adder. This uses the prefix tree to first + * perform a binary reduction, and then distributes the results. This method means that there is no + * serial dependency between a given element and the next one, and also significantly improves + * numerical stability allowing us to use floats rather than doubles. */ #include "astcenc_internal.h" @@ -103,8 +102,8 @@ static void brent_kung_prefix_sum( /** * @brief Compute averages and variances for a pixel region. * - * The routine computes both in a single pass, using a summed-area table to - * decouple the running time from the averaging/variance kernel size. + * The routine computes both in a single pass, using a summed-area table to decouple the running + * time from the averaging/variance kernel size. * * @param[out] ctx The compressor context storing the output data. * @param arg The input parameter structure. @@ -569,49 +568,47 @@ void compute_averages_and_variances( /* See header for documentation. 
*/ unsigned int init_compute_averages_and_variances( - astcenc_image& img, + const astcenc_image& img, float rgb_power, float alpha_power, - int avg_var_kernel_radius, - int alpha_kernel_radius, + unsigned int avg_var_kernel_radius, + unsigned int alpha_kernel_radius, const astcenc_swizzle& swz, - pixel_region_variance_args& arg, avg_var_args& ag ) { - int size_x = img.dim_x; - int size_y = img.dim_y; - int size_z = img.dim_z; + unsigned int size_x = img.dim_x; + unsigned int size_y = img.dim_y; + unsigned int size_z = img.dim_z; // Compute maximum block size and from that the working memory buffer size - int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); - int kerneldim = 2 * kernel_radius + 1; + unsigned int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); + unsigned int kerneldim = 2 * kernel_radius + 1; bool have_z = (size_z > 1); - int max_blk_size_xy = have_z ? 16 : 32; - int max_blk_size_z = astc::min(size_z, have_z ? 16 : 1); + unsigned int max_blk_size_xy = have_z ? 16 : 32; + unsigned int max_blk_size_z = astc::min(size_z, have_z ? 16u : 1u); - int max_padsize_xy = max_blk_size_xy + kerneldim; - int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); + unsigned int max_padsize_xy = max_blk_size_xy + kerneldim; + unsigned int max_padsize_z = max_blk_size_z + (have_z ? 
kerneldim : 0); // Perform block-wise averages-and-variances calculations across the image // Initialize fields which are not populated until later - arg.size_x = 0; - arg.size_y = 0; - arg.size_z = 0; - arg.offset_x = 0; - arg.offset_y = 0; - arg.offset_z = 0; - arg.work_memory = nullptr; - - arg.img = &img; - arg.rgb_power = rgb_power; - arg.alpha_power = alpha_power; - arg.swz = swz; - arg.have_z = have_z; - arg.avg_var_kernel_radius = avg_var_kernel_radius; - arg.alpha_kernel_radius = alpha_kernel_radius; - - ag.arg = arg; + ag.arg.size_x = 0; + ag.arg.size_y = 0; + ag.arg.size_z = 0; + ag.arg.offset_x = 0; + ag.arg.offset_y = 0; + ag.arg.offset_z = 0; + ag.arg.work_memory = nullptr; + + ag.arg.img = &img; + ag.arg.rgb_power = rgb_power; + ag.arg.alpha_power = alpha_power; + ag.arg.swz = swz; + ag.arg.have_z = have_z; + ag.arg.avg_var_kernel_radius = avg_var_kernel_radius; + ag.arg.alpha_kernel_radius = alpha_kernel_radius; + ag.img_size_x = size_x; ag.img_size_y = size_y; ag.img_size_z = size_z; @@ -620,8 +617,8 @@ unsigned int init_compute_averages_and_variances( ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z; // The parallel task count - int z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; - int y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; + unsigned int z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; + unsigned int y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; return z_tasks * y_tasks; } diff --git a/lib/astc-encoder/Source/astcenc_decompress_symbolic.cpp b/lib/astc-encoder/Source/astcenc_decompress_symbolic.cpp index ec197ea7c7..5f73b2a1b0 100644 --- a/lib/astc-encoder/Source/astcenc_decompress_symbolic.cpp +++ b/lib/astc-encoder/Source/astcenc_decompress_symbolic.cpp @@ -27,26 +27,26 @@ /** * @brief Compute a vector of texel weights by interpolating the decimated weight grid. * - * @param texel_to_get The first texel to get; N (SIMD width) consecutive texels are loaded. 
- * @param dt The weight grid decimation table. - * @param weights The raw weights. + * @param base_texel_index The first texel to get; N (SIMD width) consecutive texels are loaded. + * @param di The weight grid decimation to use. + * @param weights The raw weights. * * @return The undecimated weight for N (SIMD width) texels. */ static vint compute_value_of_texel_weight_int_vla( - int texel_to_get, - const decimation_table& dt, + int base_texel_index, + const decimation_info& di, const int* weights ) { vint summed_value(8); - vint weight_count(dt.texel_weight_count + texel_to_get); + vint weight_count(di.texel_weight_count + base_texel_index); int max_weight_count = hmax(weight_count).lane<0>(); promise(max_weight_count > 0); for (int i = 0; i < max_weight_count; i++) { - vint texel_weights(dt.texel_weights_4t[i] + texel_to_get); - vint texel_weights_int(dt.texel_weights_int_4t[i] + texel_to_get); + vint texel_weights(di.texel_weights_4t[i] + base_texel_index); + vint texel_weights_int(di.texel_weights_int_4t[i] + base_texel_index); summed_value += gatheri(weights, texel_weights) * texel_weights_int; } @@ -129,16 +129,16 @@ static inline vfloat4 decode_texel( void unpack_weights( const block_size_descriptor& bsd, const symbolic_compressed_block& scb, - const decimation_table& dt, + const decimation_info& di, bool is_dual_plane, - int quant_level, - int weights_plane1[MAX_TEXELS_PER_BLOCK], - int weights_plane2[MAX_TEXELS_PER_BLOCK] + quant_method quant_level, + int weights_plane1[BLOCK_MAX_TEXELS], + int weights_plane2[BLOCK_MAX_TEXELS] ) { // First, unquantize the weights ... 
- int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; - int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; - int weight_count = dt.weight_count; + int uq_plane1_weights[BLOCK_MAX_WEIGHTS]; + int uq_plane2_weights[BLOCK_MAX_WEIGHTS]; + unsigned int weight_count = di.weight_count; const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quant_level]); @@ -146,28 +146,28 @@ void unpack_weights( // Safe to overshoot as all arrays are allocated to full size if (!is_dual_plane) { - for (int i = 0; i < weight_count; i++) + for (unsigned int i = 0; i < weight_count; i++) { uq_plane1_weights[i] = qat->unquantized_value[scb.weights[i]]; } - for (int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { - store(compute_value_of_texel_weight_int_vla(i, dt, uq_plane1_weights), weights_plane1 + i); + store(compute_value_of_texel_weight_int_vla(i, di, uq_plane1_weights), weights_plane1 + i); } } else { - for (int i = 0; i < weight_count; i++) + for (unsigned int i = 0; i < weight_count; i++) { uq_plane1_weights[i] = qat->unquantized_value[scb.weights[i]]; - uq_plane2_weights[i] = qat->unquantized_value[scb.weights[i + PLANE2_WEIGHTS_OFFSET]]; + uq_plane2_weights[i] = qat->unquantized_value[scb.weights[i + WEIGHTS_PLANE2_OFFSET]]; } - for (int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { - store(compute_value_of_texel_weight_int_vla(i, dt, uq_plane1_weights), weights_plane1 + i); - store(compute_value_of_texel_weight_int_vla(i, dt, uq_plane2_weights), weights_plane2 + i); + store(compute_value_of_texel_weight_int_vla(i, di, uq_plane1_weights), weights_plane1 + i); + store(compute_value_of_texel_weight_int_vla(i, di, uq_plane2_weights), weights_plane2 + i); } } } @@ -191,9 +191,9 @@ void decompress_symbolic_block( blk.grayscale = false; // If we detected an error-block, blow up immediately. 
- if (scb.error_block) + if (scb.block_type == SYM_BTYPE_ERROR) { - for (int i = 0; i < bsd.texel_count; i++) + for (unsigned int i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = std::numeric_limits::quiet_NaN(); blk.data_g[i] = std::numeric_limits::quiet_NaN(); @@ -206,18 +206,19 @@ void decompress_symbolic_block( return; } - if (scb.block_mode < 0) + if ((scb.block_type == SYM_BTYPE_CONST_F16) || + (scb.block_type == SYM_BTYPE_CONST_U16)) { vfloat4 color; int use_lns = 0; - if (scb.block_mode == -2) + // UNORM16 constant color block + if (scb.block_type == SYM_BTYPE_CONST_U16) { vint4 colori(scb.constant_color); - // For sRGB decoding a real decoder would just use the top 8 bits - // for color conversion. We don't color convert, so linearly scale - // the top 8 bits into the full 16 bit dynamic range + // For sRGB decoding a real decoder would just use the top 8 bits for color conversion. + // We don't color convert, so rescale the top 8 bits into the full 16 bit dynamic range. if (decode_mode == ASTCENC_PRF_LDR_SRGB) { colori = asr<8>(colori) * 257; @@ -226,6 +227,7 @@ void decompress_symbolic_block( vint4 colorf16 = unorm16_to_sf16(colori); color = float16_to_float(colorf16); } + // FLOAT16 constant color block else { switch (decode_mode) @@ -244,7 +246,7 @@ void decompress_symbolic_block( } // TODO: Skip this and add constant color transfer to img block? 
- for (int i = 0; i < bsd.texel_count; i++) + for (unsigned int i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = color.lane<0>(); blk.data_g[i] = color.lane<1>(); @@ -259,25 +261,18 @@ void decompress_symbolic_block( // Get the appropriate partition-table entry int partition_count = scb.partition_count; - const partition_info *pt = get_partition_table(&bsd, partition_count); - pt += scb.partition_index; + const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); - // Get the appropriate block descriptor - const decimation_table *const *dts = bsd.decimation_tables; - - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - const decimation_table& dt = *(dts[bm.decimation_mode]); + // Get the appropriate block descriptors + const auto& bm = bsd.get_block_mode(scb.block_mode); + const auto& di = bsd.get_decimation_info(bm.decimation_mode); int is_dual_plane = bm.is_dual_plane; - int quant_level = bm.quant_mode; - // Unquantize and undecimate the weights - int weights[MAX_TEXELS_PER_BLOCK]; - int plane2_weights[MAX_TEXELS_PER_BLOCK]; - unpack_weights(bsd, scb, dt, is_dual_plane, quant_level, weights, plane2_weights); + int weights[BLOCK_MAX_TEXELS]; + int plane2_weights[BLOCK_MAX_TEXELS]; + unpack_weights(bsd, scb, di, is_dual_plane, bm.get_weight_quant_mode(), weights, plane2_weights); // Now that we have endpoint colors and weights, we can unpack texel colors int plane2_component = is_dual_plane ? 
scb.plane2_component : -1; @@ -293,17 +288,17 @@ void decompress_symbolic_block( unpack_color_endpoints(decode_mode, scb.color_formats[i], - scb.color_quant_level, + scb.get_color_quant_mode(), scb.color_values[i], rgb_lns, a_lns, ep0, ep1); vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); - int texel_count = pt->partition_texel_count[i]; + int texel_count = pi.partition_texel_count[i]; for (int j = 0; j < texel_count; j++) { - int tix = pt->texels_of_partition[i][j]; + int tix = pi.texels_of_partition[i][j]; vint4 color = lerp_color_int(decode_mode, ep0, ep1, @@ -332,7 +327,7 @@ float compute_symbolic_block_difference( const error_weight_block& ewb ) { // If we detected an error-block, blow up immediately. - if (scb.error_block) + if (scb.block_type == SYM_BTYPE_ERROR) { return 1e29f; } @@ -341,22 +336,18 @@ float compute_symbolic_block_difference( // Get the appropriate partition-table entry int partition_count = scb.partition_count; - - const partition_info *pt = get_partition_table(&bsd, partition_count); - pt += scb.partition_index; + const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the appropriate block descriptor - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - const decimation_table& dt = *(bsd.decimation_tables[bm.decimation_mode]); + const block_mode& bm = bsd.get_block_mode(scb.block_mode); + const decimation_info& di = *(bsd.decimation_tables[bm.decimation_mode]); bool is_dual_plane = bm.is_dual_plane != 0; // Unquantize and undecimate the weights - int weights[MAX_TEXELS_PER_BLOCK]; - int plane2_weights[MAX_TEXELS_PER_BLOCK]; - unpack_weights(bsd, scb, dt, is_dual_plane, bm.quant_mode, weights, plane2_weights); + int weights[BLOCK_MAX_TEXELS]; + int plane2_weights[BLOCK_MAX_TEXELS]; + unpack_weights(bsd, scb, di, is_dual_plane, bm.get_weight_quant_mode(), weights, 
plane2_weights); int plane2_component = is_dual_plane ? scb.plane2_component : -1; vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component); @@ -372,7 +363,7 @@ float compute_symbolic_block_difference( unpack_color_endpoints(config.profile, scb.color_formats[i], - scb.color_quant_level, + scb.get_color_quant_mode(), scb.color_values[i], rgb_lns, a_lns, ep0, ep1); @@ -380,10 +371,10 @@ float compute_symbolic_block_difference( vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); // Unpack and compute error for each texel in the partition - int texel_count = pt->partition_texel_count[i]; + int texel_count = pi.partition_texel_count[i]; for (int j = 0; j < texel_count; j++) { - int tix = pt->texels_of_partition[i][j]; + int tix = pi.texels_of_partition[i][j]; vint4 colori = lerp_color_int(config.profile, ep0, ep1, weights[tix], diff --git a/lib/astc-encoder/Source/astcenc_diagnostic_trace.h b/lib/astc-encoder/Source/astcenc_diagnostic_trace.h index 888365a933..614894987f 100644 --- a/lib/astc-encoder/Source/astcenc_diagnostic_trace.h +++ b/lib/astc-encoder/Source/astcenc_diagnostic_trace.h @@ -21,43 +21,38 @@ * Overview * ======== * - * The built-in diagnostic trace tool generates a hierarchical JSON tree - * structure. The tree hierarchy contains three levels: + * The built-in diagnostic trace tool generates a hierarchical JSON tree structure. The tree + * hierarchy contains three levels: * * - block * - pass * - candidate * - * One block node exists for each compressed block in the image. One pass node - * exists for each major pass (N partition, M planes, O components) applied to - * a block. One candidate node exists for each encoding candidate trialed for a - * pass. + * One block node exists for each compressed block in the image. One pass node exists for each major + * pass (N partition, M planes, O components) applied to a block. One candidate node exists for each + * encoding candidate trialed for a pass. 
* - * Each node contains both the hierarchy but also a number of attributes which - * explain the behavior. For example, the block node contains the block - * coordinates in the image, the pass explains the pass configuration, and the - * candidate will explain the candidate encoding such as weight decimation, - * refinement error, etc. + * Each node contains both the hierarchy but also a number of attributes which explain the behavior. + * For example, the block node contains the block coordinates in the image, the pass explains the + * pass configuration, and the candidate will explain the candidate encoding such as weight + * decimation, refinement error, etc. * - * Trace Nodes are designed as scope-managed C++ objects with stack-like - * push/pop behavior. Constructing a trace node on the stack will automatically - * add it to the current node as a child, and then make it the current node. - * Destroying the current node will pop the stack and set the parent to the - * current node. This provides a robust mechanism for ensuring reliable - * nesting in the tree structure. + * Trace Nodes are designed as scope-managed C++ objects with stack-like push/pop behavior. + * Constructing a trace node on the stack will automatically add it to the current node as a child, + * and then make it the current node. Destroying the current node will pop the stack and set the + * parent to the current node. This provides a robust mechanism for ensuring reliable nesting in the + * tree structure. * - * A set of utility macros are provided to add attribute annotations to the - * current trace node. + * A set of utility macros are provided to add attribute annotations to the current trace node. * * Usage * ===== * - * Create Trace Nodes on the stack using the TRACE_NODE() macro. This will - * compile-out completely in builds with diagnostics disabled. + * Create Trace Nodes on the stack using the @c TRACE_NODE() macro. 
This will compile-out completely + * in builds with diagnostics disabled. * - * Add annotations to the current trace node using the trace_add_data() macro. - * This will similarly compile out completely in builds with diagnostics - * disabled. + * Add annotations to the current trace node using the @c trace_add_data() macro. This will + * similarly compile out completely in builds with diagnostics disabled. * * If you need to add additional code to support diagnostics-only behavior wrap * it in preprocessor guards: @@ -84,9 +79,8 @@ class TraceNode /** * @brief Construct a new node. * - * Constructing a node will push to the the top of the stack, automatically - * making it a child of the current node, and then setting it to become the - * current node. + * Constructing a node will push to the top of the stack, automatically making it a child of + * the current node, and then setting it to become the current node. * * @param format The format template for the node name. * @param ... The format parameters. @@ -96,8 +90,8 @@ class TraceNode /** * @brief Add an attribute to this node. * - * Note that no quoting is applied to the @c value, so if quoting is - * needed it must be done by the caller. + * Note that no quoting is applied to the @c value, so if quoting is needed it must be done by + * the caller. * * @param type The type of the attribute. * @param key The key of the attribute. @@ -108,9 +102,9 @@ class TraceNode /** * @brief Destroy this node. * - * Destroying a node will pop it from the top of the stack, making its - * parent the current node. It is invalid behavior to destroy a node that - * is not the current node; usage must conform to stack push-pop semantics. + * Destroying a node will pop it from the top of the stack, making its parent the current node. + * It is invalid behavior to destroy a node that is not the current node; usage must conform to + * stack push-pop semantics.
*/ ~TraceNode(); diff --git a/lib/astc-encoder/Source/astcenc_entry.cpp b/lib/astc-encoder/Source/astcenc_entry.cpp index a6b8db8c2d..5a49df0143 100644 --- a/lib/astc-encoder/Source/astcenc_entry.cpp +++ b/lib/astc-encoder/Source/astcenc_entry.cpp @@ -370,10 +370,10 @@ static astcenc_error validate_decompression_swizzle( * * This function can respond in two ways: * - * * Numerical inputs that have valid ranges are clamped to those valid - * ranges. No error is thrown for out-of-range inputs in this case. - * * Numerical inputs and logic inputs are are logically invalid and which - * make no sense algorithmically will return an error. + * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown + * for out-of-range inputs in this case. + * * Numerical inputs and logic inputs that are logically invalid and which make no sense + * algorithmically will return an error. * * @param[in,out] config The input compressor configuration. * @@ -425,7 +425,7 @@ static astcenc_error validate_config( config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); - config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, (unsigned int)PARTITION_COUNT); + config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, (unsigned int)BLOCK_MAX_PARTITIONINGS); config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); @@ -488,10 +488,9 @@ astcenc_error astcenc_config_init( float texels = static_cast(block_x * block_y * block_z); float ltexels = logf(texels) / logf(10.0f); - // Process the performance quality level or preset; note that this must be - // done before we process any additional settings, such as
color profile - // and flags, which may replace some of these settings with more use case - // tuned values + // Process the performance quality level or preset; note that this must be done before we + // process any additional settings, such as color profile and flags, which may replace some of + // these settings with more use case tuned values if (quality < ASTCENC_PRE_FASTEST || quality > ASTCENC_PRE_EXHAUSTIVE) { @@ -806,7 +805,7 @@ astcenc_error astcenc_context_alloc( #if !defined(ASTCENC_DECOMPRESS_ONLY) prepare_angular_tables(); #endif - build_quant_mode_table(); + init_quant_mode_table(); return ASTCENC_SUCCESS; } @@ -1003,6 +1002,12 @@ astcenc_error astcenc_compress_image( return ASTCENC_ERR_OUT_OF_MEM; } + // If context thread count is one then implicitly reset + if (ctx->thread_count == 1) + { + astcenc_compress_reset(ctx); + } + if (ctx->config.v_rgb_mean != 0.0f || ctx->config.v_rgb_stdev != 0.0f || ctx->config.v_a_mean != 0.0f || ctx->config.v_a_stdev != 0.0f || ctx->config.a_scale_radius != 0) @@ -1019,14 +1024,14 @@ astcenc_error astcenc_compress_image( return init_compute_averages_and_variances( image, ctx->config.v_rgb_power, ctx->config.v_a_power, ctx->config.v_rgba_radius, ctx->config.a_scale_radius, *swizzle, - ctx->arg, ctx->ag); + ctx->avg_var_preprocess_args); }; // Only the first thread actually runs the initializer ctx->manage_avg_var.init(init_avg_var); // All threads will enter this function and dynamically grab work - compute_averages_and_variances(*ctx, ctx->ag); + compute_averages_and_variances(*ctx, ctx->avg_var_preprocess_args); } // Wait for compute_averages_and_variances to complete before compressing @@ -1118,6 +1123,12 @@ astcenc_error astcenc_decompress_image( imageblock blk; + // If context thread count is one then implicitly reset + if (ctx->thread_count == 1) + { + astcenc_decompress_reset(ctx); + } + // Only the first thread actually runs the initializer ctx->manage_decompress.init(zblocks * yblocks * xblocks); @@ -1199,34 
+1210,31 @@ astcenc_error astcenc_get_block_info( info->block_z = ctx->config.block_z; info->texel_count = bsd.texel_count; - // Check for error blocks first - block_mode will be negative - info->is_error_block = scb.error_block != 0; + // Check for error blocks first + info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; if (info->is_error_block) { return ASTCENC_SUCCESS; } - // Check for constant color blocks second - block_mode will be negative - info->is_constant_block = scb.block_mode < 0; + // Check for constant color blocks second + info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || + scb.block_type == SYM_BTYPE_CONST_U16; if (info->is_constant_block) { return ASTCENC_SUCCESS; } - // Otherwise, handle a full block with partition payload; values are known - // to be valid once the two conditions above have been checked + // Otherwise handle a full block ; known to be valid after conditions above have been checked int partition_count = scb.partition_count; - const partition_info* pt = get_partition_table(&bsd, partition_count); - pt += scb.partition_index; + const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - const decimation_table& dt = *bsd.decimation_tables[bm.decimation_mode]; + const block_mode& bm = bsd.get_block_mode(scb.block_mode); + const decimation_info& di = *bsd.decimation_tables[bm.decimation_mode]; - info->weight_x = dt.weight_x; - info->weight_y = dt.weight_y; - info->weight_z = dt.weight_z; + info->weight_x = di.weight_x; + info->weight_y = di.weight_y; + info->weight_z = di.weight_z; info->is_dual_plane_block = bm.is_dual_plane != 0; @@ -1234,11 +1242,11 @@ astcenc_error astcenc_get_block_info( info->partition_index = scb.partition_index; info->dual_plane_component = scb.plane2_component; - 
info->color_level_count = get_quant_method_levels((quant_method)scb.color_quant_level); - info->weight_level_count = get_quant_method_levels((quant_method)bm.quant_mode); + info->color_level_count = get_quant_level(scb.get_color_quant_mode()); + info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); // Unpack color endpoints for each active partition - for (int i = 0; i < scb.partition_count; i++) + for (unsigned int i = 0; i < scb.partition_count; i++) { bool rgb_hdr; bool a_hdr; @@ -1246,7 +1254,7 @@ astcenc_error astcenc_get_block_info( unpack_color_endpoints(ctx->config.profile, scb.color_formats[i], - scb.color_quant_level, + scb.get_color_quant_mode(), scb.color_values[i], rgb_hdr, a_hdr, endpnt[0], endpnt[1]); @@ -1267,23 +1275,23 @@ astcenc_error astcenc_get_block_info( } // Unpack weights for each texel - int weight_plane1[MAX_TEXELS_PER_BLOCK]; - int weight_plane2[MAX_TEXELS_PER_BLOCK]; + int weight_plane1[BLOCK_MAX_TEXELS]; + int weight_plane2[BLOCK_MAX_TEXELS]; - unpack_weights(bsd, scb, dt, bm.is_dual_plane, bm.quant_mode, weight_plane1, weight_plane2); - for (int i = 0; i < bsd.texel_count; i++) + unpack_weights(bsd, scb, di, bm.is_dual_plane, bm.get_weight_quant_mode(), weight_plane1, weight_plane2); + for (unsigned int i = 0; i < bsd.texel_count; i++) { - info->weight_values_plane1[i] = (float)weight_plane1[i] / (float)TEXEL_WEIGHT_SUM; + info->weight_values_plane1[i] = (float)weight_plane1[i] * (1.0f / WEIGHTS_TEXEL_SUM); if (info->is_dual_plane_block) { - info->weight_values_plane2[i] = (float)weight_plane2[i] / (float)TEXEL_WEIGHT_SUM; + info->weight_values_plane2[i] = (float)weight_plane2[i] * (1.0f / WEIGHTS_TEXEL_SUM); } } // Unpack partition assignments for each texel - for (int i = 0; i < bsd.texel_count; i++) + for (unsigned int i = 0; i < bsd.texel_count; i++) { - info->partition_assignment[i] = pt->partition_of_texel[i]; + info->partition_assignment[i] = pi.partition_of_texel[i]; } return ASTCENC_SUCCESS; diff --git 
a/lib/astc-encoder/Source/astcenc_find_best_partitioning.cpp b/lib/astc-encoder/Source/astcenc_find_best_partitioning.cpp index d6eaa20d87..385f778a35 100644 --- a/lib/astc-encoder/Source/astcenc_find_best_partitioning.cpp +++ b/lib/astc-encoder/Source/astcenc_find_best_partitioning.cpp @@ -20,42 +20,469 @@ /** * @brief Functions for finding best partition for a block. * - * Major step 1: + * The partition search operates in two stages. The first pass uses kmeans clustering to group + * texels into an ideal partitioning for the requested partition count, and then compares that + * against the 1024 partitionings generated by the ASTC partition hash function. The generated + * partitions are then ranked by the number of texels in the wrong partition, compared to the ideal + * clustering. All 1024 partitions are tested for similarity and ranked, apart from duplicates and + * partitionings that actually generate fewer than the requested partition count, but only the top + * N candidates are actually put through a more detailed search. N is determined by the compressor + * quality preset. * - * - Find best partitioning assuming uncorrelated colors. - * - Find best partitioning assuming RGBS color representation. + * For the detailed search, each candidate is checked against two possible encoding methods: * - * Finding best partitioning for a block: + * - The best partitioning assuming different chroma colors (RGB + RGB or RGB + delta endpoints). + * - The best partitioning assuming same chroma colors (RGB + scale endpoints). * - * For each available partitioning compute mean color and dominant direction. - * This defines two lines, both of which go through the mean-color-value. + * This is implemented by computing the mean color and dominant direction for each + * partition. This defines two lines, both of which go through the mean color value. 
* - * - One line has a direction defined by the dominant direction; this is used - * to assess the error from using an uncorrelated color representation. - * - The other line goes through (0,0,0,1) and is used to assess the error from - * using an RGBS color representation. + * - One line has a direction defined by the dominant direction; this is used to assess the error + * from using an uncorrelated color representation. + * - The other line goes through (0,0,0,1) and is used to assess the error from using a same chroma + * (RGB + scale) color representation. * - * Then compute the block's squared-errors that result from using the these - * two lines for endpoint selection. + * The best candidate is selected by computing the squared-errors that result from using these + * lines for endpoint selection. */ #include "astcenc_internal.h" +/** + * @brief Pick some initial kmeans cluster centers. + * + * @param blk The image block color data to compress. + * @param texel_count The number of texels in the block. + * @param partition_count The number of partitions in the block. + * @param[out] cluster_centers The initial partition cluster center colors. 
+ */ +static void kmeans_init( + const imageblock& blk, + unsigned int texel_count, + unsigned int partition_count, + vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS] +) { + promise(texel_count > 0); + promise(partition_count > 0); + + unsigned int clusters_selected = 0; + float distances[BLOCK_MAX_TEXELS]; + + // Pick a random sample as first cluster center; 145897 from random.org + unsigned int sample = 145897 % texel_count; + vfloat4 center_color = blk.texel(sample); + cluster_centers[clusters_selected] = center_color; + clusters_selected++; + + // Compute the distance to the first cluster center + float distance_sum = 0.0f; + for (unsigned int i = 0; i < texel_count; i++) + { + vfloat4 color = blk.texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); + distance_sum += distance; + distances[i] = distance; + } + + // More numbers from random.org for weighted-random center selection + const float cluster_cutoffs[9] = { + 0.626220f, 0.932770f, 0.275454f, + 0.318558f, 0.240113f, 0.009190f, + 0.347661f, 0.731960f, 0.156391f + }; + + unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); + + // Pick the remaining samples as needed + while (true) + { + // Pick the next center in a weighted-random fashion. 
+ float summa = 0.0f; + float distance_cutoff = distance_sum * cluster_cutoffs[cutoff++]; + for (sample = 0; sample < texel_count; sample++) + { + summa += distances[sample]; + if (summa >= distance_cutoff) + { + break; + } + } + + // Clamp to a valid range and store the selected cluster center + sample = astc::min(sample, texel_count - 1); + + center_color = blk.texel(sample); + cluster_centers[clusters_selected++] = center_color; + if (clusters_selected >= partition_count) + { + break; + } + + // Compute the distance to the new cluster center, keep the min dist + distance_sum = 0.0f; + for (unsigned int i = 0; i < texel_count; i++) + { + vfloat4 color = blk.texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); + distance = astc::min(distance, distances[i]); + distance_sum += distance; + distances[i] = distance; + } + } +} + +/** + * @brief Assign texels to clusters, based on a set of chosen center points. + * + * @param blk The image block color data to compress. + * @param texel_count The number of texels in the block. + * @param partition_count The number of partitions in the block. + * @param cluster_centers The partition cluster center colors. + * @param[out] partition_of_texel The partition assigned for each texel. 
+ */ +static void kmeans_assign( + const imageblock& blk, + unsigned int texel_count, + unsigned int partition_count, + const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], + uint8_t partition_of_texel[BLOCK_MAX_TEXELS] +) { + promise(texel_count > 0); + promise(partition_count > 0); + + uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; + + // Find the best partition for every texel + for (unsigned int i = 0; i < texel_count; i++) + { + float best_distance = std::numeric_limits<float>::max(); + unsigned int best_partition = 0; + + vfloat4 color = blk.texel(i); + for (unsigned int j = 0; j < partition_count; j++) + { + vfloat4 diff = color - cluster_centers[j]; + float distance = dot_s(diff, diff); + if (distance < best_distance) + { + best_distance = distance; + best_partition = j; + } + } + + partition_of_texel[i] = best_partition; + partition_texel_count[best_partition]++; + } + + // It is possible to get a situation where a partition ends up without any texels. In this case, + // assign texel N to partition N. This is silly, but ensures that every partition retains at + // least one texel. Reassigning a texel in this manner may cause another partition to go empty, + // so if we actually did a reassignment, run the whole loop over again. + bool problem_case; + do + { + problem_case = false; + for (unsigned int i = 0; i < partition_count; i++) + { + if (partition_texel_count[i] == 0) + { + partition_texel_count[partition_of_texel[i]]--; + partition_texel_count[i]++; + partition_of_texel[i] = i; + problem_case = true; + } + } + } while (problem_case); +} + +/** + * @brief Compute new cluster centers based on their center of gravity. + * + * @param blk The image block color data to compress. + * @param texel_count The number of texels in the block. + * @param partition_count The number of partitions in the block. + * @param[out] cluster_centers The new cluster center colors. + * @param partition_of_texel The partition assigned for each texel. 
+ */ +static void kmeans_update( + const imageblock& blk, + unsigned int texel_count, + unsigned int partition_count, + vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], + const uint8_t partition_of_texel[BLOCK_MAX_TEXELS] +) { + promise(texel_count > 0); + promise(partition_count > 0); + + vfloat4 color_sum[BLOCK_MAX_PARTITIONS] { + vfloat4::zero(), + vfloat4::zero(), + vfloat4::zero(), + vfloat4::zero() + }; + + uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; + + // Find the center-of-gravity in each cluster + for (unsigned int i = 0; i < texel_count; i++) + { + uint8_t partition = partition_of_texel[i]; + color_sum[partition] += blk.texel(i);; + partition_texel_count[partition]++; + } + + // Set the center of gravity to be the new cluster center + for (unsigned int i = 0; i < partition_count; i++) + { + float scale = 1.0f / static_cast<float>(partition_texel_count[i]); + cluster_centers[i] = color_sum[i] * scale; + } +} + +/** + * @brief Compute bit-mismatch for partitioning in 2-partition mode. + * + * @param a The texel assignment bitvector for the block. + * @param b The texel assignment bitvector for the partition table. + * + * @return The number of bit mismatches. 
+ */ +static inline unsigned int partition_mismatch3( + const uint64_t a[3], + const uint64_t b[3] +) { + int p00 = astc::popcount(a[0] ^ b[0]); + int p01 = astc::popcount(a[0] ^ b[1]); + int p02 = astc::popcount(a[0] ^ b[2]); + + int p10 = astc::popcount(a[1] ^ b[0]); + int p11 = astc::popcount(a[1] ^ b[1]); + int p12 = astc::popcount(a[1] ^ b[2]); + + int p20 = astc::popcount(a[2] ^ b[0]); + int p21 = astc::popcount(a[2] ^ b[1]); + int p22 = astc::popcount(a[2] ^ b[2]); + + int s0 = p11 + p22; + int s1 = p12 + p21; + int v0 = astc::min(s0, s1) + p00; + + int s2 = p10 + p22; + int s3 = p12 + p20; + int v1 = astc::min(s2, s3) + p01; + + int s4 = p10 + p21; + int s5 = p11 + p20; + int v2 = astc::min(s4, s5) + p02; + + return astc::min(v0, v1, v2); +} + +/** + * @brief Compute bit-mismatch for partitioning in 4-partition mode. + * + * @param a The texel assignment bitvector for the block. + * @param b The texel assignment bitvector for the partition table. + * + * @return The number of bit mismatches. 
+ */ +static inline unsigned int partition_mismatch4( + const uint64_t a[4], + const uint64_t b[4] +) { + int p00 = astc::popcount(a[0] ^ b[0]); + int p01 = astc::popcount(a[0] ^ b[1]); + int p02 = astc::popcount(a[0] ^ b[2]); + int p03 = astc::popcount(a[0] ^ b[3]); + + int p10 = astc::popcount(a[1] ^ b[0]); + int p11 = astc::popcount(a[1] ^ b[1]); + int p12 = astc::popcount(a[1] ^ b[2]); + int p13 = astc::popcount(a[1] ^ b[3]); + + int p20 = astc::popcount(a[2] ^ b[0]); + int p21 = astc::popcount(a[2] ^ b[1]); + int p22 = astc::popcount(a[2] ^ b[2]); + int p23 = astc::popcount(a[2] ^ b[3]); + + int p30 = astc::popcount(a[3] ^ b[0]); + int p31 = astc::popcount(a[3] ^ b[1]); + int p32 = astc::popcount(a[3] ^ b[2]); + int p33 = astc::popcount(a[3] ^ b[3]); + + int mx23 = astc::min(p22 + p33, p23 + p32); + int mx13 = astc::min(p21 + p33, p23 + p31); + int mx12 = astc::min(p21 + p32, p22 + p31); + int mx03 = astc::min(p20 + p33, p23 + p30); + int mx02 = astc::min(p20 + p32, p22 + p30); + int mx01 = astc::min(p21 + p30, p20 + p31); + + int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12); + int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02); + int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01); + int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12); + + return astc::min(v0, v1, v2, v3); +} + +using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*); + +/** + * @brief Count the partition table mismatches vs the data clustering. + * + * @param bsd The block size information. + * @param partition_count The number of partitions in the block. + * @param bitmaps The block texel partition assignment patterns. + * @param[out] mismatch_counts The array storing per partitioning mismatch counts. 
+ */ +static void count_partition_mismatch_bits( + const block_size_descriptor& bsd, + unsigned int partition_count, + const uint64_t bitmaps[BLOCK_MAX_PARTITIONS], + unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS] +) { + const auto* pt = bsd.get_partition_table(partition_count); + + // Function pointer dispatch table + const mismatch_dispatch dispatch[3] { + partition_mismatch2, + partition_mismatch3, + partition_mismatch4 + }; + + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) + { + int bitcount = 255; + if (pt->partition_count == partition_count) + { + bitcount = dispatch[partition_count - 2](bitmaps, pt->coverage_bitmaps); + } + + mismatch_counts[i] = bitcount; + pt++; + } +} + +/** + * @brief Use counting sort on the mismatch array to sort partition candidates. + * + * @param mismatch_count Partitioning mismatch counts, in index order. + * @param[out] partition_ordering Partition index values, in mismatch order. + */ +static void get_partition_ordering_by_mismatch_bits( + const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS], + unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS] +) { + unsigned int mscount[256] { 0 }; + + // Create the histogram of mismatch counts + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) + { + mscount[mismatch_count[i]]++; + } + + // Create a running sum from the histogram array + // Cells store previous values only; i.e. exclude self after sum + unsigned int summa = 0; + for (unsigned int i = 0; i < 256; i++) + { + unsigned int cnt = mscount[i]; + mscount[i] = summa; + summa += cnt; + } + + // Use the running sum as the index, incrementing after read to allow + // sequential entries with the same count + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) + { + unsigned int idx = mscount[mismatch_count[i]]++; + partition_ordering[idx] = i; + } +} + +/** + * @brief Use k-means clustering to compute a partition ordering for a block. + * + * @param bsd The block size information. 
+ * @param blk The image block color data to compress. + * @param partition_count The desired number of partitions in the block. + * @param[out] partition_ordering The list of recommended partition indices, in priority order. + */ +static void compute_kmeans_partition_ordering( + const block_size_descriptor& bsd, + const imageblock& blk, + unsigned int partition_count, + unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS] +) { + vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]; + uint8_t texel_partitions[BLOCK_MAX_TEXELS]; + + // Use three passes of k-means clustering to partition the block data + for (unsigned int i = 0; i < 3; i++) + { + if (i == 0) + { + kmeans_init(blk, bsd.texel_count, partition_count, cluster_centers); + } + else + { + kmeans_update(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions); + } + + kmeans_assign(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions); + } + + // Construct the block bitmaps of texel assignments to each partition + uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 }; + unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + promise(texels_to_process > 0); + for (unsigned int i = 0; i < texels_to_process; i++) + { + unsigned int idx = bsd.kmeans_texels[i]; + bitmaps[texel_partitions[idx]] |= 1ULL << i; + } + + // Count the mismatch between the block and the format's partition tables + unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS]; + count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts); + + // Sort the partitions based on the number of mismatched bits + get_partition_ordering_by_mismatch_bits(mismatch_counts, partition_ordering); +} + /* See header for documentation. 
*/ -void find_best_partitionings( +void find_best_partition_candidates( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, - int partition_count, - int partition_search_limit, - int& best_partition_uncor, - int& best_partition_samec, - int* best_partition_dualplane + unsigned int partition_count, + unsigned int partition_search_limit, + unsigned int& best_partition_uncor, + unsigned int& best_partition_samec, + unsigned int* best_partition_dualplane ) { - // constant used to estimate quantization error for a given partitioning; - // the optimal value for this constant depends on bitrate. - // These constants have been determined empirically. - int texels_per_block = bsd.texel_count; + // Constant used to estimate quantization error for a given partitioning; the optimal value for + // this depends on bitrate. These values have been determined empirically. + unsigned int texels_per_block = bsd.texel_count; float weight_imprecision_estim = 0.055f; if (texels_per_block <= 20) { @@ -75,61 +502,59 @@ void find_best_partitionings( weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim; - int partition_sequence[PARTITION_COUNT]; - - kmeans_compute_partition_ordering(bsd, blk, partition_count, partition_sequence); - - int uses_alpha = imageblock_uses_alpha(&blk); + unsigned int partition_sequence[BLOCK_MAX_PARTITIONINGS]; + compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence); - const partition_info* ptab = get_partition_table(&bsd, partition_count); + bool uses_alpha = blk.is_using_alpha(); // Partitioning errors assuming uncorrelated-chrominance endpoints float uncor_best_error { ERROR_CALC_DEFAULT }; - int uncor_best_partition { 0 }; + unsigned int uncor_best_partition { 0 }; // Partitioning errors assuming same-chrominance endpoints // Store two so we can always return one different to uncorr float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT }; - int 
samec_best_partitions[2] { 0, 0 }; + unsigned int samec_best_partitions[2] { 0, 0 }; // Partitioning errors assuming that one color component is uncorrelated float sep_best_error { ERROR_CALC_DEFAULT }; - int sep_best_partition { 0 }; - int sep_best_component { 0 }; + unsigned int sep_best_partition { 0 }; + unsigned int sep_best_component { 0 }; bool skip_two_plane = best_partition_dualplane == nullptr; if (uses_alpha) { - for (int i = 0; i < partition_search_limit; i++) + for (unsigned int i = 0; i < partition_search_limit; i++) { - int partition = partition_sequence[i]; + unsigned int partition = partition_sequence[i]; + const auto& pi = bsd.get_partition_info(partition_count, partition); - int bk_partition_count = ptab[partition].partition_count; + unsigned int bk_partition_count = pi.partition_count; if (bk_partition_count < partition_count) { break; } // Compute weighting to give to each component in each partition - partition_metrics pms[4]; + partition_metrics pms[BLOCK_MAX_PARTITIONS]; - compute_avgs_and_dirs_4_comp(*(ptab + partition), blk, ewb, pms); + compute_avgs_and_dirs_4_comp(pi, blk, ewb, pms); - line4 uncor_lines[4]; - line4 samec_lines[4]; - line3 sep_r_lines[4]; - line3 sep_g_lines[4]; - line3 sep_b_lines[4]; - line3 sep_a_lines[4]; + line4 uncor_lines[BLOCK_MAX_PARTITIONS]; + line4 samec_lines[BLOCK_MAX_PARTITIONS]; + line3 sep_r_lines[BLOCK_MAX_PARTITIONS]; + line3 sep_g_lines[BLOCK_MAX_PARTITIONS]; + line3 sep_b_lines[BLOCK_MAX_PARTITIONS]; + line3 sep_a_lines[BLOCK_MAX_PARTITIONS]; - processed_line4 uncor_plines[4]; - processed_line4 samec_plines[4]; + processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS]; + processed_line4 samec_plines[BLOCK_MAX_PARTITIONS]; - float uncor_line_lens[4]; - float samec_line_lens[4]; + float uncor_line_lens[BLOCK_MAX_PARTITIONS]; + float samec_line_lens[BLOCK_MAX_PARTITIONS]; - for (int j = 0; j < partition_count; j++) + for (unsigned int j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; @@ -171,7 
+596,7 @@ void find_best_partitionings( float samec_error = 0.0f; vfloat4 sep_error = vfloat4::zero(); - compute_error_squared_rgba(*(ptab + partition), + compute_error_squared_rgba(pi, blk, ewb, uncor_plines, @@ -181,22 +606,20 @@ void find_best_partitionings( uncor_error, samec_error); - /* - Compute an estimate of error introduced by weight quantization imprecision. - This error is computed as follows, for each partition - 1: compute the principal-axis vector (full length) in error-space - 2: convert the principal-axis vector to regular RGB-space - 3: scale the vector by a constant that estimates average quantization error - 4: for each texel, square the vector, then do a dot-product with the texel's error weight; - sum up the results across all texels. - 4(optimized): square the vector once, then do a dot-product with the average texel error, - then multiply by the number of texels. - */ - - for (int j = 0; j < partition_count; j++) + // Compute an estimate of error introduced by weight quantization imprecision. + // This error is computed as follows, for each partition + // 1: compute the principal-axis vector (full length) in error-space + // 2: convert the principal-axis vector to regular RGB-space + // 3: scale the vector by a constant that estimates average quantization error + // 4: for each texel, square the vector, then do a dot-product with the texel's + // error weight; sum up the results across all texels. + // 4(optimized): square the vector once, then do a dot-product with the average + // texel error, then multiply by the number of texels. 
+ + for (unsigned int j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; - float tpp = (float)(ptab[partition].partition_texel_count[j]); + float tpp = (float)(pi.partition_texel_count[j]); vfloat4 ics = pm.icolor_scale; vfloat4 error_weights = pm.error_weight * (tpp * weight_imprecision_estim); @@ -285,27 +708,28 @@ void find_best_partitionings( } else { - for (int i = 0; i < partition_search_limit; i++) + for (unsigned int i = 0; i < partition_search_limit; i++) { - int partition = partition_sequence[i]; + unsigned int partition = partition_sequence[i]; + const auto& pi = bsd.get_partition_info(partition_count, partition); - int bk_partition_count = ptab[partition].partition_count; + unsigned int bk_partition_count = pi.partition_count; if (bk_partition_count < partition_count) { break; } // Compute weighting to give to each component in each partition - partition_metrics pms[4]; - compute_avgs_and_dirs_3_comp(*(ptab + partition), blk, ewb, 3, pms); + partition_metrics pms[BLOCK_MAX_PARTITIONS]; + compute_avgs_and_dirs_3_comp(pi, blk, ewb, 3, pms); - partition_lines3 plines[4]; + partition_lines3 plines[BLOCK_MAX_PARTITIONS]; - line2 sep_r_lines[4]; - line2 sep_g_lines[4]; - line2 sep_b_lines[4]; + line2 sep_r_lines[BLOCK_MAX_PARTITIONS]; + line2 sep_g_lines[BLOCK_MAX_PARTITIONS]; + line2 sep_b_lines[BLOCK_MAX_PARTITIONS]; - for (int j = 0; j < partition_count; j++) + for (unsigned int j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; partition_lines3& pl = plines[j]; @@ -344,31 +768,29 @@ void find_best_partitionings( float samec_error = 0.0f; vfloat4 sep_error = vfloat4::zero(); - compute_error_squared_rgb(*(ptab + partition), + compute_error_squared_rgb(pi, blk, ewb, plines, uncor_error, samec_error); - /* - compute an estimate of error introduced by weight imprecision. 
- This error is computed as follows, for each partition - 1: compute the principal-axis vector (full length) in error-space - 2: convert the principal-axis vector to regular RGB-space - 3: scale the vector by a constant that estimates average quantization error. - 4: for each texel, square the vector, then do a dot-product with the texel's error weight; - sum up the results across all texels. - 4(optimized): square the vector once, then do a dot-product with the average texel error, - then multiply by the number of texels. - */ - - for (int j = 0; j < partition_count; j++) + // Compute an estimate of error introduced by weight quantization imprecision. + // This error is computed as follows, for each partition + // 1: compute the principal-axis vector (full length) in error-space + // 2: convert the principal-axis vector to regular RGB-space + // 3: scale the vector by a constant that estimates average quantization error + // 4: for each texel, square the vector, then do a dot-product with the texel's + // error weight; sum up the results across all texels. + // 4(optimized): square the vector once, then do a dot-product with the average + // texel error, then multiply by the number of texels. + + for (unsigned int j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; partition_lines3& pl = plines[j]; - float tpp = (float)(ptab[partition].partition_texel_count[j]); + float tpp = (float)(pi.partition_texel_count[j]); vfloat4 ics = pm.icolor_scale; ics.set_lane<3>(0.0f); @@ -453,12 +875,12 @@ void find_best_partitionings( best_partition_uncor = uncor_best_partition; - int index = samec_best_partitions[0] != uncor_best_partition ? 0 : 1; + unsigned int index = samec_best_partitions[0] != uncor_best_partition ? 
0 : 1; best_partition_samec = samec_best_partitions[index]; if (best_partition_dualplane) { - *best_partition_dualplane = (sep_best_component << PARTITION_BITS) | + *best_partition_dualplane = (sep_best_component << PARTITION_INDEX_BITS) | (sep_best_partition); } } diff --git a/lib/astc-encoder/Source/astcenc_ideal_endpoints_and_weights.cpp b/lib/astc-encoder/Source/astcenc_ideal_endpoints_and_weights.cpp index dad8962c83..0263d37542 100644 --- a/lib/astc-encoder/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/lib/astc-encoder/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -36,7 +36,7 @@ * @param[out] ei The computed ideal endpoints and weights. * @param component The color component to compute. */ -static void compute_endpoints_and_ideal_weights_1_comp( +static void compute_ideal_colors_and_weights_1_comp( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, @@ -51,16 +51,16 @@ static void compute_endpoints_and_ideal_weights_1_comp( int texel_count = bsd.texel_count; promise(texel_count > 0); - float lowvalues[4] { 1e10f, 1e10f, 1e10f, 1e10f }; - float highvalues[4] { -1e10f, -1e10f, -1e10f, -1e10f }; + float lowvalues[BLOCK_MAX_PARTITIONS] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highvalues[BLOCK_MAX_PARTITIONS] { -1e10f, -1e10f, -1e10f, -1e10f }; - float partition_error_scale[4]; - float linelengths_rcp[4]; + float partition_error_scale[BLOCK_MAX_PARTITIONS]; + float linelengths_rcp[BLOCK_MAX_PARTITIONS]; const float *error_weights = nullptr; const float* data_vr = nullptr; - assert(component < 4); + assert(component < BLOCK_MAX_COMPONENTS); switch (component) { case 0: @@ -153,7 +153,7 @@ static void compute_endpoints_and_ideal_weights_1_comp( * @param component1 The first color component to compute. * @param component2 The second color component to compute. 
*/ -static void compute_endpoints_and_ideal_weights_2_comp( +static void compute_ideal_colors_and_weights_2_comp( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, @@ -169,7 +169,7 @@ static void compute_endpoints_and_ideal_weights_2_comp( int texel_count = bsd.texel_count; promise(texel_count > 0); - partition_metrics pms[4]; + partition_metrics pms[BLOCK_MAX_PARTITIONS]; const float *error_weights; const float* data_vr = nullptr; @@ -193,12 +193,12 @@ static void compute_endpoints_and_ideal_weights_2_comp( data_vg = blk.data_b; } - float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; - float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; + float lowparam[BLOCK_MAX_PARTITIONS] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[BLOCK_MAX_PARTITIONS] { -1e10f, -1e10f, -1e10f, -1e10f }; - line2 lines[4]; - float scale[4]; - float length_squared[4]; + line2 lines[BLOCK_MAX_PARTITIONS]; + float scale[BLOCK_MAX_PARTITIONS]; + float length_squared[BLOCK_MAX_PARTITIONS]; compute_avgs_and_dirs_2_comp(pi, blk, ewb, component1, component2, pms); @@ -233,21 +233,20 @@ static void compute_endpoints_and_ideal_weights_2_comp( } } - vfloat4 lowvalues[4]; - vfloat4 highvalues[4]; + vfloat4 lowvalues[BLOCK_MAX_PARTITIONS]; + vfloat4 highvalues[BLOCK_MAX_PARTITIONS]; for (int i = 0; i < partition_count; i++) { float length = highparam[i] - lowparam[i]; - if (length < 0.0f) // case for when none of the texels had any weight + if (length < 0.0f) // Case for when none of the texels had any weight { lowparam[i] = 0.0f; highparam[i] = 1e-7f; } - // it is possible for a uniform-color partition to produce length=0; this - // causes NaN-production and NaN-propagation later on. Set length to - // a small value to avoid this problem. + // It is possible for a uniform-color partition to produce length=0; this causes NaN issues + // so set to a small value to avoid this problem. 
length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; @@ -311,22 +310,22 @@ static void compute_endpoints_and_ideal_weights_2_comp( * @param[out] ei The computed ideal endpoints and weights. * @param omitted_component The color component excluded from the calculation. */ -static void compute_endpoints_and_ideal_weights_3_comp( +static void compute_ideal_colors_and_weights_3_comp( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, endpoints_and_weights& ei, - int omitted_component + unsigned int omitted_component ) { - int partition_count = pi.partition_count; + unsigned int partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - int texel_count= bsd.texel_count; + unsigned int texel_count= bsd.texel_count; promise(texel_count > 0); - partition_metrics pms[4]; + partition_metrics pms[BLOCK_MAX_PARTITIONS]; const float *error_weights; const float* data_vr = nullptr; @@ -361,28 +360,28 @@ static void compute_endpoints_and_ideal_weights_3_comp( data_vb = blk.data_b; } - float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; - float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; + float lowparam[BLOCK_MAX_PARTITIONS] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[BLOCK_MAX_PARTITIONS] { -1e10f, -1e10f, -1e10f, -1e10f }; - line3 lines[4]; - float scale[4]; - float length_squared[4]; + line3 lines[BLOCK_MAX_PARTITIONS]; + float scale[BLOCK_MAX_PARTITIONS]; + float length_squared[BLOCK_MAX_PARTITIONS]; compute_avgs_and_dirs_3_comp(pi, blk, ewb, omitted_component, pms); - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_rgb_s(dir) < 0.0f) { - dir = vfloat4(0.0f) - dir; + dir = vfloat4::zero() - dir; } lines[i].a = pms[i].avg; lines[i].b = normalize_safe(dir, unit3()); } - for (int i = 0; i < texel_count; i++) + for (unsigned int i = 
0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { @@ -401,18 +400,17 @@ static void compute_endpoints_and_ideal_weights_3_comp( } } - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { float length = highparam[i] - lowparam[i]; - if (length < 0) // case for when none of the texels had any weight + if (length < 0) // Case for when none of the texels had any weight { lowparam[i] = 0.0f; highparam[i] = 1e-7f; } - // it is possible for a uniform-color partition to produce length=0; this - // causes NaN-production and NaN-propagation later on. Set length to - // a small value to avoid this problem. + // It is possible for a uniform-color partition to produce length=0; this causes NaN issues + // so set to a small value to avoid this problem. length = astc::max(length, 1e-7f); length_squared[i] = length * length; @@ -428,7 +426,7 @@ static void compute_endpoints_and_ideal_weights_3_comp( vfloat4 bmax = blk.data_max; // TODO: Probably a programmatic vector permute we can do here ... 
- assert(omitted_component < 4); + assert(omitted_component < BLOCK_MAX_COMPONENTS); switch (omitted_component) { case 0: @@ -454,7 +452,7 @@ static void compute_endpoints_and_ideal_weights_3_comp( bool is_constant_wes = true; float constant_wes = length_squared[pi.partition_of_texel[0]] * error_weights[0]; - for (int i = 0; i < texel_count; i++) + for (unsigned int i = 0; i < texel_count; i++) { int partition = pi.partition_of_texel[i]; float idx = (ei.weights[i] - lowparam[partition]) * scale[partition]; @@ -468,8 +466,8 @@ static void compute_endpoints_and_ideal_weights_3_comp( } // Zero initialize any SIMD over-fetch - int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (int i = texel_count; i < texel_count_simd; i++) + unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (unsigned int i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -487,7 +485,7 @@ static void compute_endpoints_and_ideal_weights_3_comp( * @param pi The partition info for the current trial. * @param[out] ei The computed ideal endpoints and weights. 
*/ -static void compute_endpoints_and_ideal_weights_4_comp( +static void compute_ideal_colors_and_weights_4_comp( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, @@ -502,15 +500,15 @@ static void compute_endpoints_and_ideal_weights_4_comp( promise(texel_count > 0); promise(partition_count > 0); - float lowparam[4] { 1e10, 1e10, 1e10, 1e10 }; - float highparam[4] { -1e10, -1e10, -1e10, -1e10 }; + float lowparam[BLOCK_MAX_PARTITIONS] { 1e10, 1e10, 1e10, 1e10 }; + float highparam[BLOCK_MAX_PARTITIONS] { -1e10, -1e10, -1e10, -1e10 }; - line4 lines[4]; + line4 lines[BLOCK_MAX_PARTITIONS]; - float scale[4]; - float length_squared[4]; + float scale[BLOCK_MAX_PARTITIONS]; + float length_squared[BLOCK_MAX_PARTITIONS]; - partition_metrics pms[4]; + partition_metrics pms[BLOCK_MAX_PARTITIONS]; compute_avgs_and_dirs_4_comp(pi, blk, ewb, pms); @@ -557,8 +555,8 @@ static void compute_endpoints_and_ideal_weights_4_comp( highparam[i] = 1e-7f; } - // Uniform color partitions produce length=0, force small value to - // avoid NaN issues later ... + // It is possible for a uniform-color partition to produce length=0; this causes NaN issues + // so set to a small value to avoid this problem. length = astc::max(length, 1e-7f); length_squared[i] = length * length; @@ -599,79 +597,78 @@ static void compute_endpoints_and_ideal_weights_4_comp( } /* See header for documentation. 
*/ -void compute_endpoints_and_ideal_weights_1plane( +void compute_ideal_colors_and_weights_1plane( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, endpoints_and_weights& ei ) { - int uses_alpha = imageblock_uses_alpha(&blk); - if (uses_alpha) + if (blk.is_using_alpha()) { - compute_endpoints_and_ideal_weights_4_comp(bsd, blk, ewb, pi, ei); + compute_ideal_colors_and_weights_4_comp(bsd, blk, ewb, pi, ei); } else { - compute_endpoints_and_ideal_weights_3_comp(bsd,blk, ewb, pi, ei, 3); + compute_ideal_colors_and_weights_3_comp(bsd,blk, ewb, pi, ei, 3); } } /* See header for documentation. */ -void compute_endpoints_and_ideal_weights_2planes( +void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - int plane2_component, + unsigned int plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2 ) { - int uses_alpha = imageblock_uses_alpha(&blk); + bool uses_alpha = blk.is_using_alpha();; - assert(plane2_component < 4); + assert(plane2_component < BLOCK_MAX_COMPONENTS); switch (plane2_component) { - case 0: // separate weights for red + case 0: // Separate weights for red if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_comp(bsd, blk, ewb, pi, ei1, 0); + compute_ideal_colors_and_weights_3_comp(bsd, blk, ewb, pi, ei1, 0); } else { - compute_endpoints_and_ideal_weights_2_comp(bsd, blk, ewb, pi, ei1, 1, 2); + compute_ideal_colors_and_weights_2_comp(bsd, blk, ewb, pi, ei1, 1, 2); } - compute_endpoints_and_ideal_weights_1_comp(bsd, blk, ewb, pi, ei2, 0); + compute_ideal_colors_and_weights_1_comp(bsd, blk, ewb, pi, ei2, 0); break; - case 1: // separate weights for green + case 1: // Separate weights for green if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_comp(bsd,blk, ewb, pi, ei1, 1); + compute_ideal_colors_and_weights_3_comp(bsd,blk, ewb, pi, ei1, 1); } else { - 
compute_endpoints_and_ideal_weights_2_comp(bsd, blk, ewb, pi, ei1, 0, 2); + compute_ideal_colors_and_weights_2_comp(bsd, blk, ewb, pi, ei1, 0, 2); } - compute_endpoints_and_ideal_weights_1_comp(bsd, blk, ewb, pi, ei2, 1); + compute_ideal_colors_and_weights_1_comp(bsd, blk, ewb, pi, ei2, 1); break; - case 2: // separate weights for blue + case 2: // Separate weights for blue if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_comp(bsd, blk, ewb, pi, ei1, 2); + compute_ideal_colors_and_weights_3_comp(bsd, blk, ewb, pi, ei1, 2); } else { - compute_endpoints_and_ideal_weights_2_comp(bsd, blk, ewb, pi, ei1, 0, 1); + compute_ideal_colors_and_weights_2_comp(bsd, blk, ewb, pi, ei1, 0, 1); } - compute_endpoints_and_ideal_weights_1_comp(bsd, blk, ewb, pi, ei2, 2); + compute_ideal_colors_and_weights_1_comp(bsd, blk, ewb, pi, ei2, 2); break; - default: // separate weights for alpha + default: // Separate weights for alpha assert(uses_alpha); - compute_endpoints_and_ideal_weights_3_comp(bsd, blk, ewb, pi, ei1, 3); - compute_endpoints_and_ideal_weights_1_comp(bsd, blk, ewb, pi, ei2, 3); + compute_ideal_colors_and_weights_3_comp(bsd, blk, ewb, pi, ei1, 3); + compute_ideal_colors_and_weights_1_comp(bsd, blk, ewb, pi, ei2, 3); break; } } @@ -679,18 +676,18 @@ void compute_endpoints_and_ideal_weights_2planes( /* See header for documentation. 
*/ float compute_error_of_weight_set_1plane( const endpoints_and_weights& eai, - const decimation_table& dt, + const decimation_info& di, const float* weights ) { vfloat4 error_summav = vfloat4::zero(); float error_summa = 0.0f; - int texel_count = dt.texel_count; + unsigned int texel_count = di.texel_count; // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized - for (int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute the bilinear interpolation of the decimated weight grid - vfloat current_values = bilinear_infill_vla(dt, weights, i); + vfloat current_values = bilinear_infill_vla(di, weights, i); // Compute the error between the computed value and the ideal weight vfloat actual_values = loada(&(eai.weights[i])); @@ -711,20 +708,20 @@ float compute_error_of_weight_set_1plane( float compute_error_of_weight_set_2planes( const endpoints_and_weights& eai1, const endpoints_and_weights& eai2, - const decimation_table& dt, + const decimation_info& di, const float* weights1, const float* weights2 ) { vfloat4 error_summav = vfloat4::zero(); float error_summa = 0.0f; - int texel_count = dt.texel_count; + unsigned int texel_count = di.texel_count; // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized - for (int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Compute the bilinear interpolation of the decimated weight grid - vfloat current_values1 = bilinear_infill_vla(dt, weights1, i); + vfloat current_values1 = bilinear_infill_vla(di, weights1, i); // Compute the error between the computed value and the ideal weight vfloat actual_values1 = loada(&(eai1.weights[i])); @@ -733,7 +730,7 @@ float compute_error_of_weight_set_2planes( // Plane 2 // Compute the bilinear interpolation of the decimated weight grid - vfloat current_values2 = 
bilinear_infill_vla(dt, weights2, i); + vfloat current_values2 = bilinear_infill_vla(di, weights2, i); // Compute the error between the computed value and the ideal weight vfloat actual_values2 = loada(&(eai2.weights[i])); @@ -750,47 +747,46 @@ float compute_error_of_weight_set_2planes( } /* See header for documentation. */ -void compute_ideal_weights_for_decimation_table( +void compute_ideal_weights_for_decimation( const endpoints_and_weights& eai_in, endpoints_and_weights& eai_out, - const decimation_table& dt, - float* RESTRICT weight_set, - float* RESTRICT weights + const decimation_info& di, + float* weight_set, + float* weights ) { - int texel_count = dt.texel_count; - int weight_count = dt.weight_count; + unsigned int texel_count = di.texel_count; + unsigned int weight_count = di.weight_count; promise(texel_count > 0); promise(weight_count > 0); - // This function includes a copy of the epw from eai_in to eai_out. We do it - // here because we want to load the data anyway, so we can avoid loading it - // from memory twice. + // This function includes a copy of the epw from eai_in to eai_out. We do it here because we + // want to load the data anyway, so we can avoid loading it from memory twice. eai_out.ep = eai_in.ep; eai_out.is_constant_weight_error_scale = eai_in.is_constant_weight_error_scale; // Ensure that the end of the output arrays that are used for SIMD paths later are filled so we // can safely run SIMD elsewhere without a loop tail. Note that this is always safe as weight // arrays always contain space for 64 elements - int weight_count_simd = round_up_to_simd_multiple_vla(weight_count); - for (int i = weight_count; i < weight_count_simd; i++) + unsigned int weight_count_simd = round_up_to_simd_multiple_vla(weight_count); + for (unsigned int i = weight_count; i < weight_count_simd; i++) { weight_set[i] = 0.0f; } - // If we have a 1:1 mapping just shortcut the computation - clone the - // weights into both the weight set and the output epw copy. 
+ // If we have a 1:1 mapping just shortcut the computation - clone the weights into both the + // weight set and the output epw copy. // Transfer enough to also copy zero initialized SIMD over-fetch region - int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); if (texel_count == weight_count) { // TODO: Use SIMD copies? - for (int i = 0; i < texel_count_simd; i++) + for (unsigned int i = 0; i < texel_count_simd; i++) { // Assert it's an identity map for valid texels, and last valid value for any overspill - assert(((i < texel_count) && (i == dt.weight_texel[0][i])) || - ((i >= texel_count) && (texel_count - 1 == dt.weight_texel[0][i]))); + assert(((i < texel_count) && (i == di.weight_texel[0][i])) || + ((i >= texel_count) && (texel_count - 1 == di.weight_texel[0][i]))); weight_set[i] = eai_in.weights[i]; weights[i] = eai_in.weight_error_scale[i]; @@ -800,11 +796,11 @@ void compute_ideal_weights_for_decimation_table( return; } - // If we don't have a 1:1 mapping just clone the weights into the output - // epw copy and then do the full algorithm to decimate weights. + // If we don't have a 1:1 mapping just clone the weights into the output epw copy and then do + // the full algorithm to decimate weights. 
else { - for (int i = 0; i < texel_count_simd; i++) + for (unsigned int i = 0; i < texel_count_simd; i++) { eai_out.weights[i] = eai_in.weights[i]; eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; @@ -812,7 +808,7 @@ void compute_ideal_weights_for_decimation_table( } // Otherwise compute an estimate and perform single refinement iteration - alignas(ASTCENC_VECALIGN) float infilled_weights[MAX_TEXELS_PER_BLOCK]; + alignas(ASTCENC_VECALIGN) float infilled_weights[BLOCK_MAX_TEXELS]; // Compute an initial average for each decimated weight bool constant_wes = eai_in.is_constant_weight_error_scale; @@ -820,26 +816,26 @@ void compute_ideal_weights_for_decimation_table( // This overshoots - this is OK as we initialize the array tails in the // decimation table structures to safe values ... - for (int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { // Start with a small value to avoid div-by-zero later vfloat weight_weight(1e-10f); vfloat initial_weight = vfloat::zero(); // Accumulate error weighting of all the texels using this weight - vint weight_texel_count(dt.weight_texel_count + i); - int max_texel_count = hmax(weight_texel_count).lane<0>(); + vint weight_texel_count(di.weight_texel_count + i); + unsigned int max_texel_count = hmax(weight_texel_count).lane<0>(); promise(max_texel_count > 0); - for (int j = 0; j < max_texel_count; j++) + for (unsigned int j = 0; j < max_texel_count; j++) { // Not all lanes may actually use j texels, so mask out if idle vmask active = weight_texel_count > vint(j); - vint texel(dt.weight_texel[j] + i); + vint texel(di.weight_texel[j] + i); texel = select(vint::zero(), texel, active); - vfloat weight = loada(dt.weights_flt[j] + i); + vfloat weight = loada(di.weights_flt[j] + i); weight = select(vfloat::zero(), weight, active); if (!constant_wes) @@ -859,27 +855,26 @@ void compute_ideal_weights_for_decimation_table( // Populate the interpolated weight 
grid based on the initital average // Process SIMD-width texel coordinates at at time while we can - int is = 0; - int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + unsigned int is = 0; + unsigned int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); for (/* */; is < clipped_texel_count; is += ASTCENC_SIMD_WIDTH) { - vfloat weight = bilinear_infill_vla(dt, weight_set, is); + vfloat weight = bilinear_infill_vla(di, weight_set, is); storea(weight, infilled_weights + is); } // Loop tail for (/* */; is < texel_count; is++) { - infilled_weights[is] = bilinear_infill(dt, weight_set, is); + infilled_weights[is] = bilinear_infill(di, weight_set, is); } // Perform a single iteration of refinement - // Empirically determined step size; larger values don't help but smaller - // values cause a noticeable drop in image quality ... + // Empirically determined step size; larger values don't help but smaller drops image quality constexpr float stepsize = 0.25f; - constexpr float chd_scale = -TEXEL_WEIGHT_SUM; + constexpr float chd_scale = -WEIGHTS_TEXEL_SUM; - for (int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight_val = loada(weight_set + i); @@ -889,19 +884,19 @@ void compute_ideal_weights_for_decimation_table( vfloat error_change1(0.0f); // Accumulate error weighting of all the texels using this weight - vint weight_texel_count(dt.weight_texel_count + i); - int max_texel_count = hmax(weight_texel_count).lane<0>(); + vint weight_texel_count(di.weight_texel_count + i); + unsigned int max_texel_count = hmax(weight_texel_count).lane<0>(); promise(max_texel_count > 0); - for (int j = 0; j < max_texel_count; j++) + for (unsigned int j = 0; j < max_texel_count; j++) { // Not all lanes may actually use j texels, so mask out if idle vmask active = weight_texel_count > vint(j); - vint texel(dt.weight_texel[j] + i); + vint texel(di.weight_texel[j] + i); texel 
= select(vint::zero(), texel, active); - vfloat contrib_weight = loada(dt.weights_flt[j] + i); + vfloat contrib_weight = loada(di.weights_flt[j] + i); contrib_weight = select(vfloat::zero(), contrib_weight, active); if (!constant_wes) @@ -913,8 +908,8 @@ void compute_ideal_weights_for_decimation_table( vfloat old_weight = gatherf(infilled_weights, texel); vfloat ideal_weight = gatherf(eai_in.weights, texel); - error_change0 = error_change0 + contrib_weight * scale; - error_change1 = error_change1 + (old_weight - ideal_weight) * scale; + error_change0 += contrib_weight * scale; + error_change1 += (old_weight - ideal_weight) * scale; } @@ -927,25 +922,23 @@ void compute_ideal_weights_for_decimation_table( } /* See header for documentation. */ -void compute_quantized_weights_for_decimation_table( - const decimation_table& dt, +void compute_quantized_weights_for_decimation( + const decimation_info& di, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quant_level + quant_method quant_level ) { - int weight_count = dt.weight_count; + int weight_count = di.weight_count; promise(weight_count > 0); const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quant_level]); static const int quant_levels[12] { 2,3,4,5,6,8,10,12,16,20,24,32 }; float quant_level_m1 = (float)(quant_levels[quant_level] - 1); - // Quantize the weight set using both the specified low/high bounds - // and the standard 0..1 weight bounds. 
- + // Quantize the weight set using both the specified low/high bounds and standard 0..1 bounds assert(high_bound > low_bound); float rscale = high_bound - low_bound; float scale = 1.0f / rscale; @@ -960,7 +953,7 @@ void compute_quantized_weights_for_decimation_table( vfloat low_boundv(low_bound); // This runs to the rounded-up SIMD size, which is safe as the loop tail is filled with known - // safe data in compute_ideal_weights_for_decimation_table and arrays are always 64 elements + // safe data in compute_ideal_weights_for_decimation and arrays are always 64 elements for (int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat ix = loada(&weight_set_in[i]) * scalev - scaled_low_boundv; @@ -990,8 +983,8 @@ void compute_quantized_weights_for_decimation_table( /** * @brief Compute the RGB + offset for a HDR endpoint mode #7. * - * Since the matrix needed has a regular structure we can simplify the inverse - * calculation. This gives us ~24 multiplications vs. 96 for a generic inverse. + * Since the matrix needed has a regular structure we can simplify the inverse calculation. This + * gives us ~24 multiplications vs. 96 for a generic inverse. * * mat[0] = vfloat4(rgba_ws.x, 0.0f, 0.0f, wght_ws.x); * mat[1] = vfloat4( 0.0f, rgba_ws.y, 0.0f, wght_ws.y); @@ -1004,7 +997,7 @@ void compute_quantized_weights_for_decimation_table( * @param rgbq_sum Sum of partition component error weights * texel weight * color data. * @param psum Sum of RGB color weights * texel weight^2. 
*/ -static inline vfloat4 compute_rgbovec( +static inline vfloat4 compute_rgbo_vector( vfloat4 rgba_weight_sum, vfloat4 weight_weight_sum, vfloat4 rgbq_sum, @@ -1055,14 +1048,14 @@ void recompute_ideal_colors_1plane( const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - const decimation_table& dt, + const decimation_info& di, int weight_quant_mode, const uint8_t* weight_set8, endpoints& ep, - vfloat4 rgbs_vectors[4], - vfloat4 rgbo_vectors[4] + vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], + vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS] ) { - int weight_count = dt.weight_count; + int weight_count = di.weight_count; int partition_count = pi.partition_count; promise(weight_count > 0); @@ -1070,7 +1063,7 @@ void recompute_ideal_colors_1plane( const quantization_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_mode]; - float weight_set[MAX_WEIGHTS_PER_BLOCK]; + float weight_set[BLOCK_MAX_WEIGHTS]; for (int i = 0; i < weight_count; i++) { weight_set[i] = qat.unquantized_value[weight_set8[i]] * (1.0f / 64.0f); @@ -1117,7 +1110,7 @@ void recompute_ideal_colors_1plane( vfloat4 weight_weight_sum = vfloat4(1e-17f); float psum = 1e-17f; - // FIXME: the loop below has too many responsibilities, making it inefficient. + // TODO: This loop has too many responsibilities, making it inefficient for (int j = 0; j < texel_count; j++) { int tix = texel_indexes[j]; @@ -1125,9 +1118,9 @@ void recompute_ideal_colors_1plane( vfloat4 rgba = blk.texel(tix); vfloat4 color_weight = ewb.error_weights[tix]; - // FIXME: move this calculation out to the color block. + // TODO: Move this calculation out to the color block? 
float ls_weight = hadd_rgb_s(color_weight); - float idx0 = bilinear_infill(dt, weight_set, tix); + float idx0 = bilinear_infill(di, weight_set, tix); float om_idx0 = 1.0f - idx0; wmin1 = astc::min(idx0, wmin1); @@ -1162,24 +1155,22 @@ void recompute_ideal_colors_1plane( psum += dot3_s(color_weight * color_idx, color_idx); } - // calculations specific to mode #7, the HDR RGB-scale mode. - // FIXME: Can we skip this for LDR textures? + // Calculations specific to mode #7, the HDR RGB-scale mode + // TODO: Can we skip this for LDR textures? vfloat4 rgbq_sum = color_vec_x + color_vec_y; rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y)); - vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum); rgbo_vectors[i] = rgbovec; - // We will occasionally get a failure due to the use of a singular - // (non-invertible) matrix. Record whether such a failure has taken - // place; if it did, compute rgbo_vectors[] with a different method - // later on. + // We will occasionally get a failure due to the use of a singular (non-invertible) matrix. + // Record whether such a failure has taken place; if it did, compute rgbo_vectors[] with a + // different method later float chkval = dot_s(rgbovec, rgbovec); int rgbo_fail = chkval != chkval; - // Initialize the luminance and scale vectors with a reasonable - // default, just in case the subsequent calculation blows up. + // Initialize the luminance and scale vectors with a reasonable default float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); scalediv = astc::clamp1f(scalediv); @@ -1189,8 +1180,8 @@ void recompute_ideal_colors_1plane( if (wmin1 >= wmax1 * 0.999f) { - // if all weights in the partition were equal, then just take average - // of all colors in the partition and use that as both endpoint colors. 
+ // If all weights in the partition were equal, then just take average of all colors in + // the partition and use that as both endpoint colors vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); vmask4 notnan_mask = avg == avg; @@ -1201,8 +1192,8 @@ void recompute_ideal_colors_1plane( } else { - // otherwise, complete the analytic calculation of ideal-endpoint-values - // for the given set of texel weights and pixel colors. + // Otherwise, complete the analytic calculation of ideal-endpoint-values for the given + // set of texel weights and pixel colors vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); vfloat4 color_rdet1 = 1.0f / color_det1; @@ -1238,8 +1229,7 @@ void recompute_ideal_colors_1plane( } } - // if the calculation of an RGB-offset vector failed, try to compute - // a somewhat-sensible value anyway + // If the calculation of an RGB-offset vector failed, try to compute a value another way if (rgbo_fail) { vfloat4 v0 = ep.endpt0[i]; @@ -1261,16 +1251,16 @@ void recompute_ideal_colors_2planes( const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - const decimation_table& dt, + const decimation_info& di, int weight_quant_mode, const uint8_t* weight_set8_plane1, const uint8_t* weight_set8_plane2, endpoints& ep, - vfloat4 rgbs_vectors[4], - vfloat4 rgbo_vectors[4], + vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], + vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS], int plane2_component ) { - int weight_count = dt.weight_count; + int weight_count = di.weight_count; int partition_count = pi.partition_count; promise(weight_count > 0); @@ -1278,8 +1268,8 @@ void recompute_ideal_colors_2planes( const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_mode]); - float weight_set[MAX_WEIGHTS_PER_BLOCK]; - float plane2_weight_set[MAX_WEIGHTS_PER_BLOCK]; + float weight_set[BLOCK_MAX_WEIGHTS]; + float plane2_weight_set[BLOCK_MAX_WEIGHTS]; for (int i = 0; i < weight_count; i++) { @@ 
-1332,7 +1322,7 @@ void recompute_ideal_colors_2planes( vfloat4 weight_weight_sum = vfloat4(1e-17f); float psum = 1e-17f; - // FIXME: the loop below has too many responsibilities, making it inefficient. + // TODO: This loop has too many responsibilities, making it inefficient for (int j = 0; j < texel_count; j++) { int tix = texel_indexes[j]; @@ -1340,10 +1330,10 @@ void recompute_ideal_colors_2planes( vfloat4 rgba = blk.texel(tix); vfloat4 color_weight = ewb.error_weights[tix]; - // FIXME: move this calculation out to the color block. + // TODO: Move this calculation out to the color block? float ls_weight = hadd_rgb_s(color_weight); - float idx0 = bilinear_infill(dt, weight_set, tix); + float idx0 = bilinear_infill(di, weight_set, tix); float om_idx0 = 1.0f - idx0; wmin1 = astc::min(idx0, wmin1); @@ -1369,7 +1359,7 @@ void recompute_ideal_colors_2planes( float idx1 = 0.0f; float om_idx1 = 0.0f; - idx1 = bilinear_infill(dt, plane2_weight_set, tix); + idx1 = bilinear_infill(di, plane2_weight_set, tix); om_idx1 = 1.0f - idx1; wmin2 = astc::min(idx1, wmin2); @@ -1397,24 +1387,22 @@ void recompute_ideal_colors_2planes( psum += dot3_s(color_weight * color_idx, color_idx); } - // calculations specific to mode #7, the HDR RGB-scale mode. - // FIXME: Can we skip this for LDR textures? + // Calculations specific to mode #7, the HDR RGB-scale mode + // TODO: Can we skip this for LDR textures? vfloat4 rgbq_sum = color_vec_x + color_vec_y; rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y)); - vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum); rgbo_vectors[i] = rgbovec; - // We will occasionally get a failure due to the use of a singular - // (non-invertible) matrix. Record whether such a failure has taken - // place; if it did, compute rgbo_vectors[] with a different method - // later on. 
+ // We will occasionally get a failure due to the use of a singular (non-invertible) matrix. + // Record whether such a failure has taken place; if it did, compute rgbo_vectors[] with a + // different method later float chkval = dot_s(rgbovec, rgbovec); int rgbo_fail = chkval != chkval; - // Initialize the luminance and scale vectors with a reasonable - // default, just in case the subsequent calculation blows up. + // Initialize the luminance and scale vectors with a reasonable default float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); scalediv = astc::clamp1f(scalediv); @@ -1424,8 +1412,8 @@ void recompute_ideal_colors_2planes( if (wmin1 >= wmax1 * 0.999f) { - // if all weights in the partition were equal, then just take average - // of all colors in the partition and use that as both endpoint colors. + // If all weights in the partition were equal, then just take average of all colors in + // the partition and use that as both endpoint colors vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); vmask4 p1_mask = vint4::lane_id() != vint4(plane2_component); @@ -1439,8 +1427,8 @@ void recompute_ideal_colors_2planes( } else { - // otherwise, complete the analytic calculation of ideal-endpoint-values - // for the given set of texel weights and pixel colors. + // Otherwise, complete the analytic calculation of ideal-endpoint-values for the given + // set of texel weights and pixel colors vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); vfloat4 color_rdet1 = 1.0f / color_det1; @@ -1479,8 +1467,8 @@ void recompute_ideal_colors_2planes( if (wmin2 >= wmax2 * 0.999f) { - // if all weights in the partition were equal, then just take average - // of all colors in the partition and use that as both endpoint colors. 
+ // If all weights in the partition were equal, then just take average of all colors in + // the partition and use that as both endpoint colors vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); vmask4 p2_mask = vint4::lane_id() == vint4(plane2_component); @@ -1492,8 +1480,8 @@ void recompute_ideal_colors_2planes( } else { - // otherwise, complete the analytic calculation of ideal-endpoint-values - // for the given set of texel weights and pixel colors. + // Otherwise, complete the analytic calculation of ideal-endpoint-values for the given + // set of texel weights and pixel colors vfloat4 color_det2 = (left2_sum * right2_sum) - (middle2_sum * middle2_sum); vfloat4 color_rdet2 = 1.0f / color_det2; @@ -1513,8 +1501,7 @@ void recompute_ideal_colors_2planes( ep.endpt1[i] = select(ep.endpt1[i], ep1, full_mask); } - // if the calculation of an RGB-offset vector failed, try to compute - // a somewhat-sensible value anyway + // If the calculation of an RGB-offset vector failed, try to compute a value another way if (rgbo_fail) { vfloat4 v0 = ep.endpt0[i]; diff --git a/lib/astc-encoder/Source/astcenc_image.cpp b/lib/astc-encoder/Source/astcenc_image.cpp index bc410147cf..a30579a7ec 100644 --- a/lib/astc-encoder/Source/astcenc_image.cpp +++ b/lib/astc-encoder/Source/astcenc_image.cpp @@ -153,14 +153,14 @@ void fetch_imageblock( const astcenc_image& img, imageblock& blk, const block_size_descriptor& bsd, - int xpos, - int ypos, - int zpos, + unsigned int xpos, + unsigned int ypos, + unsigned int zpos, const astcenc_swizzle& swz ) { - int xsize = img.dim_x; - int ysize = img.dim_y; - int zsize = img.dim_z; + unsigned int xsize = img.dim_x; + unsigned int ysize = img.dim_y; + unsigned int zsize = img.dim_z; blk.xpos = xpos; blk.ypos = ypos; @@ -205,18 +205,18 @@ void fetch_imageblock( converter = encode_texel_lns; } - for (int z = 0; z < bsd.zdim; z++) + for (unsigned int z = 0; z < bsd.zdim; z++) { - int zi = astc::min(zpos + z, zsize - 1); + unsigned int 
zi = astc::min(zpos + z, zsize - 1); void* plane = img.data[zi]; - for (int y = 0; y < bsd.ydim; y++) + for (unsigned int y = 0; y < bsd.ydim; y++) { - int yi = astc::min(ypos + y, ysize - 1); + unsigned int yi = astc::min(ypos + y, ysize - 1); - for (int x = 0; x < bsd.xdim; x++) + for (unsigned int x = 0; x < bsd.xdim; x++) { - int xi = astc::min(xpos + x, xsize - 1); + unsigned int xi = astc::min(xpos + x, xsize - 1); vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi)); datav = swizzler(datav, swz); @@ -268,25 +268,25 @@ void write_imageblock( astcenc_image& img, const imageblock& blk, const block_size_descriptor& bsd, - int xpos, - int ypos, - int zpos, + unsigned int xpos, + unsigned int ypos, + unsigned int zpos, const astcenc_swizzle& swz ) { - int xsize = img.dim_x; - int ysize = img.dim_y; - int zsize = img.dim_z; + unsigned int xsize = img.dim_x; + unsigned int ysize = img.dim_y; + unsigned int zsize = img.dim_z; - int x_start = xpos; - int x_end = std::min(xsize, xpos + bsd.xdim); - int x_nudge = bsd.xdim - (x_end - x_start); + unsigned int x_start = xpos; + unsigned int x_end = std::min(xsize, xpos + bsd.xdim); + unsigned int x_nudge = bsd.xdim - (x_end - x_start); - int y_start = ypos; - int y_end = std::min(ysize, ypos + bsd.ydim); - int y_nudge = (bsd.ydim - (y_end - y_start)) * bsd.xdim; + unsigned int y_start = ypos; + unsigned int y_end = std::min(ysize, ypos + bsd.ydim); + unsigned int y_nudge = (bsd.ydim - (y_end - y_start)) * bsd.xdim; - int z_start = zpos; - int z_end = std::min(zsize, zpos + bsd.zdim); + unsigned int z_start = zpos; + unsigned int z_end = std::min(zsize, zpos + bsd.zdim); float data[7]; data[ASTCENC_SWZ_0] = 0.0f; @@ -303,14 +303,14 @@ void write_imageblock( int idx = 0; if (img.data_type == ASTCENC_TYPE_U8) { - for (int z = z_start; z < z_end; z++) + for (unsigned int z = z_start; z < z_end; z++) { // Fetch the image plane uint8_t* data8 = static_cast(img.data[z]); - for (int y = y_start; y < y_end; y++) + for 
(unsigned int y = y_start; y < y_end; y++) { - for (int x = x_start; x < x_end; x++) + for (unsigned int x = x_start; x < x_end; x++) { vint4 colori = vint4::zero(); @@ -359,14 +359,14 @@ void write_imageblock( } else if (img.data_type == ASTCENC_TYPE_F16) { - for (int z = z_start; z < z_end; z++) + for (unsigned int z = z_start; z < z_end; z++) { // Fetch the image plane uint16_t* data16 = static_cast(img.data[z]); - for (int y = y_start; y < y_end; y++) + for (unsigned int y = y_start; y < y_end; y++) { - for (int x = x_start; x < x_end; x++) + for (unsigned int x = x_start; x < x_end; x++) { vint4 color; @@ -418,14 +418,14 @@ void write_imageblock( { assert(img.data_type == ASTCENC_TYPE_F32); - for (int z = z_start; z < z_end; z++) + for (unsigned int z = z_start; z < z_end; z++) { // Fetch the image plane float* data32 = static_cast(img.data[z]); - for (int y = y_start; y < y_end; y++) + for (unsigned int y = y_start; y < y_end; y++) { - for (int x = x_start; x < x_end; x++) + for (unsigned int x = x_start; x < x_end; x++) { vfloat4 color = blk.texel(idx); diff --git a/lib/astc-encoder/Source/astcenc_integer_sequence.cpp b/lib/astc-encoder/Source/astcenc_integer_sequence.cpp index 84e27c00f7..279d27e321 100644 --- a/lib/astc-encoder/Source/astcenc_integer_sequence.cpp +++ b/lib/astc-encoder/Source/astcenc_integer_sequence.cpp @@ -333,16 +333,16 @@ static const uint8_t integer_of_trits[3][3][3][3][3] = { * @brief The number of bits, trits, and quints needed for a quant level. */ struct btq_count { - /**< The quantization level. */ + /** @brief The quantization level. */ uint8_t quant; - /**< The number of bits. */ + /** @brief The number of bits. */ uint8_t bits; - /**< The number of trits. */ + /** @brief The number of trits. */ uint8_t trits; - /**< The number of quints. */ + /** @brief The number of quints. 
*/ uint8_t quints; }; @@ -380,16 +380,16 @@ static const std::array btq_counts = {{ * (scale * + round) / divisor */ struct ise_size { - /**< The quantization level. */ + /** @brief The quantization level. */ uint8_t quant; - /**< The scaling parameter. */ + /** @brief The scaling parameter. */ uint8_t scale; - /**< The rounding parameter. */ + /** @brief The rounding parameter. */ uint8_t round; - /**< The divisor parameter. */ + /** @brief The divisor parameter. */ uint8_t divisor; }; @@ -421,8 +421,8 @@ static const std::array ise_sizes = {{ }}; /* See header for documentation. */ -int get_ise_sequence_bitcount( - int character_count, +unsigned int get_ise_sequence_bitcount( + unsigned int character_count, quant_method quant_level ) { // Cope with out-of bounds values - input might be invalid @@ -439,8 +439,8 @@ int get_ise_sequence_bitcount( /** * @brief Write up to 8 bits at an arbitrary bit offset. * - * The stored value is at most 8 bits, but can be stored at an offset of - * between 0 and 7 bits so may span two separate bytes in memory. + * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may + * span two separate bytes in memory. * * @param value The value to write. * @param bitcount The number of bits to write, starting from LSB. @@ -448,12 +448,12 @@ int get_ise_sequence_bitcount( * @param[in,out] ptr The data pointer to write to. */ static inline void write_bits( - int value, - int bitcount, - int bitoffset, + unsigned int value, + unsigned int bitcount, + unsigned int bitoffset, uint8_t ptr[2] ) { - int mask = (1 << bitcount) - 1; + unsigned int mask = (1 << bitcount) - 1; value &= mask; ptr += bitoffset >> 3; bitoffset &= 7; @@ -470,8 +470,8 @@ static inline void write_bits( /** * @brief Read up to 8 bits at an arbitrary bit offset. * - * The stored value is at most 8 bits, but can be stored at an offset of - * between 0 and 7 bits so may span two separate bytes in memory. 
+ * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may + * span two separate bytes in memory. * * @param bitcount The number of bits to read. * @param bitoffset The bit offset to read from, between 0 and 7. @@ -479,15 +479,15 @@ static inline void write_bits( * * @return The read value. */ -static inline int read_bits( - int bitcount, - int bitoffset, +static inline unsigned int read_bits( + unsigned int bitcount, + unsigned int bitoffset, const uint8_t* ptr ) { - int mask = (1 << bitcount) - 1; + unsigned int mask = (1 << bitcount) - 1; ptr += bitoffset >> 3; bitoffset &= 7; - int value = ptr[0] | (ptr[1] << 8); + unsigned int value = ptr[0] | (ptr[1] << 8); value >>= bitoffset; value &= mask; return value; @@ -496,31 +496,31 @@ static inline int read_bits( /* See header for documentation. */ void encode_ise( quant_method quant_level, - int character_count, + unsigned int character_count, const uint8_t* input_data, uint8_t* output_data, - int bit_offset + unsigned int bit_offset ) { promise(character_count > 0); - int bits = btq_counts[quant_level].bits; - int trits = btq_counts[quant_level].trits; - int quints = btq_counts[quant_level].quints; - int mask = (1 << bits) - 1; + unsigned int bits = btq_counts[quant_level].bits; + unsigned int trits = btq_counts[quant_level].trits; + unsigned int quints = btq_counts[quant_level].quints; + unsigned int mask = (1 << bits) - 1; // Write out trits and bits if (trits) { - int i = 0; - int full_trit_blocks = character_count / 5; + unsigned int i = 0; + unsigned int full_trit_blocks = character_count / 5; - for (int j = 0; j < full_trit_blocks; j++) + for (unsigned int j = 0; j < full_trit_blocks; j++) { - int i4 = input_data[i + 4] >> bits; - int i3 = input_data[i + 3] >> bits; - int i2 = input_data[i + 2] >> bits; - int i1 = input_data[i + 1] >> bits; - int i0 = input_data[i + 0] >> bits; + unsigned int i4 = input_data[i + 4] >> bits; + unsigned int i3 = input_data[i + 3] >> 
bits; + unsigned int i2 = input_data[i + 2] >> bits; + unsigned int i1 = input_data[i + 1] >> bits; + unsigned int i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; @@ -559,15 +559,15 @@ void encode_ise( { // i4 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - int i4 = 0; - int i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; - int i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; - int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; - int i0 = input_data[i + 0] >> bits; + unsigned int i4 = 0; + unsigned int i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; + unsigned int i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; + unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; + unsigned int i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; - for (int j = 0; i < character_count; i++, j++) + for (unsigned int j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[4] { 2, 2, 1, 2 }; @@ -584,14 +584,14 @@ void encode_ise( // Write out quints and bits else if (quints) { - int i = 0; - int full_quint_blocks = character_count / 3; + unsigned int i = 0; + unsigned int full_quint_blocks = character_count / 3; - for (int j = 0; j < full_quint_blocks; j++) + for (unsigned int j = 0; j < full_quint_blocks; j++) { - int i2 = input_data[i + 2] >> bits; - int i1 = input_data[i + 1] >> bits; - int i0 = input_data[i + 0] >> bits; + unsigned int i2 = input_data[i + 2] >> bits; + unsigned int i1 = input_data[i + 1] >> bits; + unsigned int i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; @@ -620,13 +620,13 @@ void encode_ise( { // i2 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - int i2 = 0; - int i1 = i + 1 >= character_count ? 
0 : input_data[i + 1] >> bits; - int i0 = input_data[i + 0] >> bits; + unsigned int i2 = 0; + unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; + unsigned int i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; - for (int j = 0; i < character_count; i++, j++) + for (unsigned int j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[2] { 3, 2 }; @@ -644,7 +644,7 @@ void encode_ise( else { promise(character_count > 0); - for (int i = 0; i < character_count; i++) + for (unsigned int i = 0; i < character_count; i++) { write_bits(input_data[i], bits, bit_offset, output_data); bit_offset += bits; @@ -655,46 +655,39 @@ void encode_ise( /* See header for documentation. */ void decode_ise( quant_method quant_level, - int character_count, + unsigned int character_count, const uint8_t* input_data, uint8_t* output_data, - int bit_offset + unsigned int bit_offset ) { promise(character_count > 0); - // note: due to how the trit/quint-block unpacking is done in this function, - // we may write more temporary results than the number of outputs - // The maximum actual number of results is 64 bit, but we keep 4 additional character_count - // of padding. + // Note: due to how the trit/quint-block unpacking is done in this function, we may write more + // temporary results than the number of outputs. The maximum actual number of results is 64 bit, + // but we keep 4 additional character_count of padding. 
uint8_t results[68]; - uint8_t tq_blocks[22]; // trit-blocks or quint-blocks + uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed - int bits = btq_counts[quant_level].bits; - int trits = btq_counts[quant_level].trits; - int quints = btq_counts[quant_level].quints; + unsigned int bits = btq_counts[quant_level].bits; + unsigned int trits = btq_counts[quant_level].trits; + unsigned int quints = btq_counts[quant_level].quints; - int lcounter = 0; - int hcounter = 0; + unsigned int lcounter = 0; + unsigned int hcounter = 0; - // trit-blocks or quint-blocks must be zeroed out before we collect them in the loop below. - for (int i = 0; i < 22; i++) - { - tq_blocks[i] = 0; - } - - // collect bits for each element, as well as bits for any trit-blocks and quint-blocks. - for (int i = 0; i < character_count; i++) + // Collect bits for each element, as well as bits for any trit-blocks and quint-blocks. + for (unsigned int i = 0; i < character_count; i++) { results[i] = read_bits(bits, bit_offset, input_data); bit_offset += bits; if (trits) { - static const int bits_to_read[5] { 2, 2, 1, 2, 1 }; - static const int block_shift[5] { 0, 2, 4, 5, 7 }; - static const int next_lcounter[5] { 1, 2, 3, 4, 0 }; - static const int hcounter_incr[5] { 0, 0, 0, 0, 1 }; - int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + static const unsigned int bits_to_read[5] { 2, 2, 1, 2, 1 }; + static const unsigned int block_shift[5] { 0, 2, 4, 5, 7 }; + static const unsigned int next_lcounter[5] { 1, 2, 3, 4, 0 }; + static const unsigned int hcounter_incr[5] { 0, 0, 0, 0, 1 }; + unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -703,11 +696,11 @@ void decode_ise( if (quints) { - static const int bits_to_read[3] { 3, 2, 2 }; - static const int block_shift[3] { 0, 3, 5 }; - static const int 
next_lcounter[3] { 1, 2, 0 }; - static const int hcounter_incr[3] { 0, 0, 1 }; - int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + static const unsigned int bits_to_read[3] { 3, 2, 2 }; + static const unsigned int block_shift[3] { 0, 3, 5 }; + static const unsigned int next_lcounter[3] { 1, 2, 0 }; + static const unsigned int hcounter_incr[3] { 0, 0, 1 }; + unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -715,11 +708,11 @@ void decode_ise( } } - // unpack trit-blocks or quint-blocks as needed + // Unpack trit-blocks or quint-blocks as needed if (trits) { - int trit_blocks = (character_count + 4) / 5; - for (int i = 0; i < trit_blocks; i++) + unsigned int trit_blocks = (character_count + 4) / 5; + for (unsigned int i = 0; i < trit_blocks; i++) { const uint8_t *tritptr = trits_of_integer[tq_blocks[i]]; results[5 * i ] |= tritptr[0] << bits; @@ -732,8 +725,8 @@ void decode_ise( if (quints) { - int quint_blocks = (character_count + 2) / 3; - for (int i = 0; i < quint_blocks; i++) + unsigned int quint_blocks = (character_count + 2) / 3; + for (unsigned int i = 0; i < quint_blocks; i++) { const uint8_t *quintptr = quints_of_integer[tq_blocks[i]]; results[3 * i ] |= quintptr[0] << bits; @@ -742,7 +735,7 @@ void decode_ise( } } - for (int i = 0; i < character_count; i++) + for (unsigned int i = 0; i < character_count; i++) { output_data[i] = results[i]; } diff --git a/lib/astc-encoder/Source/astcenc_internal.h b/lib/astc-encoder/Source/astcenc_internal.h index db9728b3ee..39904f9db7 100644 --- a/lib/astc-encoder/Source/astcenc_internal.h +++ b/lib/astc-encoder/Source/astcenc_internal.h @@ -40,11 +40,10 @@ /** * @brief Make a promise to the compiler's optimizer. * - * A promise is an expression that the optimizer is can assume is true for to - * help it generate faster code. 
Common use cases for this are to promise that - * a for loop will iterate more than once, or that the loop iteration count is - * a multiple of a vector length, which avoids pre-loop checks and can avoid - * loop tails if loops are unrolled by the auto-vectorizer. + * A promise is an expression that the optimizer can assume is true to help it generate + * faster code. Common use cases for this are to promise that a for loop will iterate more than + * once, or that the loop iteration count is a multiple of a vector length, which avoids pre-loop + * checks and can avoid loop tails if loops are unrolled by the auto-vectorizer. */ #if defined(NDEBUG) #if !defined(__clang__) && defined(_MSC_VER) @@ -64,58 +63,78 @@ #define promise(cond) assert(cond); #endif -/** - * @brief Make a promise to the compiler's optimizer parameters don't alias. - * - * This is a compiler extension to implement the equivalent of the C99 - * @c restrict keyword. Mostly expected to help on functions which are - * reading and writing to arrays via pointers of the same basic type. - */ -#if !defined(__clang__) && defined(_MSC_VER) - #define RESTRICT __restrict -#else // Assume Clang or GCC - #define RESTRICT __restrict__ -#endif - /* ============================================================================ Constants ============================================================================ */ -#define MAX_TEXELS_PER_BLOCK 216 -#define MAX_KMEANS_TEXELS 64 -#define MAX_WEIGHTS_PER_BLOCK 64 -#define PLANE2_WEIGHTS_OFFSET (MAX_WEIGHTS_PER_BLOCK/2) -#define MIN_WEIGHT_BITS_PER_BLOCK 24 -#define MAX_WEIGHT_BITS_PER_BLOCK 96 -#define PARTITION_BITS 10 -#define PARTITION_COUNT (1 << PARTITION_BITS) +/** @brief The maximum number of components a block can support. */ +static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 }; -// the sum of weights for one texel.
-#define TEXEL_WEIGHT_SUM 16 -#define MAX_DECIMATION_MODES 87 -#define MAX_WEIGHT_MODES 2048 +/** @brief The maximum number of partitions a block can support. */ +static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 }; -static_assert((MAX_TEXELS_PER_BLOCK % ASTCENC_SIMD_WIDTH) == 0, - "MAX_TEXELS_PER_BLOCK must be multiple of ASTCENC_SIMD_WIDTH"); +/** @brief The number of partitionings, per partition count, supported by the ASTC format. */ +static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 }; -static_assert((MAX_WEIGHTS_PER_BLOCK % ASTCENC_SIMD_WIDTH) == 0, - "MAX_WEIGHTS_PER_BLOCK must be multiple of ASTCENC_SIMD_WIDTH"); +/** @brief The maximum number of texels a block can support (6x6x6 block). */ +static constexpr unsigned int BLOCK_MAX_TEXELS { 216 }; -static_assert((MAX_WEIGHT_MODES % ASTCENC_SIMD_WIDTH) == 0, - "MAX_WEIGHT_MODES must be multiple of ASTCENC_SIMD_WIDTH"); +/** @brief The maximum number of texels used during partition selection for texel clustering. */ +static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 }; -// A high default error value -static const float ERROR_CALC_DEFAULT { 1e30f }; +/** @brief The maximum number of weights a block can support. */ +static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 }; -/* ============================================================================ - Compile-time tuning parameters -============================================================================ */ -// The max texel count in a block which can try the one partition fast path. -// Default: enabled for 4x4 and 5x4 blocks. -static const unsigned int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; +/** @brief The minimum number of weight bits a candidate encoding must encode. */ +static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 }; + +/** @brief The maximum number of weight bits a candidate encoding can encode.
*/ +static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 }; + +/** @brief The index indicating a bad (unused) block mode in the remap array. */ +static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu }; + +/** @brief The number of partition index bits supported by the ASTC format. */ +static constexpr unsigned int PARTITION_INDEX_BITS { 10 }; + +/** @brief The offset of the plane 2 weights in shared weight arrays. */ +static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS / 2 }; + +/** @brief The sum of quantized weights for one texel. */ +static constexpr float WEIGHTS_TEXEL_SUM { 16.0f }; + +/** @brief The number of block modes supported by the ASTC format. */ +static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 }; + +/** @brief The number of weight grid decimation modes supported by the ASTC format. */ +static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 }; + +/** @brief The high default error used to initialize error trackers. */ +static constexpr float ERROR_CALC_DEFAULT { 1e30f }; + +/** + * @brief The max texel count in a block which can try the one partition fast path. + * + * This is enabled for 4x4 and 5x4 block sizes. + */ +static constexpr unsigned int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; + +/** + * @brief The maximum number of candidate encodings tested for each encoding mode. + * + * This can be dynamically reduced by the compression quality preset. + */ +static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 }; + + +static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == 0, + "BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH"); + +static_assert((BLOCK_MAX_WEIGHTS % ASTCENC_SIMD_WIDTH) == 0, + "BLOCK_MAX_WEIGHTS must be multiple of ASTCENC_SIMD_WIDTH"); + +static_assert((WEIGHTS_MAX_BLOCK_MODES % ASTCENC_SIMD_WIDTH) == 0, + "WEIGHTS_MAX_BLOCK_MODES must be multiple of ASTCENC_SIMD_WIDTH"); -// The maximum number of candidate encodings returned for each encoding mode.
-// Default: depends on quality preset -static const unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 }; /* ============================================================================ Parallel execution control @@ -130,21 +149,18 @@ static const unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 }; * * A multi-threaded processing stage. * * A condition variable so threads can wait for processing completion. * - * The init stage will be executed by the first thread to arrive in the - * critical section, there is no main thread in the thread pool. + * The init stage will be executed by the first thread to arrive in the critical section, there is + * no main thread in the thread pool. * - * The processing stage uses dynamic dispatch to assign task tickets to threads - * on an on-demand basis. Threads may each therefore executed different numbers - * of tasks, depending on their processing complexity. The task queue and the - * task tickets are just counters; the caller must map these integers to an - * actual processing partition in a specific problem domain. + * The processing stage uses dynamic dispatch to assign task tickets to threads on an on-demand + * basis. Threads may each therefore executed different numbers of tasks, depending on their + * processing complexity. The task queue and the task tickets are just counters; the caller must map + * these integers to an actual processing partition in a specific problem domain. * - * The exit wait condition is needed to ensure processing has finished before - * a worker thread can progress to the next stage of the pipeline. Specifically - * a worker may exit the processing stage because there are no new tasks to - * assign to it while other worker threads are still processing. Calling wait() - * will ensure that all other worker have finished before the thread can - * proceed. + * The exit wait condition is needed to ensure processing has finished before a worker thread can + * progress to the next stage of the pipeline. 
Specifically a worker may exit the processing stage + * because there are no new tasks to assign to it while other worker threads are still processing. + * Calling @c wait() will ensure that all other workers have finished before the thread can proceed. * * The basic usage model: * @@ -215,8 +231,8 @@ class ParallelManager /** * @brief Reset the tracker for a new processing batch. * - * This must be called from single-threaded code before starting the - * multi-threaded procesing operations. + * This must be called from single-threaded code before starting the multi-threaded processing + * operations. */ void reset() { @@ -230,12 +246,11 @@ class ParallelManager /** * @brief Trigger the pipeline stage init step. * - * This can be called from multi-threaded code. The first thread to - * hit this will process the initialization. Other threads will block - * and wait for it to complete. + * This can be called from multi-threaded code. The first thread to hit this will process the + * initialization. Other threads will block and wait for it to complete. * - * @param init_func Callable which executes the stage initialization. - * Must return the number of tasks in the stage. + * @param init_func Callable which executes the stage initialization. It must return the + * total number of tasks in the stage. */ void init(std::function init_func) { @@ -250,9 +265,8 @@ class ParallelManager /** * @brief Trigger the pipeline stage init step. * - * This can be called from multi-threaded code. The first thread to - * hit this will process the initialization. Other threads will block - * and wait for it to complete. + * This can be called from multi-threaded code. The first thread to hit this will process the + * initialization. Other threads will block and wait for it to complete. * * @param task_count Total number of tasks needing processing. */ @@ -272,11 +286,9 @@ class ParallelManager * Assign up to @c granule tasks to the caller for processing.
* * @param granule Maximum number of tasks that can be assigned. - * @param[out] count Actual number of tasks assigned, or zero if - * no tasks were assigned. + * @param[out] count Actual number of tasks assigned, or zero if no tasks were assigned. * - * @return Task index of the first assigned task; assigned tasks - * increment from this. + * @return Task index of the first assigned task; assigned tasks increment from this. */ unsigned int get_task_assignment(unsigned int granule, unsigned int& count) { @@ -294,15 +306,15 @@ class ParallelManager /** * @brief Complete a task assignment. * - * Mark @c count tasks as complete. This will notify all threads blocked - * on @c wait() if this completes the processing of the stage. + * Mark @c count tasks as complete. This will notify all threads blocked on @c wait() if this + * completes the processing of the stage. * * @param count The number of completed tasks. */ void complete_task_assignment(unsigned int count) { - // Note: m_done_count cannot use an atomic without the mutex; this has - // a race between the update here and the wait() for other threads + // Note: m_done_count cannot use an atomic without the mutex; this has a race between the + // update here and the wait() for other threads std::unique_lock lck(m_lock); this->m_done_count += count; if (m_done_count == m_task_count) @@ -324,9 +336,9 @@ class ParallelManager /** * @brief Trigger the pipeline stage term step. * - * This can be called from multi-threaded code. The first thread to - * hit this will process the thread termintion. Caller must have called - * wait() prior to calling this function to ensure processing is complete. + * This can be called from multi-threaded code. The first thread to hit this will process the + * thread termination. Caller must have called @c wait() prior to calling this function to ensure + * that processing is complete. * * @param term_func Callable which executes the stage termination.
*/ @@ -341,199 +353,494 @@ class ParallelManager } }; +/* ============================================================================ + Commonly used data structures +============================================================================ */ + +/** + * @brief The ASTC endpoint formats. + * + * Note, the values here are used directly in the encoding in the format so do not rearrange. + */ +enum endpoint_formats +{ + FMT_LUMINANCE = 0, + FMT_LUMINANCE_DELTA = 1, + FMT_HDR_LUMINANCE_LARGE_RANGE = 2, + FMT_HDR_LUMINANCE_SMALL_RANGE = 3, + FMT_LUMINANCE_ALPHA = 4, + FMT_LUMINANCE_ALPHA_DELTA = 5, + FMT_RGB_SCALE = 6, + FMT_HDR_RGB_SCALE = 7, + FMT_RGB = 8, + FMT_RGB_DELTA = 9, + FMT_RGB_SCALE_ALPHA = 10, + FMT_HDR_RGB = 11, + FMT_RGBA = 12, + FMT_RGBA_DELTA = 13, + FMT_HDR_RGB_LDR_ALPHA = 14, + FMT_HDR_RGBA = 15 +}; + +/** + * @brief The ASTC quantization methods. + * + * Note, the values here are used directly in the encoding in the format so do not rearrange. + */ +enum quant_method +{ + QUANT_2 = 0, + QUANT_3 = 1, + QUANT_4 = 2, + QUANT_5 = 3, + QUANT_6 = 4, + QUANT_8 = 5, + QUANT_10 = 6, + QUANT_12 = 7, + QUANT_16 = 8, + QUANT_20 = 9, + QUANT_24 = 10, + QUANT_32 = 11, + QUANT_40 = 12, + QUANT_48 = 13, + QUANT_64 = 14, + QUANT_80 = 15, + QUANT_96 = 16, + QUANT_128 = 17, + QUANT_160 = 18, + QUANT_192 = 19, + QUANT_256 = 20 +}; + +/** + * @brief The number of levels use by an ASTC quantization method. + * + * @param method The quantization method + * + * @return The number of levels used by @c method. 
+ */ +static inline unsigned int get_quant_level(quant_method method) +{ + switch(method) + { + case QUANT_2: return 2; + case QUANT_3: return 3; + case QUANT_4: return 4; + case QUANT_5: return 5; + case QUANT_6: return 6; + case QUANT_8: return 8; + case QUANT_10: return 10; + case QUANT_12: return 12; + case QUANT_16: return 16; + case QUANT_20: return 20; + case QUANT_24: return 24; + case QUANT_32: return 32; + case QUANT_40: return 40; + case QUANT_48: return 48; + case QUANT_64: return 64; + case QUANT_80: return 80; + case QUANT_96: return 96; + case QUANT_128: return 128; + case QUANT_160: return 160; + case QUANT_192: return 192; + case QUANT_256: return 256; + // Unreachable - the enum is fully described + default: return 0; + } +} + +/** + * @brief Computed metrics about a partition in a block. + */ struct partition_metrics { + /** @brief The square of the color range (max - min) spanned by texels in this partition. */ vfloat4 range_sq; + + /** @brief The sum of the error weights for texels in this partition. */ vfloat4 error_weight; - vfloat4 icolor_scale; + + /** @brief The color scale factor used to weight color channels. */ vfloat4 color_scale; + + /** @brief The 1 / color_scale used to avoid divisions. */ + vfloat4 icolor_scale; + + /** @brief The error-weighted average color in the partition. */ vfloat4 avg; + + /** @brief The dominant error-weighted direction in the partition. */ vfloat4 dir; }; +/** + * @brief Computed lines for a three component analysis. + */ struct partition_lines3 { + /** @brief Line for uncorrelated chroma. */ line3 uncor_line; + + /** @brief Line for correlated chroma, passing through the origin. */ line3 samec_line; + /** @brief Postprocessed line for uncorrelated chroma. */ processed_line3 uncor_pline; + + /** @brief Postprocessed line for correlated chroma, passing through the origin. */ processed_line3 samec_pline; + /** @brief The length of the line for uncorrelated chroma.
*/ float uncor_line_len; + + /** @brief The length of the line for correlated chroma. */ float samec_line_len; }; -/* - Partition table representation: - For each block size, we have 3 tables, each with 1024 partitionings; - these three tables correspond to 2, 3 and 4 partitions respectively. - For each partitioning, we have: - * a 4-entry table indicating how many texels there are in each of the 4 partitions. - This may be from 0 to a very large value. - * a table indicating the partition index of each of the texels in the block. - Each index may be 0, 1, 2 or 3. - * Each element in the table is an uint8_t indicating partition index (0, 1, 2 or 3) -*/ +/** + * @brief The partition information for a single partition. + * + * ASTC has a total of 1024 candidate partitions for each of 2/3/4 partition counts, although this + * 1024 includes seeds that generate duplicates of other seeds and seeds that generate completely + * empty partitions. These are both valid encodings, but astcenc will skip both during compression + * as they are not useful. + */ struct partition_info { - int partition_count; - uint8_t partition_texel_count[4]; - uint8_t partition_of_texel[MAX_TEXELS_PER_BLOCK]; - uint8_t texels_of_partition[4][MAX_TEXELS_PER_BLOCK]; - uint64_t coverage_bitmaps[4]; + /** @brief The number of partitions in this partitioning. */ + unsigned int partition_count; + + /** + * @brief The number of texels in each partition. + * + * Note that some seeds result in zero texels assigned to a partition are valid, but are skipped + * by this compressor as there is no point spending bits encoding an unused color endpoint. + */ + uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS]; + + /** @brief The partition of each texel in the block. */ + uint8_t partition_of_texel[BLOCK_MAX_TEXELS]; + + /** @brief The list of texels in each partition. 
*/ + uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS]; + + /** @brief The canonical partition coverage pattern used during block partition search. */ + uint64_t coverage_bitmaps[BLOCK_MAX_PARTITIONS]; }; -/* - In ASTC, we don't necessarily provide a weight for every texel. - As such, for each block size, there are a number of patterns where some texels - have their weights computed as a weighted average of more than 1 weight. - As such, the codec uses a data structure that tells us: for each texel, which - weights it is a combination of for each weight, which texels it contributes to. - The decimation_table is this data structure. +/** + * @brief The weight grid information for a single decimation pattern. + * + * ASTC can store one weight per texel, but is also capable of storing lower resolution weight grids + * that are interpolated during decompression to assign a weight to a texel. Storing fewer weights + * can free up a substantial amount of bits that we can then spend on more useful things, such as + * more accurate endpoints and weights, or additional partitions. + * + * This data structure is used to store information about a single weight grid decimation pattern, + * for a single block size.
*/ -struct decimation_table +struct decimation_info { - // TODO: Make these byte values - int texel_count; - int weight_count; - int weight_x; - int weight_y; - int weight_z; - - uint8_t texel_weight_count[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel - - // The 4t and t4 tables are the same data, but transposed to allow optimal - // data access patterns depending on how we can unroll loops - alignas(ASTCENC_VECALIGN) float texel_weights_float_4t[4][MAX_TEXELS_PER_BLOCK]; // the weight to assign to each weight - alignas(ASTCENC_VECALIGN) uint8_t texel_weights_4t[4][MAX_TEXELS_PER_BLOCK]; // the weights that go into a texel calculation - alignas(ASTCENC_VECALIGN) uint8_t texel_weights_int_4t[4][MAX_TEXELS_PER_BLOCK]; // the weight to assign to each weight - - uint8_t weight_texel_count[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to - - // Stored transposed to give better access patterns - uint8_t weight_texel[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the texels that the weight contributes to - alignas(ASTCENC_VECALIGN) float weights_flt[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the weights that the weight contributes to a texel. - - // folded data structures: - // * texel_weights_texel[i][j] = texel_weights[weight_texel[i][j]]; - // * texel_weights_float_texel[i][j] = texel_weights_float[weight_texel[i][j]] - uint8_t texel_weights_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; - float texel_weights_float_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; + /** @brief The total number of texels in the block. */ + uint8_t texel_count; + + /** @brief The total number of weights stored. */ + uint8_t weight_count; + + /** @brief The number of stored weights in the X dimension. */ + uint8_t weight_x; + + /** @brief The number of stored weights in the Y dimension. */ + uint8_t weight_y; + + /** @brief The number of stored weights in the Z dimension. 
*/ + uint8_t weight_z; + + /** @brief The number of stored weights that contribute to each texel, between 1 and 4. */ + uint8_t texel_weight_count[BLOCK_MAX_TEXELS]; + + /** @brief The weight index of the N weights that need to be interpolated for each texel. */ + uint8_t texel_weights_4t[4][BLOCK_MAX_TEXELS]; + + /** @brief The bilinear interpolation weighting of the N input weights for each texel, between 0 and 16. */ + uint8_t texel_weights_int_4t[4][BLOCK_MAX_TEXELS]; + + /** @brief The bilinear interpolation weighting of the N input weights for each texel, between 0 and 1. */ + alignas(ASTCENC_VECALIGN) float texel_weights_float_4t[4][BLOCK_MAX_TEXELS]; + + /** @brief The number of texels that each stored weight contributes to. */ + uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS]; + + /** @brief The list of weights that contribute to each texel. */ + uint8_t weight_texel[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; + + /** @brief The list of weight indices that contribute to each texel. */ + alignas(ASTCENC_VECALIGN) float weights_flt[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; + + /** + * @brief Folded structure for faster access: + * texel_weights_texel[i][j][.] = texel_weights[.][weight_texel[i][j]] + */ + uint8_t texel_weights_texel[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS][4]; + + /** + * @brief Folded structure for faster access: + * texel_weights_float_texel[i][j][.] = texel_weights_float[.][weight_texel[i][j]] + */ + float texel_weights_float_texel[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS][4]; }; /** - * @brief Metadata for single block mode for a specific BSD. + * @brief Metadata for single block mode for a specific block size. */ struct block_mode { - int8_t decimation_mode; - int8_t quant_mode; + /** @brief The block mode index in the ASTC encoded form. */ + uint16_t mode_index; + + /** @brief The decimation mode index in the compressor reindexed list. */ + uint8_t decimation_mode; + + /** @brief The weight quantization used by this block mode. 
*/ + uint8_t quant_mode; + + /** @brief Is a dual weight plane used by this block mode? */ uint8_t is_dual_plane : 1; + + /** @brief Is this mode enabled in the current search preset? */ uint8_t percentile_hit : 1; + + /** @brief Is this mode enabled for early fast-path searches in the current search preset? */ uint8_t percentile_always : 1; - int16_t mode_index; + + /** + * @brief Get the weight quantization used by this block mode. + * + * @return The quantization level. + */ + inline quant_method get_weight_quant_mode() const + { + return (quant_method)this->quant_mode; + } }; /** - * @brief Metadata for single decimation mode for a specific BSD. + * @brief Metadata for single decimation mode for a specific block size. */ struct decimation_mode { + /** @brief The max weight precision for 1 plane, or -1 if not supported. */ + // TODO: Try unsigned sentinel to avoid signext on load? int8_t maxprec_1plane; + + /** @brief The max weight precision for 2 planes, or -1 if not supported. */ int8_t maxprec_2planes; + + /** @brief Is this mode enabled in the current search preset? */ uint8_t percentile_hit : 1; + + /** @brief Is this mode enabled for early fast-path searches in the current search preset? */ uint8_t percentile_always : 1; }; /** * @brief Data tables for a single block size. * - * The decimation tables store the information to apply weight grid dimension - * reductions. We only store the decimation modes that are actually needed by - * the current context; many of the possible modes will be unused (too many - * weights for the current block size or disabled by heuristics). The actual - * number of weights stored is @c decimation_mode_count, and the - * @c decimation_modes and @c decimation_tables arrays store the active modes - * contiguously at the start of the array. These entries are not stored in any - * particuar order. - * - * The block mode tables store the unpacked block mode settings. 
Block modes - * are stored in the compressed block as an 11 bit field, but for any given - * block size and set of compressor heuristics, only a subset of the block - * modes will be used. The actual number of block modes stored is indicated in - * @c block_mode_count, and the @c block_modes array store the active modes - * contiguously at the start of the array. These entries are stored in - * incrementing "packed" value order, which doesn't mean much once unpacked. - * To allow decompressors to reference the packed data efficiently the - * @c block_mode_packed_index array stores the mapping between physical ID and - * the actual remapped array index. + * The decimation tables store the information to apply weight grid dimension reductions. We only + * store the decimation modes that are actually needed by the current context; many of the possible + * modes will be unused (too many weights for the current block size or disabled by heuristics). The + * actual number of decimation modes stored is @c decimation_mode_count, and the @c decimation_modes and + * @c decimation_tables arrays store the active modes contiguously at the start of the array. These + * entries are not stored in any particular order. + * + * The block mode tables store the unpacked block mode settings. Block modes are stored in the + * compressed block as an 11 bit field, but for any given block size and set of compressor + * heuristics, only a subset of the block modes will be used. The actual number of block modes + * stored is indicated in @c block_mode_count, and the @c block_modes array stores the active modes + * contiguously at the start of the array. These entries are stored in incrementing "packed" value + * order, which doesn't mean much once unpacked. To allow decompressors to reference the packed data + * efficiently the @c block_mode_packed_index array stores the mapping between physical ID and the + * actual remapped array index.
*/ struct block_size_descriptor { - /**< The block X dimension, in texels. */ - int xdim; + /** @brief The block X dimension, in texels. */ + uint8_t xdim; - /**< The block Y dimension, in texels. */ - int ydim; + /** @brief The block Y dimension, in texels. */ + uint8_t ydim; - /**< The block Z dimension, in texels. */ - int zdim; + /** @brief The block Z dimension, in texels. */ + uint8_t zdim; - /**< The block total texel count. */ - int texel_count; + /** @brief The block total texel count. */ + uint8_t texel_count; + /** @brief The number of stored decimation modes. */ + unsigned int decimation_mode_count; - /**< The number of stored decimation modes. */ - int decimation_mode_count; + /** @brief The number of stored block modes. */ + unsigned int block_mode_count; - /**< The active decimation modes, stored in low indices. */ - decimation_mode decimation_modes[MAX_DECIMATION_MODES]; + /** @brief The active decimation modes, stored in low indices. */ + decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES]; - /**< The active decimation tables, stored in low indices. */ - const decimation_table *decimation_tables[MAX_DECIMATION_MODES]; + /** @brief The active decimation tables, stored in low indices. */ + const decimation_info *decimation_tables[WEIGHTS_MAX_DECIMATION_MODES]; + /** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */ + uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES]; - /**< The number of stored block modes. */ - int block_mode_count; + /** @brief The active block modes, stored in low indices. */ + block_mode block_modes[WEIGHTS_MAX_BLOCK_MODES]; - /**< The active block modes, stored in low indices. */ - block_mode block_modes[MAX_WEIGHT_MODES]; + /** @brief The partion tables for all of the possible partitions. */ + partition_info partitions[(3 * BLOCK_MAX_PARTITIONINGS) + 1]; - /**< The block mode array index, or -1 if not valid in current config. 
*/ - int16_t block_mode_packed_index[MAX_WEIGHT_MODES]; + /** @brief The active texels for k-means partition selection. */ + uint8_t kmeans_texels[BLOCK_MAX_KMEANS_TEXELS]; + /** + * @brief Get the block mode structure for index @c block_mode. + * + * This function can only return block modes that are enabled by the current compressor config. + * Decompression from an arbitrary source should not use this without first checking that the + * packed block mode index is not @c BLOCK_BAD_BLOCK_MODE. + * + * @param block_mode The packed block mode index. + * + * @return The block mode structure. + */ + const block_mode& get_block_mode(unsigned int block_mode) const + { + unsigned int packed_index = this->block_mode_packed_index[block_mode]; + assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count); + return block_modes[packed_index]; + } - /**< The texel count for k-means partition selection. */ - int kmeans_texel_count; + /** + * @brief Get the decimation mode structure for index @c decimation_mode. + * + * This function can only return decimation modes that are enabled by the current compressor + * config. The mode array is stored packed, but this is only ever indexed by the packed index + * stored in the @c block_mode and never exists in an unpacked form. + * + * @param decimation_mode The packed decimation mode index. + * + * @return The decimation mode structure. + */ + const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const + { + return this->decimation_modes[decimation_mode]; + } - /**< The active texels for k-means partition selection. */ - int kmeans_texels[MAX_KMEANS_TEXELS]; + /** + * @brief Get the decimation info structure for index @c decimation_mode. + * + * This function can only return decimation modes that are enabled by the current compressor + * config. 
The mode array is stored packed, but this is only ever indexed by the packed index + * stored in the @c block_mode and never exists in an unpacked form. + * + * @param decimation_mode The packed decimation mode index. + * + * @return The decimation info structure. + */ + const decimation_info& get_decimation_info(unsigned int decimation_mode) const + { + return *this->decimation_tables[decimation_mode]; + } - /**< The partion tables for all of the possible partitions. */ - partition_info partitions[(3 * PARTITION_COUNT) + 1]; + /** + * @brief Get the partition info table for a given partition count. + * + * @param partition_count The number of partitions we want the table for. + * + * @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part). + */ + const partition_info* get_partition_table(unsigned int partition_count) const + { + if (partition_count == 1) + { + partition_count = 5; + } + unsigned int index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS; + return this->partitions + index; + } + + /** + * @brief Get the partition info structure for a given partition count and seed. + * + * @param partition_count The number of partitions we want the info for. + * @param index The partition seed (between 0 and 1023). + * + * @return The partition info structure. + */ + const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const + { + return get_partition_table(partition_count)[index]; + } }; -// data structure representing one block of an image. -// it is expanded to float prior to processing to save some computation time -// on conversions to/from uint8_t (this also allows us to handle HDR textures easily) +/** + * @brief The image data for a single block. + * + * The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy + * vectorization. Input data is converted to float and stored as values between 0 and 65535. 
LDR + * data is stored as direct UNORM data, HDR data is stored as LNS data. + * + * The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during + * decompression. The current compressor will always use HDR endpoint formats when in HDR mode. + */ +// TODO: Rename this image_block? struct imageblock { - float data_r[MAX_TEXELS_PER_BLOCK]; // the data that we will compress, either linear or LNS (0..65535 in both cases) - float data_g[MAX_TEXELS_PER_BLOCK]; - float data_b[MAX_TEXELS_PER_BLOCK]; - float data_a[MAX_TEXELS_PER_BLOCK]; + /** @brief The input (compress) or output (decompress) data for the red color component. */ + float data_r[BLOCK_MAX_TEXELS]; + + /** @brief The input (compress) or output (decompress) data for the green color component. */ + float data_g[BLOCK_MAX_TEXELS]; + + /** @brief The input (compress) or output (decompress) data for the blue color component. */ + float data_b[BLOCK_MAX_TEXELS]; + + /** @brief The input (compress) or output (decompress) data for the alpha color component. */ + float data_a[BLOCK_MAX_TEXELS]; + /** @brief The original data for texel 0 for constant color block encoding. */ vfloat4 origin_texel; + + /** @brief The min component value of all texels in the block. */ vfloat4 data_min; + + /** @brief The max component value of all texels in the block. */ vfloat4 data_max; - bool grayscale; - uint8_t rgb_lns[MAX_TEXELS_PER_BLOCK]; // 1 if RGB data are being treated as LNS - uint8_t alpha_lns[MAX_TEXELS_PER_BLOCK]; // 1 if Alpha data are being treated as LNS + /** @brief Is this greyscale block where R == G == B for all texels? */ + bool grayscale; + + /** @brief Set to 1 if a texel is using HDR RGB endpoints (decompression only). */ + uint8_t rgb_lns[BLOCK_MAX_TEXELS]; + + /** @brief Set to 1 if a texel is using HDR alpha endpoints (decompression only). 
*/ + uint8_t alpha_lns[BLOCK_MAX_TEXELS]; - int xpos, ypos, zpos; + /** @brief The X position of this block in the input or output image. */ + unsigned int xpos; - inline vfloat4 texel(int index) const + /** @brief The Y position of this block in the input or output image. */ + unsigned int ypos; + + /** @brief The Z position of this block in the input or output image. */ + unsigned int zpos; + + /** + * @brief Get an RGBA texel value from the data. + * + * @param index The texel index. + * + * @return The texel in RGBA component ordering. + */ + inline vfloat4 texel(unsigned int index) const { return vfloat4(data_r[index], data_g[index], @@ -541,166 +848,276 @@ struct imageblock data_a[index]); } - inline vfloat4 texel3(int index) const + /** + * @brief Get an RGB texel value from the data. + * + * @param index The texel index. + * + * @return The texel in RGB0 component ordering. + */ + inline vfloat4 texel3(unsigned int index) const { return vfloat3(data_r[index], data_g[index], data_b[index]); } + + /** + * @brief Get the default alpha value for endpoints that don't store it. + * + * The default depends on whether the alpha endpoint is LDR or HDR. + * + * @return The alpha value in the scaled range used by the compressor. + */ + inline float get_default_alpha() const + { + return this->alpha_lns[0] ? (float)0x7800 : (float)0xFFFF; + } + + /** + * @brief Test if this block is using alpha. + * + * @todo This looks suspect, but matches the original astcenc 1.7 code. This checks that the + * alpha is not constant (no weight needed), NOT that it is 1.0 and not stored as an endpoint. + * Review all uses of this function and check that it is sensible ... + * + * @return @c true if the alpha value is not constant across the block, @c false otherwise. + */ + inline bool is_using_alpha() const + { + return this->data_min.lane<3>() != this->data_max.lane<3>(); + } + + /** + * @brief Test if this block is a luminance block with constant 1.0 alpha. 
+ * + * @return @c true if the block is a luminance block, @c false otherwise. + */ + inline bool is_luminance() const + { + float default_alpha = this->get_default_alpha(); + bool alpha1 = (this->data_min.lane<3>() == default_alpha) && + (this->data_max.lane<3>() == default_alpha); + return this->grayscale && alpha1; + } + + /** + * @brief Test if this block is a luminance block with variable alpha. + * + * @return @c true if the block is a luminance + alpha block, @c false otherwise. + */ + inline bool is_luminancealpha() const + { + float default_alpha = this->get_default_alpha(); + bool alpha1 = (this->data_min.lane<3>() == default_alpha) && + (this->data_max.lane<3>() == default_alpha); + return this->grayscale && !alpha1; + } }; -static inline float imageblock_default_alpha(const imageblock * blk) +/** + * @brief Data structure representing per-texel and per-component error weights for a block. + * + * This structure stores a multiplier for the error weight to apply to each component when computing + * block errors. This can be used as a general purpose technique to amplify or diminish the + * significance of texels and individual color components, based on what is being stored and the + * compressor heuristics. It can be applied in many different ways, some of which are outlined in + * the description below (this is not exhaustive). + * + * For blocks that span the edge of the texture, the weighting for texels outside of the texture + * bounds can be zeroed to maximize the quality of the texels inside the texture. + * + * For textures storing fewer than 4 components the weighting for color components that are unused + * can be zeroed to maximize the quality of the components that are used. This is particularly + * important for two component textures, which must be imported in LLLA format to match the two + * component endpoint encoding.
Without manual component weighting to correct significance the "L" + * would be treated as three times more important than A because of the replication. + * + * For HDR textures we can use perceptual weighting which is approximately inverse to the luminance + * of a texel. + * + * For normal maps we can use perceptual weighting which assigns higher weight to low-variability + * regions than to high-variability regions, ensuring smooth surfaces don't pick up artifacts. + * + * For transparent texels we can multiply the RGB weights by the alpha value, ensuring that + * the least transparent texels maintain the highest accuracy. + */ +struct error_weight_block { - return blk->alpha_lns[0] ? (float)0x7800 : (float)0xFFFF; -} + /** @brief The full per texel per component error weights. */ + vfloat4 error_weights[BLOCK_MAX_TEXELS]; -static inline int imageblock_uses_alpha(const imageblock * blk) -{ - return blk->data_min.lane<3>() != blk->data_max.lane<3>(); -} + /** @brief The average of the RGBA error weights per texel. */ + float texel_weight[BLOCK_MAX_TEXELS]; -static inline int imageblock_is_lum(const imageblock * blk) -{ - float default_alpha = imageblock_default_alpha(blk); - bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && - (blk->data_max.lane<3>() == default_alpha); - return blk->grayscale && alpha1; -} -static inline int imageblock_is_lumalp(const imageblock * blk) -{ - float default_alpha = imageblock_default_alpha(blk); - bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && - (blk->data_max.lane<3>() == default_alpha); - return blk->grayscale && !alpha1; -} + /** @brief The average of the GBA error weights per texel. */ + float texel_weight_gba[BLOCK_MAX_TEXELS]; + + /** @brief The average of the RBA error weights per texel. */ + float texel_weight_rba[BLOCK_MAX_TEXELS]; + + /** @brief The average of the RGA error weights per texel. */ + float texel_weight_rga[BLOCK_MAX_TEXELS]; + + /** @brief The average of the RGB error weights per texel.
*/ + float texel_weight_rgb[BLOCK_MAX_TEXELS]; + + + /** @brief The average of the RG error weights per texel. */ + float texel_weight_rg[BLOCK_MAX_TEXELS]; -/* - Data structure representing error weighting for one block of an image. this is used as - a multiplier for the error weight to apply to each color component when computing PSNR. + /** @brief The average of the RB error weights per texel. */ + float texel_weight_rb[BLOCK_MAX_TEXELS]; - This weighting has several uses: it's usable for RA, GA, BA, A weighting, which is useful - for alpha-textures it's usable for HDR textures, where weighting should be approximately inverse to - luminance it's usable for perceptual weighting, where we assign higher weight to low-variability - regions than to high-variability regions. it's usable for suppressing off-edge block content in - case the texture doesn't actually extend to the edge of the block. + /** @brief The average of the GB error weights per texel. */ + float texel_weight_gb[BLOCK_MAX_TEXELS]; - For the default case (everything is evenly weighted), every weight is 1. For the RA,GA,BA,A case, - we multiply the R,G,B weights with that of the alpha. - Putting the same weight in every component should result in the default case. - The following relations should hold: + /** @brief The individual R component error weights per texel. */ + float texel_weight_r[BLOCK_MAX_TEXELS]; - texel_weight_rg[i] = (texel_weight_r[i] + texel_weight_g[i]) / 2 - texel_weight_lum[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i]) / 3 - texel_weight[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i] + texel_weight_a[i] / 4 + /** @brief The individual G component error weights per texel. */ + float texel_weight_g[BLOCK_MAX_TEXELS]; + + /** @brief The individual B component error weights per texel. */ + float texel_weight_b[BLOCK_MAX_TEXELS]; + + /** @brief The individual A component error weights per texel. 
*/ + float texel_weight_a[BLOCK_MAX_TEXELS]; +}; + +/** + * @brief Data structure storing the color endpoints for a block. + * + * @todo Store as interleaved array to get better locality? */ +struct endpoints +{ + /** @brief The number of partition endpoints stored. */ + unsigned int partition_count; -struct error_weight_block + /** @brief The colors for endpoint 0. */ + vfloat4 endpt0[BLOCK_MAX_PARTITIONS]; + + /** @brief The colors for endpoint 1. */ + vfloat4 endpt1[BLOCK_MAX_PARTITIONS]; +}; + +/** + * @brief Data structure storing the color endpoints and weights. + */ +struct endpoints_and_weights { - vfloat4 error_weights[MAX_TEXELS_PER_BLOCK]; + /** @brief True if all active values in weight_error_scale are the same. */ + bool is_constant_weight_error_scale; - float texel_weight[MAX_TEXELS_PER_BLOCK]; + /** @brief The color endpoints. */ + endpoints ep; + + /** @brief The undecimated and unquantized weight for each texel. */ + alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS]; - float texel_weight_gba[MAX_TEXELS_PER_BLOCK]; - float texel_weight_rba[MAX_TEXELS_PER_BLOCK]; - float texel_weight_rga[MAX_TEXELS_PER_BLOCK]; - float texel_weight_rgb[MAX_TEXELS_PER_BLOCK]; + /** @brief The undecimated and unquantized weight error scaling for each texel. */ + alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS]; +}; - float texel_weight_rg[MAX_TEXELS_PER_BLOCK]; - float texel_weight_rb[MAX_TEXELS_PER_BLOCK]; - float texel_weight_gb[MAX_TEXELS_PER_BLOCK]; - float texel_weight_ra[MAX_TEXELS_PER_BLOCK]; +/** + * @brief Utility storing estimated errors from choosing particular endpoint encodings. + */ +struct encoding_choice_errors +{ + /** @brief Error of using LDR RGB-scale instead of complete endpoints. 
*/ + float rgb_scale_error; - float texel_weight_r[MAX_TEXELS_PER_BLOCK]; - float texel_weight_g[MAX_TEXELS_PER_BLOCK]; - float texel_weight_b[MAX_TEXELS_PER_BLOCK]; - float texel_weight_a[MAX_TEXELS_PER_BLOCK]; + /** @brief Error of using HDR RGB-scale instead of complete endpoints. */ + float rgb_luma_error; + + /** @brief Error of using luminance instead of RGB. */ + float luminance_error; + + /** @brief Error of discarding alpha and using a constant 1.0 alpha. */ + float alpha_drop_error; + + /** @brief Can we use delta offset encoding? */ + bool can_offset_encode; + + /** @brief Can we use blue contraction encoding? */ + bool can_blue_contract; }; -// enumeration of all the quantization methods we support under this format. -enum quant_method +/** + * @brief Preallocated working buffers, allocated per thread during context creation. + */ +struct alignas(ASTCENC_VECALIGN) compress_fixed_partition_buffers { - QUANT_2 = 0, - QUANT_3 = 1, - QUANT_4 = 2, - QUANT_5 = 3, - QUANT_6 = 4, - QUANT_8 = 5, - QUANT_10 = 6, - QUANT_12 = 7, - QUANT_16 = 8, - QUANT_20 = 9, - QUANT_24 = 10, - QUANT_32 = 11, - QUANT_40 = 12, - QUANT_48 = 13, - QUANT_64 = 14, - QUANT_80 = 15, - QUANT_96 = 16, - QUANT_128 = 17, - QUANT_160 = 18, - QUANT_192 = 19, - QUANT_256 = 20 + /** @brief Ideal endpoints and weights for plane 1. */ + endpoints_and_weights ei1; + + /** @brief Ideal endpoints and weights for plane 2. */ + endpoints_and_weights ei2; + + /** @brief Ideal endpoints and weights for plane 1. */ + endpoints_and_weights eix1[WEIGHTS_MAX_DECIMATION_MODES]; + + /** @brief Ideal endpoints and weights for plane 2. */ + endpoints_and_weights eix2[WEIGHTS_MAX_DECIMATION_MODES]; + + /** @brief Decimated weight values, rounded to quantization points but not stored packed. */ + alignas(ASTCENC_VECALIGN) float decimated_quantized_weights[2 * WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS]; + + /** @brief Decimated and unquantized weight values.
*/ + alignas(ASTCENC_VECALIGN) float decimated_weights[2 * WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS]; + + /** @brief Decimated and quantized weight values stored in the packed quantized weight range. */ + alignas(ASTCENC_VECALIGN) float flt_quantized_decimated_quantized_weights[2 * WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS]; + + /** @brief Decimated and quantized weight values stored in the packed quantized weight range. */ + alignas(ASTCENC_VECALIGN) uint8_t u8_quantized_decimated_quantized_weights[2 * WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS]; }; -static inline int get_quant_method_levels(quant_method method) +/** + * @brief Preallocated working buffers, allocated per thread during context creation. + * + * @todo Merge with compress_fixed_partition_buffers? + * @todo Allocated imageblock and scb here too? + */ +struct compress_symbolic_block_buffers { - switch(method) - { - case QUANT_2: return 2; - case QUANT_3: return 3; - case QUANT_4: return 4; - case QUANT_5: return 5; - case QUANT_6: return 6; - case QUANT_8: return 8; - case QUANT_10: return 10; - case QUANT_12: return 12; - case QUANT_16: return 16; - case QUANT_20: return 20; - case QUANT_24: return 24; - case QUANT_32: return 32; - case QUANT_40: return 40; - case QUANT_48: return 48; - case QUANT_64: return 64; - case QUANT_80: return 80; - case QUANT_96: return 96; - case QUANT_128: return 128; - case QUANT_160: return 160; - case QUANT_192: return 192; - case QUANT_256: return 256; - // Unreachable - the enum is fully described - default: return 0; - } -} + /** @brief The error weight block for the current thread. */ + error_weight_block ewb; + + /** @brief The scratch working set for the current thread. */ + compress_fixed_partition_buffers planes; +}; /** * @brief Weight quantization transfer table. * - * ASTC can store texel weights at many quantization levels, so for performance - * we store essential information about each level as a precomputed data - * structure. 
+ * ASTC can store texel weights at many quantization levels, so for performance we store essential + * information about each level as a precomputed data structure. Unquantized weights are integers + * or floats in the range [0, 64]. * - * Unquantized weights are integers in the range [0, 64], or floats [0, 1]. - * - * This structure provides the following information: - * A table, used to estimate the closest quantized - weight for a given floating-point weight. For each quantized weight, the corresponding unquantized - and floating-point values. For each quantized weight, a previous-value and a next-value. + * This structure provides a table, used to estimate the closest quantized weight for a given + * floating-point weight. For each quantized weight, the corresponding unquantized values. For each + * quantized weight, a previous-value and a next-value. */ struct quantization_and_transfer_table { - /** The quantization level used */ + /** @brief The quantization level used */ quant_method method; - /** The unscrambled unquantized value. */ + + /** @brief The unscrambled unquantized value. */ float unquantized_value_unsc[33]; - /** The scrambling order: value[map[i]] == value_unsc[i] */ + + /** @brief The scrambling order: value[map[i]] == value_unsc[i] */ int32_t scramble_map[32]; - /** The scrambled unquantized values. */ + + /** @brief The scrambled unquantized values. */ uint8_t unquantized_value[32]; + /** - * An encoded table of previous-and-next weight values, indexed by the - * current unquantized value. + * @brief A table of previous-and-next weights, indexed by the current unquantized value. * * bits 7:0 = previous-index, unquantized * * bits 15:8 = next-index, unquantized * * bits 23:16 = previous-index, quantized @@ -709,63 +1126,254 @@ struct quantization_and_transfer_table uint32_t prev_next_values[65]; }; + +/** @brief The precomputed quant and transfer table. 
*/ extern const quantization_and_transfer_table quant_and_xfer_tables[12]; -enum endpoint_formats -{ - FMT_LUMINANCE = 0, - FMT_LUMINANCE_DELTA = 1, - FMT_HDR_LUMINANCE_LARGE_RANGE = 2, - FMT_HDR_LUMINANCE_SMALL_RANGE = 3, - FMT_LUMINANCE_ALPHA = 4, - FMT_LUMINANCE_ALPHA_DELTA = 5, - FMT_RGB_SCALE = 6, - FMT_HDR_RGB_SCALE = 7, - FMT_RGB = 8, - FMT_RGB_DELTA = 9, - FMT_RGB_SCALE_ALPHA = 10, - FMT_HDR_RGB = 11, - FMT_RGBA = 12, - FMT_RGBA_DELTA = 13, - FMT_HDR_RGB_LDR_ALPHA = 14, - FMT_HDR_RGBA = 15 -}; +/** @brief The block is an error block, and will return error color or NaN. */ +static constexpr uint8_t SYM_BTYPE_ERROR { 0 }; + +/** @brief The block is a constant color block using FP16 colors. */ +static constexpr uint8_t SYM_BTYPE_CONST_F16 { 1 }; + +/** @brief The block is a constant color block using UNORM16 colors. */ +static constexpr uint8_t SYM_BTYPE_CONST_U16 { 2 }; + +/** @brief The block is a normal non-constant color block. */ +static constexpr uint8_t SYM_BTYPE_NONCONST { 3 }; + +/** + * @brief A symbolic representation of a compressed block. + * + * The symbolic representation stores the unpacked content of a single + * @c physical_compressed_block, in a form which is much easier to access for + * the rest of the compressor code. + */ +struct symbolic_compressed_block +{ + /** @brief The block type, one of the @c SYM_BTYPE_* constants. */ + uint8_t block_type; + + /** @brief The number of partitions; valid for @c NONCONST blocks. */ + uint8_t partition_count; + + /** @brief Non-zero if the color formats matched; valid for @c NONCONST blocks. */ + // TODO: Do we need to store this? + uint8_t color_formats_matched; + + /** @brief The plane 2 color component, or -1 if single plane; valid for @c NONCONST blocks. */ + // Try unsigned sentinel to avoid signext on load + int8_t plane2_component; + + /** @brief The block mode; valid for @c NONCONST blocks.
*/ + uint16_t block_mode; + + /** @brief The partition index; valid for @c NONCONST blocks if 2 or more partitions. */ + uint16_t partition_index; + + /** @brief The endpoint color formats for each partition; valid for @c NONCONST blocks. */ + uint8_t color_formats[BLOCK_MAX_PARTITIONS]; + + /** @brief The endpoint color quantization mode; valid for @c NONCONST blocks. */ + quant_method quant_mode; + + /** @brief The error of the current encoding; valid for @c NONCONST blocks. */ + float errorval; + + // We can't have both of these at the same time + union { + /** @brief The constant color; valid for @c CONST blocks. */ + int constant_color[BLOCK_MAX_COMPONENTS]; + /** @brief The quantized endpoint color pairs; valid for @c NONCONST blocks. */ + uint8_t color_values[BLOCK_MAX_PARTITIONS][8]; + }; + + /** @brief The quantized and decimated weights. + * + * If dual plane, the second plane starts at @c weights[WEIGHTS_PLANE2_OFFSET]. + */ + uint8_t weights[BLOCK_MAX_WEIGHTS]; + + /** + * @brief Get the color quantization mode used by this block. + * + * @return The quantization level. + */ + inline quant_method get_color_quant_mode() const + { + return this->quant_mode; + } +}; + +/** + * @brief A physical representation of a compressed block. + * + * The physical representation stores the raw bytes of the format in memory. + */ +struct physical_compressed_block +{ + /** @brief The ASTC encoded data for a single block. */ + uint8_t data[16]; +}; + + +/** + * @brief Parameter structure for @c compute_pixel_region_variance(). + * + * This function takes a structure to avoid spilling arguments to the stack on every function + * invocation, as there are a lot of parameters. + */ +struct pixel_region_variance_args +{ + /** @brief The image to analyze. */ + const astcenc_image* img; + + /** @brief The RGB component power adjustment. */ + float rgb_power; + + /** @brief The alpha component power adjustment.
*/ + float alpha_power; + + /** @brief The component swizzle pattern. */ + astcenc_swizzle swz; + + /** @brief Should the algorithm bother with Z axis processing? */ + bool have_z; + + /** @brief The kernel radius for average and variance. */ + unsigned int avg_var_kernel_radius; + + /** @brief The kernel radius for alpha processing. */ + unsigned int alpha_kernel_radius; + + /** @brief The X dimension of the working data to process. */ + unsigned int size_x; + + /** @brief The Y dimension of the working data to process. */ + unsigned int size_y; + + /** @brief The Z dimension of the working data to process. */ + unsigned int size_z; + + /** @brief The X position of first src and dst data in the data set. */ + unsigned int offset_x; + + /** @brief The Y position of first src and dst data in the data set. */ + unsigned int offset_y; + + /** @brief The Z position of first src and dst data in the data set. */ + unsigned int offset_z; + + /** @brief The working memory buffer. */ + vfloat4 *work_memory; +}; + +/** + * @brief Parameter structure for @c compute_averages_and_variances_proc(). + */ +struct avg_var_args +{ + /** @brief The arguments for the nested variance computation. */ + pixel_region_variance_args arg; + + // The above has a reference to the image already? + /** @brief The image X dimensions. */ + unsigned int img_size_x; + + /** @brief The image Y dimensions. */ + unsigned int img_size_y; + + /** @brief The image Z dimensions. */ + unsigned int img_size_z; + + /** @brief The maximum working block dimensions in X and Y dimensions. */ + unsigned int blk_size_xy; + + /** @brief The maximum working block dimensions in Z dimensions. */ + unsigned int blk_size_z; + + /** @brief The working block memory size. */ + unsigned int work_memory_size; +}; + +#if defined(ASTCENC_DIAGNOSTICS) +class TraceLog; // See astcenc_diagnostic_trace for details. +#endif + +/** + * @brief The astcenc compression context.
+ */ +struct astcenc_context +{ + /** @brief The configuration this context was created with. */ + astcenc_config config; + + /** @brief The thread count supported by this context. */ + unsigned int thread_count; + + /** @brief The block size descriptor this context was created with. */ + block_size_descriptor* bsd; + + /* + * Fields below here are not needed in a decompress-only build, but some remain as they are + * small and it avoids littering the code with #ifdefs. The most significant contributors to + * large structure size are omitted. + */ + + /** @brief The input images averages table, may be @c nullptr if not needed. */ + vfloat4 *input_averages; + + /** @brief The input image RGBA channel variances table, may be @c nullptr if not needed. */ + vfloat4 *input_variances; + + /** @brief The input image alpha channel averages table, may be @c nullptr if not needed. */ + float *input_alpha_averages; + + + /** @brief The scratch working buffers, one per thread (see @c thread_count). */ + compress_symbolic_block_buffers* working_buffers; + +#if !defined(ASTCENC_DECOMPRESS_ONLY) + /** @brief The pixel region and variance worker arguments. */ + avg_var_args avg_var_preprocess_args; + + /** @brief The per-texel deblocking weights for the current block size. */ + // TODO: Move to the BSD? + float deblock_weights[BLOCK_MAX_TEXELS]; + + /** @brief The parallel manager for averages and variances computation. */ + ParallelManager manage_avg_var; + + /** @brief The parallel manager for compression. */ + ParallelManager manage_compress; +#endif -struct symbolic_compressed_block -{ - int error_block; // 1 marks error block, 0 marks non-error-block. - int block_mode; // 0 to 2047. Negative value marks constant-color block (-1: FP16, -2:UINT16) - int partition_count; // 1 to 4; Zero marks a constant-color block. - int partition_index; // 0 to 1023 - int color_formats[4]; // color format for each endpoint color pair.
- int color_formats_matched; // color format for all endpoint pairs are matched. - int color_quant_level; - int plane2_component; // color component for second plane of weights - - // TODO: Under what circumstances is this ever more than 8 (4 pairs) colors - int color_values[4][12]; // quantized endpoint color pairs. - int constant_color[4]; // constant-color, as FP16 or UINT16. Used for constant-color blocks only. - // Quantized and decimated weights. In the case of dual plane, the second - // index plane starts at weights[PLANE2_WEIGHTS_OFFSET] - float errorval; // The error of the current encoding - uint8_t weights[MAX_WEIGHTS_PER_BLOCK]; -}; + /** @brief The parallel manager for decompression. */ + ParallelManager manage_decompress; -struct physical_compressed_block -{ - uint8_t data[16]; +#if defined(ASTCENC_DIAGNOSTICS) + /** + * @brief The diagnostic trace logger. + * + * Note that this is a singleton, so can only be used in single threaded mode. It only exists + * here so we have a reference to close the file at the end of the capture. + */ + TraceLog* trace_log; +#endif }; /* ============================================================================ - Functions and data pertaining to quantization and encoding + Functionality for managing block sizes and partition tables. ============================================================================ */ +// TODO: Make C++ constructor/destructor? + /** * @brief Populate the block size descriptor for the target block size. * - * This will also initialize the partition table metadata, which is stored - * as part of the BSD structure. All initialized block size descriptors must be - * terminated using term_block_size_descriptor to correctly free resources. + * This will also initialize the partition table metadata, which is stored as part of the BSD + * structure. All initialized block size descriptors must be terminated using a call to + * @c term_block_size_descriptor() to free resources. 
* * @param x_texels The number of texels in the block X dimension. * @param y_texels The number of texels in the block Y dimension. @@ -775,9 +1383,9 @@ struct physical_compressed_block * @param[out] bsd The descriptor to initialize. */ void init_block_size_descriptor( - int x_texels, - int y_texels, - int z_texels, + unsigned int x_texels, + unsigned int y_texels, + unsigned int z_texels, bool can_omit_modes, float mode_cutoff, block_size_descriptor& bsd); @@ -801,22 +1409,11 @@ void term_block_size_descriptor( void init_partition_tables( block_size_descriptor& bsd); -static inline const partition_info *get_partition_table( - const block_size_descriptor* bsd, - int partition_count -) { - if (partition_count == 1) { - partition_count = 5; - } - int index = (partition_count - 2) * PARTITION_COUNT; - return bsd->partitions + index; -} - /** * @brief Get the percentile table for 2D block modes. * - * This is an empirically determined prioritization of which block modes to - * use in the search in terms of their centile (lower centiles = more useful). + * This is an empirically determined prioritization of which block modes to use in the search in + * terms of their centile (lower centiles = more useful). * * Returns a dynamically allocated array; caller must free with delete[]. * @@ -826,8 +1423,8 @@ static inline const partition_info *get_partition_table( * @return The unpacked table. */ const float *get_2d_percentile_table( - int xdim, - int ydim); + unsigned int xdim, + unsigned int ydim); /** * @brief Query if a 2D block size is legal. @@ -835,8 +1432,8 @@ const float *get_2d_percentile_table( * @return True if legal, false otherwise. */ bool is_legal_2d_block_size( - int xdim, - int ydim); + unsigned int xdim, + unsigned int ydim); /** * @brief Query if a 3D block size is legal. @@ -844,24 +1441,50 @@ bool is_legal_2d_block_size( * @return True if legal, false otherwise. 
*/ bool is_legal_3d_block_size( - int xdim, - int ydim, - int zdim); + unsigned int xdim, + unsigned int ydim, + unsigned int zdim); -// *********************************************************** -// functions and data pertaining to quantization and encoding -// ********************************************************** +/* ============================================================================ + Functionality for managing BISE quantization and unquantization. +============================================================================ */ +/** + * @brief The precomputed table for quantizing color values. + * + * Indexed by [quant_mode][data_value]. + */ extern const uint8_t color_quant_tables[21][256]; + +/** + * @brief The precomputed table for unquantizing color values. + * + * Indexed by [quant_mode][data_value]. + */ extern const uint8_t color_unquant_tables[21][256]; + +/** + * @brief The precomputed quant mode storage table. + * + * Indexing by [integercount/2][bits] gives us the quantization level for a given integer count and + * number of compressed storage bits. Returns -1 for cases where the requested integer count cannot + * ever fit in the supplied storage size. + */ extern int8_t quant_mode_table[17][128]; +/** + * @brief Initialize the quant mode table. + * + * This is stored in global memory so this only needs to be done once, but is typically done + * whenever a new context is created. + */ +void init_quant_mode_table(); + /** * @brief Encode a packed string using BISE. * - * Note that BISE can return strings that are not a whole number of bytes - * in length, and ASTC can start storing strings in a block at arbitrary bit - * offsets in the encoded data. + * Note that BISE can return strings that are not a whole number of bytes in length, and ASTC can + * start storing strings in a block at arbitrary bit offsets in the encoded data. * * @param quant_level The BISE alphabet size. 
* @param character_count The number of characters in the string. @@ -871,16 +1494,16 @@ extern int8_t quant_mode_table[17][128]; */ void encode_ise( quant_method quant_level, - int character_count, + unsigned int character_count, const uint8_t* input_data, uint8_t* output_data, - int bit_offset); + unsigned int bit_offset); /** * @brief Decode a packed string using BISE. * - * Note that BISE input strings are not a whole number of bytes in length, and - * ASTC can start strings at arbitrary bit offsets in the encoded data. + * Note that BISE input strings are not a whole number of bytes in length, and ASTC can start + * strings at arbitrary bit offsets in the encoded data. * * @param quant_level The BISE alphabet size. * @param character_count The number of characters in the string. @@ -890,48 +1513,49 @@ void encode_ise( */ void decode_ise( quant_method quant_level, - int character_count, + unsigned int character_count, const uint8_t* input_data, uint8_t* output_data, - int bit_offset); + unsigned int bit_offset); /** * @brief Return the number of bits needed to encode an ISE sequence. * - * This implementation assumes that the @c quant level is untrusted, given it - * may come from random data being decompressed, so we return an unencodable - * size if that is the case. + * This implementation assumes that the @c quant level is untrusted, given it may come from random + * data being decompressed, so we return an arbitrary unencodable size if that is the case. * * @param character_count The number of items in the sequence. * @param quant_level The desired quantization level. * * @return The number of bits needed to encode the BISE string. 
*/ -int get_ise_sequence_bitcount( - int character_count, +unsigned int get_ise_sequence_bitcount( + unsigned int character_count, quant_method quant_level); -void build_quant_mode_table(void); - -// ********************************************** -// functions and data pertaining to partitioning -// ********************************************** +/* ============================================================================ + Functionality for managing color partitioning. +============================================================================ */ /** - * @brief Compute averages and dominant directions for each partition in a 4 component texture. + * @brief Compute averages and dominant directions for each partition in a 2 component texture. * - * @param pi The partition info for the current trial. - * @param blk The image block color data to be compressed. - * @param ewb The image block weighted error data. - * @param[out] pm The output partition metrics. - * - Only pi.partition_count array entries actually get initialized. - * - Direction vectors @c pm.dir are not normalized. + * @param pi The partition info for the current trial. + * @param blk The image block color data to be compressed. + * @param ewb The image block weighted error data. + * @param component1 The first component included in the analysis. + * @param component2 The second component included in the analysis. + * @param[out] pm The output partition metrics. + * - Only pi.partition_count array entries actually get initialized. + * - Direction vectors @c pm.dir are not normalized. */ -void compute_avgs_and_dirs_4_comp( +void compute_avgs_and_dirs_2_comp( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - partition_metrics pm[4]); + unsigned int component1, + unsigned int component2, + partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** * @brief Compute averages and dominant directions for each partition in a 3 component texture. 
@@ -948,82 +1572,78 @@ void compute_avgs_and_dirs_3_comp( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - int omitted_component, - partition_metrics pm[4]); + unsigned int omitted_component, + partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** - * @brief Compute averages and dominant directions for each partition in a 2 component texture. + * @brief Compute averages and dominant directions for each partition in a 4 component texture. * - * @param pi The partition info for the current trial. - * @param blk The image block color data to be compressed. - * @param ewb The image block weighted error data. - * @param component1 The first component included in the analysis. - * @param component2 The second component included in the analysis. - * @param[out] pm The output partition metrics. - * - Only pi.partition_count array entries actually get initialized. - * - Direction vectors @c pm.dir are not normalized. + * @param pi The partition info for the current trial. + * @param blk The image block color data to be compressed. + * @param ewb The image block weighted error data. + * @param[out] pm The output partition metrics. + * - Only pi.partition_count array entries actually get initialized. + * - Direction vectors @c pm.dir are not normalized. */ -void compute_avgs_and_dirs_2_comp( +void compute_avgs_and_dirs_4_comp( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - int component1, - int component2, - partition_metrics pm[4]); + partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** - * @brief Compute the RGBA error for uncorrelated and same chroma projections. + * @brief Compute the RGB error for uncorrelated and same chroma projections. * * The output of compute averages and dirs is post processed to define two lines, both of which go * through the mean-color-value. One line has a direction defined by the dominant direction; this * is used to assess the error from using an uncorrelated color representation. 
The other line goes - * through (0,0,0,1) and is used to assess the error from using an RGBS color representation. + * through (0,0,0) and is used to assess the error from using an RGBS color representation. * * This function computes the squared error when using these two representations. * - * @param pi The partition info for the current trial. - * @param blk The image block color data to be compressed. - * @param ewb The image block weighted error data. - * @param uncor_plines Processed uncorrelated partition lines for each partition. - * @param samec_plines Processed same chroma partition lines for each partition. - * @param[out] uncor_lengths The length of each components deviation from the line. - * @param[out] samec_lengths The length of each components deviation from the line. - * @param[out] uncor_error The cumulative error for using the uncorrelated line. - * @param[out] samec_error The cumulative error for using the same chroma line. + * @param pi The partition info for the current trial. + * @param blk The image block color data to be compressed. + * @param ewb The image block weighted error data. + * @param[in,out] plines Processed line inputs, and line length outputs. + * @param[out] uncor_error The cumulative error for using the uncorrelated line. + * @param[out] samec_error The cumulative error for using the same chroma line. */ -void compute_error_squared_rgba( +void compute_error_squared_rgb( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - const processed_line4 uncor_plines[4], - const processed_line4 samec_plines[4], - float uncor_lengths[4], - float samec_lengths[4], + partition_lines3 plines[BLOCK_MAX_PARTITIONS], float& uncor_error, float& samec_error); /** - * @brief Compute the RGB error for uncorrelated and same chroma projections. + * @brief Compute the RGBA error for uncorrelated and same chroma projections. 
* * The output of compute averages and dirs is post processed to define two lines, both of which go * through the mean-color-value. One line has a direction defined by the dominant direction; this * is used to assess the error from using an uncorrelated color representation. The other line goes - * through (0,0,0) and is used to assess the error from using an RGBS color representation. + * through (0,0,0,1) and is used to assess the error from using an RGBS color representation. * * This function computes the squared error when using these two representations. * - * @param pi The partition info for the current trial. - * @param blk The image block color data to be compressed. - * @param ewb The image block weighted error data. - * @param[in,out] plines Processed line inputs, and line length outputs. - * @param[out] uncor_error The cumulative error for using the uncorrelated line. - * @param[out] samec_error The cumulative error for using the same chroma line. + * @param pi The partition info for the current trial. + * @param blk The image block color data to be compressed. + * @param ewb The image block weighted error data. + * @param uncor_plines Processed uncorrelated partition lines for each partition. + * @param samec_plines Processed same chroma partition lines for each partition. + * @param[out] uncor_lengths The length of each components deviation from the line. + * @param[out] samec_lengths The length of each components deviation from the line. + * @param[out] uncor_error The cumulative error for using the uncorrelated line. + * @param[out] samec_error The cumulative error for using the same chroma line. 
*/ -void compute_error_squared_rgb( +void compute_error_squared_rgba( const partition_info& pi, const imageblock& blk, const error_weight_block& ewb, - partition_lines3 plines[4], + const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS], + const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS], + float uncor_lengths[BLOCK_MAX_PARTITIONS], + float samec_lengths[BLOCK_MAX_PARTITIONS], float& uncor_error, float& samec_error); @@ -1045,113 +1665,60 @@ void compute_error_squared_rgb( * @param[out] best_partition_samec The best partition for correlated chroma. * @param[out] best_partition_dualplane The best partition for dual plane, but may be @c nullptr. */ -void find_best_partitionings( +void find_best_partition_candidates( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, - int partition_count, - int partition_search_limit, - int& best_partition_uncor, - int& best_partition_samec, - int* best_partition_dualplane); - -/** - * @brief Use k-means clustering to compute a partition ordering for a block.. - * - * @param bsd The block size information. - * @param blk The image block color data to compress. - * @param partition_count The desired number of partitions in the block. - * @param[out] partition_ordering The list of recommended partition indices, in priority order. 
- */ -void kmeans_compute_partition_ordering( - const block_size_descriptor& bsd, - const imageblock& blk, - int partition_count, - int partition_ordering[PARTITION_COUNT]); + unsigned int partition_count, + unsigned int partition_search_limit, + unsigned int& best_partition_uncor, + unsigned int& best_partition_samec, + unsigned int* best_partition_dualplane); -// ********************************************************* -// functions and data pertaining to images and imageblocks -// ********************************************************* +/* ============================================================================ + Functionality for managing images and image related data. +============================================================================ */ /** - * @brief Parameter structure for compute_pixel_region_variance(). + * @brief Setup computation of regional averages and variances in an image. * - * This function takes a structure to avoid spilling arguments to the stack - * on every function invocation, as there are a lot of parameters. - */ -struct pixel_region_variance_args -{ - /** The image to analyze. */ - const astcenc_image* img; - /** The RGB component power adjustment. */ - float rgb_power; - /** The alpha component power adjustment. */ - float alpha_power; - /** The component swizzle pattern. */ - astcenc_swizzle swz; - /** Should the algorithm bother with Z axis processing? */ - bool have_z; - /** The kernel radius for average and variance. */ - int avg_var_kernel_radius; - /** The kernel radius for alpha processing. */ - int alpha_kernel_radius; - /** The size of the working data to process. */ - int size_x; - int size_y; - int size_z; - /** The position of first src and dst data in the data set. */ - int offset_x; - int offset_y; - int offset_z; - /** The working memory buffer. */ - vfloat4 *work_memory; -}; - -/** - * @brief Parameter structure for compute_averages_and_variances_proc(). 
- */ -struct avg_var_args -{ - /** The arguments for the nested variance computation. */ - pixel_region_variance_args arg; - /** The image dimensions. */ - int img_size_x; - int img_size_y; - int img_size_z; - /** The maximum working block dimensions. */ - int blk_size_xy; - int blk_size_z; - /** The working block memory size. */ - int work_memory_size; -}; - -/** - * @brief Compute regional averages and variances in an image. + * This must be done by only a single thread per image, before any thread calls + * @c compute_averages_and_variances(). * - * Results are written back into img->input_averages, img->input_variances, - * and img->input_alpha_averages. + * Results are written back into @c img->input_averages, @c img->input_variances, + * and @c img->input_alpha_averages. * - * @param img The input image data, also holds output data. - * @param rgb_power The RGB component power. - * @param alpha_power The A component power. - * @param avg_var_kernel_radius The kernel radius (in pixels) for avg and var. - * @param alpha_kernel_radius The kernel radius (in pixels) for alpha mods. - * @param swz Input data component swizzle. - * @param arg The pixel region arguments for this thread. - * @param ag The average variance arguments for this thread. + * @param img The input image data, also holds output data. + * @param rgb_power The RGB component power. + * @param alpha_power The A component power. + * @param avg_var_kernel_radius The kernel radius (in pixels) for avg and var. + * @param alpha_kernel_radius The kernel radius (in pixels) for alpha mods. + * @param swz Input data component swizzle. + * @param[out] ag The average variance arguments to init. * * @return The number of tasks in the processing stage. 
*/ unsigned int init_compute_averages_and_variances( - astcenc_image& img, + const astcenc_image& img, float rgb_power, float alpha_power, - int avg_var_kernel_radius, - int alpha_kernel_radius, + unsigned int avg_var_kernel_radius, + unsigned int alpha_kernel_radius, const astcenc_swizzle& swz, - pixel_region_variance_args& arg, avg_var_args& ag); +/** + * @brief Compute regional averages and variances. + * + * This function can be called by multiple threads, but only after a single thread calls the setup + * function @c init_compute_averages_and_variances(). + * + * Results are written back into @c img->input_averages, @c img->input_variances, + * and @c img->input_alpha_averages. + * + * @param[out] ctx The context. + * @param ag The average and variance arguments created during setup. + */ void compute_averages_and_variances( astcenc_context& ctx, const avg_var_args& ag); @@ -1173,9 +1740,9 @@ void fetch_imageblock( const astcenc_image& img, imageblock& blk, const block_size_descriptor& bsd, - int xpos, - int ypos, - int zpos, + unsigned int xpos, + unsigned int ypos, + unsigned int zpos, const astcenc_swizzle& swz); /** @@ -1193,40 +1760,21 @@ void write_imageblock( astcenc_image& img, const imageblock& blk, const block_size_descriptor& bsd, - int xpos, - int ypos, - int zpos, + unsigned int xpos, + unsigned int ypos, + unsigned int zpos, const astcenc_swizzle& swz); -// *********************************************************** -// functions pertaining to computing texel weights for a block -// *********************************************************** -struct endpoints -{ - int partition_count; - vfloat4 endpt0[4]; - vfloat4 endpt1[4]; -}; - -struct endpoints_and_weights -{ - endpoints ep; - - alignas(ASTCENC_VECALIGN) float weights[MAX_TEXELS_PER_BLOCK]; - - /**< True if all active values in weight_error_scale are the same. 
*/ - bool is_constant_weight_error_scale; - - alignas(ASTCENC_VECALIGN) float weight_error_scale[MAX_TEXELS_PER_BLOCK]; -}; +/* ============================================================================ + Functionality for computing endpoint colors and weights for a block. +============================================================================ */ /** * @brief Compute ideal endpoint colors and weights for 1 plane of weights. * - * The ideal endpoints define a color line for the partition. For each texel - * the ideal weight defines an exact position on the partition color line. We - * can then use these to assess the error introduced by removing and quantizing - * the weight grid. + * The ideal endpoints define a color line for the partition. For each texel the ideal weight + * defines an exact position on the partition color line. We can then use these to assess the error + * introduced by removing and quantizing the weight grid. * * @param bsd The block size information. * @param blk The image block color data to compress. @@ -1234,7 +1782,7 @@ struct endpoints_and_weights * @param pi The partition info for the current trial. * @param[out] ei The endpoint and weight values. */ -void compute_endpoints_and_ideal_weights_1plane( +void compute_ideal_colors_and_weights_1plane( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, @@ -1244,10 +1792,9 @@ void compute_endpoints_and_ideal_weights_1plane( /** * @brief Compute ideal endpoint colors and weights for 2 planes of weights. * - * The ideal endpoints define a color line for the partition. For each texel - * the ideal weight defines an exact position on the partition color line. We - * can then use these to assess the error introduced by removing and quantizing - * the weight grid. + * The ideal endpoints define a color line for the partition. For each texel the ideal weight + * defines an exact position on the partition color line. 
We can then use these to assess the error + * introduced by removing and quantizing the weight grid. * * @param bsd The block size information. * @param blk The image block color data to compress. @@ -1257,12 +1804,12 @@ void compute_endpoints_and_ideal_weights_1plane( * @param[out] ei1 The endpoint and weight values for plane 1. * @param[out] ei2 The endpoint and weight values for plane 2. */ -void compute_endpoints_and_ideal_weights_2planes( +void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - int plane2_component, + unsigned int plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2); @@ -1279,24 +1826,24 @@ void compute_endpoints_and_ideal_weights_2planes( * * @param eai_in The non-decimated endpoints and weights. * @param eai_out A copy of eai_in we can modify later for refinement. - * @param dt The selected decimation table. + * @param di The selected weight decimation. * @param[out] weight_set The output decimated weight set. * @param[out] weights The output decimated weights. */ -void compute_ideal_weights_for_decimation_table( +void compute_ideal_weights_for_decimation( const endpoints_and_weights& eai_in, endpoints_and_weights& eai_out, - const decimation_table& dt, + const decimation_info& di, float* weight_set, float* weights); /** * @brief Compute the optimal quantized weights for a decimation table. * - * We test the two closest weight indices in the allowed quantization range - * and keep the weight that is the closest match. + * We test the two closest weight indices in the allowed quantization range and keep the weight that + * is the closest match. * - * @param dt The selected decimation table. + * @param di The selected weight decimation. * @param low_bound The lowest weight allowed. * @param high_bound The highest weight allowed. * @param weight_set_in The ideal weight set. 
@@ -1304,46 +1851,42 @@ void compute_ideal_weights_for_decimation_table( * @param[out] quantized_weight_set The output quantized weight as encoded int. * @param quant_level The desired weight quant level. */ -void compute_quantized_weights_for_decimation_table( - const decimation_table& dt, +void compute_quantized_weights_for_decimation( + const decimation_info& di, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quant_level); - -/********************************* - Utilties for bilinear filtering -*********************************/ + quant_method quant_level); /** * @brief Compute the infilled weight for a texel index in a decimated grid. */ static inline float bilinear_infill( - const decimation_table& dt, + const decimation_info& di, const float* weights, - int index + unsigned int index ) { - return (weights[dt.texel_weights_4t[0][index]] * dt.texel_weights_float_4t[0][index] + - weights[dt.texel_weights_4t[1][index]] * dt.texel_weights_float_4t[1][index]) + - (weights[dt.texel_weights_4t[2][index]] * dt.texel_weights_float_4t[2][index] + - weights[dt.texel_weights_4t[3][index]] * dt.texel_weights_float_4t[3][index]); + return (weights[di.texel_weights_4t[0][index]] * di.texel_weights_float_4t[0][index] + + weights[di.texel_weights_4t[1][index]] * di.texel_weights_float_4t[1][index]) + + (weights[di.texel_weights_4t[2][index]] * di.texel_weights_float_4t[2][index] + + weights[di.texel_weights_4t[3][index]] * di.texel_weights_float_4t[3][index]); } /** * @brief Compute the infilled weight for N texel indices in a decimated grid. 
*/ static inline vfloat bilinear_infill_vla( - const decimation_table& dt, + const decimation_info& di, const float* weights, - int index + unsigned int index ) { // Load the bilinear filter texel weight indexes in the decimated grid - vint weight_idx0 = vint(dt.texel_weights_4t[0] + index); - vint weight_idx1 = vint(dt.texel_weights_4t[1] + index); - vint weight_idx2 = vint(dt.texel_weights_4t[2] + index); - vint weight_idx3 = vint(dt.texel_weights_4t[3] + index); + vint weight_idx0 = vint(di.texel_weights_4t[0] + index); + vint weight_idx1 = vint(di.texel_weights_4t[1] + index); + vint weight_idx2 = vint(di.texel_weights_4t[2] + index); + vint weight_idx3 = vint(di.texel_weights_4t[3] + index); // Load the bilinear filter weights from the decimated grid vfloat weight_val0 = gatherf(weights, weight_idx0); @@ -1352,10 +1895,10 @@ static inline vfloat bilinear_infill_vla( vfloat weight_val3 = gatherf(weights, weight_idx3); // Load the weight contribution factors for each decimated weight - vfloat tex_weight_float0 = loada(dt.texel_weights_float_4t[0] + index); - vfloat tex_weight_float1 = loada(dt.texel_weights_float_4t[1] + index); - vfloat tex_weight_float2 = loada(dt.texel_weights_float_4t[2] + index); - vfloat tex_weight_float3 = loada(dt.texel_weights_float_4t[3] + index); + vfloat tex_weight_float0 = loada(di.texel_weights_float_4t[0] + index); + vfloat tex_weight_float1 = loada(di.texel_weights_float_4t[1] + index); + vfloat tex_weight_float2 = loada(di.texel_weights_float_4t[2] + index); + vfloat tex_weight_float3 = loada(di.texel_weights_float_4t[3] + index); // Compute the bilinear interpolation to generate the per-texel weight return (weight_val0 * tex_weight_float0 + weight_val1 * tex_weight_float1) + @@ -1365,33 +1908,31 @@ static inline vfloat bilinear_infill_vla( /** * @brief Compute the error of a decimated weight set for 1 plane. 
* - * After computing ideal weights for the case with one weight per texel, we - * want to compute the error for decimated weight grids where weights are - * stored at a lower resolution. This function computes the error of the - * reduced grid, compared to the full grid. + * After computing ideal weights for the case with one weight per texel, we want to compute the + * error for decimated weight grids where weights are stored at a lower resolution. This function + * computes the error of the reduced grid, compared to the full grid. * * @param eai The ideal weights for the full grid. - * @param dt The weight grid decimation table. + * @param di The selected weight decimation. * @param weights The ideal weights for the decimated grid. * * @return The accumulated error. */ float compute_error_of_weight_set_1plane( const endpoints_and_weights& eai, - const decimation_table& dt, + const decimation_info& di, const float *weights); /** * @brief Compute the error of a decimated weight set for 2 planes. * - * After computing ideal weights for the case with one weight per texel, we - * want to compute the error for decimated weight grids where weights are - * stored at a lower resolution. This function computes the error of the - * reduced grid, compared to the full grid. + * After computing ideal weights for the case with one weight per texel, we want to compute the + * error for decimated weight grids where weights are stored at a lower resolution. This function + * computes the error of the reduced grid, compared to the full grid. * * @param eai1 The ideal weights for the full grid and plane 1. * @param eai2 The ideal weights for the full grid and plane 2. - * @param dt The weight grid decimation table. + * @param di The selected weight decimation. * @param weights1 The ideal weights for the decimated grid plane 1. * @param weights2 The ideal weights for the decimated grid plane 2. 
* @@ -1400,7 +1941,7 @@ float compute_error_of_weight_set_1plane( float compute_error_of_weight_set_2planes( const endpoints_and_weights& eai1, const endpoints_and_weights& eai2, - const decimation_table& dt, + const decimation_info& di, const float* weights1, const float* weights2); @@ -1420,13 +1961,14 @@ float compute_error_of_weight_set_2planes( * * @return The actual endpoint mode used. */ +// TODO: Format as enum? int pack_color_endpoints( vfloat4 color0, vfloat4 color1, vfloat4 rgbs_color, vfloat4 rgbo_color, int format, - int* output, + uint8_t* output, quant_method quant_level); /** @@ -1442,11 +1984,12 @@ int pack_color_endpoints( * @param[out] output0 The output color for endpoint 0. * @param[out] output1 The output color for endpoint 1. */ +// TODO: Format as enum? void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quant_level, - const int* input, + quant_method quant_level, + const uint8_t* input, bool& rgb_hdr, bool& alpha_hdr, vint4& output0, @@ -1457,7 +2000,7 @@ void unpack_color_endpoints( * * @param bsd The block size information. * @param scb The symbolic compressed encoding. - * @param dt The weight grid decimation table. + * @param di The weight grid decimation table. * @param is_dual_plane @c true if this is a dual plane block, @c false otherwise. * @param quant_level The weight quantization level. * @param[out] weights_plane1 The output array for storing the plane 1 weights. @@ -1466,46 +2009,11 @@ void unpack_color_endpoints( void unpack_weights( const block_size_descriptor& bsd, const symbolic_compressed_block& scb, - const decimation_table& dt, + const decimation_info& di, bool is_dual_plane, - int quant_level, - int weights_plane1[MAX_TEXELS_PER_BLOCK], - int weights_plane2[MAX_TEXELS_PER_BLOCK]); - -struct encoding_choice_errors -{ - // Error of using LDR RGB-scale instead of complete endpoints. - float rgb_scale_error; - // Error of using HDR RGB-scale instead of complete endpoints. 
- float rgb_luma_error; - // Error of using luminance instead of RGB. - float luminance_error; - // Error of discarding alpha. - float alpha_drop_error; - // Validity of using offset encoding. - bool can_offset_encode; - // Validity of using blue contraction encoding. - bool can_blue_contract; -}; - -// buffers used to store intermediate data in compress_symbolic_block_fixed_partition_*() -struct alignas(ASTCENC_VECALIGN) compress_fixed_partition_buffers -{ - endpoints_and_weights ei1; - endpoints_and_weights ei2; - endpoints_and_weights eix1[MAX_DECIMATION_MODES]; - endpoints_and_weights eix2[MAX_DECIMATION_MODES]; - alignas(ASTCENC_VECALIGN) float decimated_quantized_weights[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK]; - alignas(ASTCENC_VECALIGN) float decimated_weights[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK]; - alignas(ASTCENC_VECALIGN) float flt_quantized_decimated_quantized_weights[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK]; - alignas(ASTCENC_VECALIGN) uint8_t u8_quantized_decimated_quantized_weights[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK]; -}; - -struct compress_symbolic_block_buffers -{ - error_weight_block ewb; - compress_fixed_partition_buffers planes; -}; + quant_method quant_level, + int weights_plane1[BLOCK_MAX_TEXELS], + int weights_plane2[BLOCK_MAX_TEXELS]); /** * @brief Identify, for each mode, which set of color endpoint produces the best result. @@ -1521,13 +2029,15 @@ struct compress_symbolic_block_buffers * @param ep The ideal endpoints. * @param qwt_bitcounts Bit counts for different quantization methods. * @param qwt_errors Errors for different quantization methods. - * @param tune_candidate_limit The number of candidates to return. + * @param tune_candidate_limit The max number of candidates to return, may be less. * @param[out] partition_format_specifiers The best formats per partition. - * @param[out] block_mode The best packed block mode index. + * @param[out] block_mode The best packed block mode indexes. 
* @param[out] quant_level The best color quant level. * @param[out] quant_level_mod The best color quant level if endpoints are the same. + * + * @return The actual number of candidate matches returned. */ -void determine_optimal_set_of_endpoint_formats_to_use( +unsigned int compute_ideal_endpoint_formats( const block_size_descriptor& bsd, const partition_info& pi, const imageblock& blk, @@ -1535,22 +2045,22 @@ void determine_optimal_set_of_endpoint_formats_to_use( const endpoints& ep, const int* qwt_bitcounts, const float* qwt_errors, - int tune_candidate_limit, - int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4], + unsigned int tune_candidate_limit, + int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], - int quant_level[TUNE_MAX_TRIAL_CANDIDATES], - int quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]); + quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES], + quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]); /** * @brief For a given 1 plane weight set recompute the endpoint colors. * - * As we quantize and decimate weights the optimal endpoint colors may change - * slightly, so we must recompute the ideal colors for a specific weight set. + * As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must + * recompute the ideal colors for a specific weight set. * * @param blk The image block color data to compress. * @param ewb The image block weighted error data. * @param pi The partition info for the current trial. - * @param dt The weight grid decimation table. + * @param di The weight grid decimation table. * @param weight_quant_mode The weight grid quantization level. * @param weight_set8 The quantized weight set. * @param[in,out] ep The color endpoints (modifed in place). 
@@ -1561,23 +2071,23 @@ void recompute_ideal_colors_1plane( const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - const decimation_table& dt, + const decimation_info& di, int weight_quant_mode, const uint8_t* weight_set8, endpoints& ep, - vfloat4 rgbs_vectors[4], - vfloat4 rgbo_vectors[4]); + vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], + vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS]); /** * @brief For a given 2 plane weight set recompute the endpoint colors. * - * As we quantize and decimate weights the optimal endpoint colors may change - * slightly, so we must recompute the ideal colors for a specific weight set. + * As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must + * recompute the ideal colors for a specific weight set. * * @param blk The image block color data to compress. * @param ewb The image block weighted error data. * @param pi The partition info for the current trial. - * @param dt The weight grid decimation table. + * @param di The weight grid decimation table. * @param weight_quant_mode The weight grid quantization level. * @param weight_set8_plane1 The quantized weight set for plane 1. * @param weight_set8_plane2 The quantized weight set for plane 2. 
@@ -1590,13 +2100,13 @@ void recompute_ideal_colors_2planes( const imageblock& blk, const error_weight_block& ewb, const partition_info& pi, - const decimation_table& dt, + const decimation_info& di, int weight_quant_mode, const uint8_t* weight_set8_plane1, const uint8_t* weight_set8_plane2, endpoints& ep, - vfloat4 rgbs_vectors[4], - vfloat4 rgbo_vectors[4], + vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], + vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS], int plane2_component); void expand_deblock_weights( @@ -1610,19 +2120,21 @@ void compute_angular_endpoints_1plane( const block_size_descriptor& bsd, const float* decimated_quantized_weights, const float* decimated_weights, - float low_value[MAX_WEIGHT_MODES], - float high_value[MAX_WEIGHT_MODES]); + float low_value[WEIGHTS_MAX_BLOCK_MODES], + float high_value[WEIGHTS_MAX_BLOCK_MODES]); void compute_angular_endpoints_2planes( const block_size_descriptor& bsd, const float* decimated_quantized_weights, const float* decimated_weights, - float low_value1[MAX_WEIGHT_MODES], - float high_value1[MAX_WEIGHT_MODES], - float low_value2[MAX_WEIGHT_MODES], - float high_value2[MAX_WEIGHT_MODES]); + float low_value1[WEIGHTS_MAX_BLOCK_MODES], + float high_value1[WEIGHTS_MAX_BLOCK_MODES], + float low_value2[WEIGHTS_MAX_BLOCK_MODES], + float high_value2[WEIGHTS_MAX_BLOCK_MODES]); -/* *********************************** high-level encode and decode functions ************************************ */ +/* ============================================================================ + Functionality for high level compression and decompression access. +============================================================================ */ /** * @brief Compress an image block into a physical block. @@ -1690,74 +2202,38 @@ void physical_to_symbolic( const physical_compressed_block& pcb, symbolic_compressed_block& scb); -#if defined(ASTCENC_DIAGNOSTICS) -class TraceLog; // See astcenc_diagnostic_trace for details. 
-#endif - -struct astcenc_context -{ - astcenc_config config; - unsigned int thread_count; - block_size_descriptor* bsd; - - // Fields below here are not needed in a decompress-only build, but some - // remain as they are small and it avoids littering the code with #ifdefs. - // The most significant contributors to large structure size are omitted. - - // Regional average-and-variance information, initialized by - // compute_averages_and_variances() only if the astc encoder - // is requested to do error weighting based on averages and variances. - vfloat4 *input_averages; - vfloat4 *input_variances; - float *input_alpha_averages; - - compress_symbolic_block_buffers* working_buffers; - -#if !defined(ASTCENC_DECOMPRESS_ONLY) - pixel_region_variance_args arg; - avg_var_args ag; - - float deblock_weights[MAX_TEXELS_PER_BLOCK]; - - ParallelManager manage_avg_var; - ParallelManager manage_compress; -#endif - - ParallelManager manage_decompress; - -#if defined(ASTCENC_DIAGNOSTICS) - TraceLog* trace_log; -#endif -}; - /* ============================================================================ - Platform-specific functions +Platform-specific functions. ============================================================================ */ +// TODO: Make these bools /** * @brief Run-time detection if the host CPU supports the POPCNT extension. + * * @return Zero if not supported, positive value if it is. */ int cpu_supports_popcnt(); /** * @brief Run-time detection if the host CPU supports F16C extension. + * * @return Zero if not supported, positive value if it is. */ int cpu_supports_f16c(); /** * @brief Run-time detection if the host CPU supports SSE 4.1 extension. + * * @return Zero if not supported, positive value if it is. */ int cpu_supports_sse41(); /** * @brief Run-time detection if the host CPU supports AVX 2 extension. + * * @return Zero if not supported, positive value if it is. */ int cpu_supports_avx2(); - /** * @brief Allocate an aligned memory buffer. 
* diff --git a/lib/astc-encoder/Source/astcenc_kmeans_partitioning.cpp b/lib/astc-encoder/Source/astcenc_kmeans_partitioning.cpp deleted file mode 100644 index ac297273d9..0000000000 --- a/lib/astc-encoder/Source/astcenc_kmeans_partitioning.cpp +++ /dev/null @@ -1,451 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// ---------------------------------------------------------------------------- -// Copyright 2011-2021 Arm Limited -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy -// of the License at: -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations -// under the License. -// ---------------------------------------------------------------------------- - -#if !defined(ASTCENC_DECOMPRESS_ONLY) - -/** - * @brief Functions for approximate partitioning by kmeans clustering. - * - * Do this in 2 stages: - * 1: basic clustering, a couple of passes just to get a few clusters - * 2: clustering based on line, a few passes until it seems to stabilize. - * - * After clustering is done, we use the clustering result to construct one - * bitmap for each partition. We then scan though the partition table, counting - * how well the bitmaps matched. - */ - -#include "astcenc_internal.h" - -/** - * @brief Pick some initital kmeans cluster centers. - * - * @param blk The image block color data to compress. - * @param texel_count The number of texels in the block. - * @param partition_count The number of partitions in the block. - * @param[out] cluster_centers The initital partition cluster center colors. 
- */ -static void kmeans_init( - const imageblock& blk, - int texel_count, - int partition_count, - vfloat4 cluster_centers[4] -) { - promise(texel_count > 0); - promise(partition_count > 0); - - int clusters_selected = 0; - float distances[MAX_TEXELS_PER_BLOCK]; - - // Pick a random sample as first cluster center; 145897 from random.org - int sample = 145897 % texel_count; - vfloat4 center_color = blk.texel(sample); - cluster_centers[clusters_selected] = center_color; - clusters_selected++; - - // Compute the distance to the first cluster center - float distance_sum = 0.0f; - for (int i = 0; i < texel_count; i++) - { - vfloat4 color = blk.texel(i); - vfloat4 diff = color - center_color; - float distance = dot_s(diff, diff); - distance_sum += distance; - distances[i] = distance; - } - - // More numbers from random.org for weighted-random center selection - const float cluster_cutoffs[9] = { - 0.626220f, 0.932770f, 0.275454f, - 0.318558f, 0.240113f, 0.009190f, - 0.347661f, 0.731960f, 0.156391f - }; - - int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); - - // Pick the remaining samples as needed - while (true) - { - // Pick the next center in a weighted-random fashion. 
- float summa = 0.0f; - float distance_cutoff = distance_sum * cluster_cutoffs[cutoff++]; - for (sample = 0; sample < texel_count; sample++) - { - summa += distances[sample]; - if (summa >= distance_cutoff) - { - break; - } - } - - // Clamp to a valid range and store the selected cluster center - sample = astc::min(sample, texel_count - 1); - - center_color = blk.texel(sample); - cluster_centers[clusters_selected++] = center_color; - if (clusters_selected >= partition_count) - { - break; - } - - // Compute the distance to the new cluster center, keep the min dist - distance_sum = 0.0f; - for (int i = 0; i < texel_count; i++) - { - vfloat4 color = blk.texel(i); - vfloat4 diff = color - center_color; - float distance = dot_s(diff, diff); - distance = astc::min(distance, distances[i]); - distance_sum += distance; - distances[i] = distance; - } - } -} - -/** - * @brief Assign texels to clusters, based on a set of chosen center points. - * - * @todo Can partition of texel be uint8_t not int? - * - * @param blk The image block color data to compress. - * @param texel_count The number of texels in the block. - * @param partition_count The number of partitions in the block. - * @param cluster_centers The partition cluster center colors. - * @param[out] partition_of_texel The partition assigned for each texel. 
- */ -static void kmeans_assign( - const imageblock& blk, - int texel_count, - int partition_count, - const vfloat4 cluster_centers[4], - int partition_of_texel[MAX_TEXELS_PER_BLOCK] -) { - promise(texel_count > 0); - promise(partition_count > 0); - - int partition_texel_count[4] { 0 }; - - // Find the best partition for every texel - for (int i = 0; i < texel_count; i++) - { - float best_distance = std::numeric_limits::max(); - int best_partition = -1; - - vfloat4 color = blk.texel(i); - for (int j = 0; j < partition_count; j++) - { - vfloat4 diff = color - cluster_centers[j]; - float distance = dot_s(diff, diff); - if (distance < best_distance) - { - best_distance = distance; - best_partition = j; - } - } - - partition_of_texel[i] = best_partition; - partition_texel_count[best_partition]++; - } - - // It is possible to get a situation where a partition ends up without any - // texels. In this case, we assign texel N to partition N. This is silly, - // but ensures that every partition retains at least one texel. Reassigning - // a texel in this manner may cause another partition to go empty, so if we - // actually did a reassignment, we run the whole loop over again. - int problem_case; - do - { - problem_case = 0; - for (int i = 0; i < partition_count; i++) - { - if (partition_texel_count[i] == 0) - { - partition_texel_count[partition_of_texel[i]]--; - partition_texel_count[i]++; - partition_of_texel[i] = i; - problem_case = 1; - } - } - } while (problem_case != 0); -} - -/** - * @brief Compute new cluster centers based on their center of gravity. - * - * @param blk The image block color data to compress. - * @param texel_count The number of texels in the block. - * @param partition_count The number of partitions in the block. - * @param[out] cluster_centers The new cluster center colors. - * @param partition_of_texel The partition assigned for each texel. 
- */ -static void kmeans_update( - const imageblock& blk, - int texel_count, - int partition_count, - vfloat4 cluster_centers[4], - const int partition_of_texel[MAX_TEXELS_PER_BLOCK] -) { - promise(texel_count > 0); - promise(partition_count > 0); - - vfloat4 color_sum[4] { - vfloat4::zero(), - vfloat4::zero(), - vfloat4::zero(), - vfloat4::zero() - }; - - int partition_texel_count[4] { 0 }; - - // Find the center-of-gravity in each cluster - for (int i = 0; i < texel_count; i++) - { - int partition = partition_of_texel[i]; - color_sum[partition] += blk.texel(i);; - partition_texel_count[partition]++; - } - - // Set the center of gravity to be the new cluster center - for (int i = 0; i < partition_count; i++) - { - float scale = 1.0f / static_cast(partition_texel_count[i]); - cluster_centers[i] = color_sum[i] * scale; - } -} - -/** - * @brief Compute bit-mismatch for partitioning in 2-partition mode. - * - * @param a The texel assignment bitvector for the block. - * @param b The texel assignment bitvector for the partition table. - * - * @return The number of bit mismatches. - */ -static inline int partition_mismatch2( - const uint64_t a[2], - const uint64_t b[2] -) { - int v1 = astc::popcount(a[0] ^ b[0]) + astc::popcount(a[1] ^ b[1]); - int v2 = astc::popcount(a[0] ^ b[1]) + astc::popcount(a[1] ^ b[0]); - return astc::min(v1, v2); -} - -/** - * @brief Compute bit-mismatch for partitioning in 3-partition mode. - * - * @param a The texel assignment bitvector for the block. - * @param b The texel assignment bitvector for the partition table. - * - * @return The number of bit mismatches. 
- */ -static inline int partition_mismatch3( - const uint64_t a[3], - const uint64_t b[3] -) { - int p00 = astc::popcount(a[0] ^ b[0]); - int p01 = astc::popcount(a[0] ^ b[1]); - int p02 = astc::popcount(a[0] ^ b[2]); - - int p10 = astc::popcount(a[1] ^ b[0]); - int p11 = astc::popcount(a[1] ^ b[1]); - int p12 = astc::popcount(a[1] ^ b[2]); - - int p20 = astc::popcount(a[2] ^ b[0]); - int p21 = astc::popcount(a[2] ^ b[1]); - int p22 = astc::popcount(a[2] ^ b[2]); - - int s0 = p11 + p22; - int s1 = p12 + p21; - int v0 = astc::min(s0, s1) + p00; - - int s2 = p10 + p22; - int s3 = p12 + p20; - int v1 = astc::min(s2, s3) + p01; - - int s4 = p10 + p21; - int s5 = p11 + p20; - int v2 = astc::min(s4, s5) + p02; - - return astc::min(v0, v1, v2); -} - -/** - * @brief Compute bit-mismatch for partitioning in 4-partition mode. - * - * @param a The texel assignment bitvector for the block. - * @param b The texel assignment bitvector for the partition table. - * - * @return The number of bit mismatches. 
- */ -static inline int partition_mismatch4( - const uint64_t a[4], - const uint64_t b[4] -) { - int p00 = astc::popcount(a[0] ^ b[0]); - int p01 = astc::popcount(a[0] ^ b[1]); - int p02 = astc::popcount(a[0] ^ b[2]); - int p03 = astc::popcount(a[0] ^ b[3]); - - int p10 = astc::popcount(a[1] ^ b[0]); - int p11 = astc::popcount(a[1] ^ b[1]); - int p12 = astc::popcount(a[1] ^ b[2]); - int p13 = astc::popcount(a[1] ^ b[3]); - - int p20 = astc::popcount(a[2] ^ b[0]); - int p21 = astc::popcount(a[2] ^ b[1]); - int p22 = astc::popcount(a[2] ^ b[2]); - int p23 = astc::popcount(a[2] ^ b[3]); - - int p30 = astc::popcount(a[3] ^ b[0]); - int p31 = astc::popcount(a[3] ^ b[1]); - int p32 = astc::popcount(a[3] ^ b[2]); - int p33 = astc::popcount(a[3] ^ b[3]); - - int mx23 = astc::min(p22 + p33, p23 + p32); - int mx13 = astc::min(p21 + p33, p23 + p31); - int mx12 = astc::min(p21 + p32, p22 + p31); - int mx03 = astc::min(p20 + p33, p23 + p30); - int mx02 = astc::min(p20 + p32, p22 + p30); - int mx01 = astc::min(p21 + p30, p20 + p31); - - int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12); - int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02); - int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01); - int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12); - - return astc::min(v0, v1, v2, v3); -} - -using mismatch_dispatch = int (*)(const uint64_t*, const uint64_t*); - -/** - * @brief Count the partition table mismatches vs the data clustering. - * - * @param bsd The block size information. - * @param partition_count The number of partitions in the block. - * @param bitmaps The block texel partition assignment patterns. - * @param[out] mismatch_counts The array storing per partitioning mismatch counts. 
- */ -static void count_partition_mismatch_bits( - const block_size_descriptor& bsd, - int partition_count, - const uint64_t bitmaps[4], - int mismatch_counts[PARTITION_COUNT] -) { - const partition_info *pt = get_partition_table(&bsd, partition_count); - - // Function pointer dispatch table - const mismatch_dispatch dispatch[3] { - partition_mismatch2, - partition_mismatch3, - partition_mismatch4 - }; - - for (int i = 0; i < PARTITION_COUNT; i++) - { - int bitcount = 255; - if (pt->partition_count == partition_count) - { - bitcount = dispatch[partition_count - 2](bitmaps, pt->coverage_bitmaps); - } - - mismatch_counts[i] = bitcount; - pt++; - } -} - -/** - * @brief Use counting sort on the mismatch array to sort partition candidates. - * - * @param mismatch_count Partitioning mismatch counts, in index order. - * @param[out] partition_ordering Partition index values, in mismatch order. - */ -static void get_partition_ordering_by_mismatch_bits( - const int mismatch_count[PARTITION_COUNT], - int partition_ordering[PARTITION_COUNT] -) { - int mscount[256] { 0 }; - - // Create the histogram of mismatch counts - for (int i = 0; i < PARTITION_COUNT; i++) - { - mscount[mismatch_count[i]]++; - } - - // Create a running sum from the histogram array - // Cells store previous values only; i.e. exclude self after sum - int summa = 0; - for (int i = 0; i < 256; i++) - { - int cnt = mscount[i]; - mscount[i] = summa; - summa += cnt; - } - - // Use the running sum as the index, incrementing after read to allow - // sequential entries with the same count - for (int i = 0; i < PARTITION_COUNT; i++) - { - int idx = mscount[mismatch_count[i]]++; - partition_ordering[idx] = i; - } -} - -/* See header for documentation. 
*/ -void kmeans_compute_partition_ordering( - const block_size_descriptor& bsd, - const imageblock& blk, - int partition_count, - int partition_ordering[PARTITION_COUNT] -) { - vfloat4 cluster_centers[4]; - int texel_partitions[MAX_TEXELS_PER_BLOCK]; - - // Use three passes of k-means clustering to partition the block data - for (int i = 0; i < 3; i++) - { - if (i == 0) - { - kmeans_init(blk, bsd.texel_count, partition_count, cluster_centers); - } - else - { - kmeans_update(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions); - } - - kmeans_assign(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions); - } - - // Construct the block bitmaps of texel assignments to each partition - uint64_t bitmaps[4] { 0 }; - int texels_to_process = bsd.kmeans_texel_count; - promise(texels_to_process > 0); - for (int i = 0; i < texels_to_process; i++) - { - int idx = bsd.kmeans_texels[i]; - bitmaps[texel_partitions[idx]] |= 1ULL << i; - } - - // Count the mismatch between the block and the format's partition tables - int mismatch_counts[PARTITION_COUNT]; - count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts); - - // Sort the partitions based on the number of mismatched bits - get_partition_ordering_by_mismatch_bits(mismatch_counts, partition_ordering); -} - -#endif diff --git a/lib/astc-encoder/Source/astcenc_mathlib.cpp b/lib/astc-encoder/Source/astcenc_mathlib.cpp index a59cb24b5e..f276ac7e3d 100644 --- a/lib/astc-encoder/Source/astcenc_mathlib.cpp +++ b/lib/astc-encoder/Source/astcenc_mathlib.cpp @@ -28,14 +28,14 @@ static inline uint64_t rotl(uint64_t val, int count) return (val << count) | (val >> (64 - count)); } -/* Public function, see header file for detailed documentation */ +/* See header for documentation. 
*/ void astc::rand_init(uint64_t state[2]) { state[0] = 0xfaf9e171cea1ec6bULL; state[1] = 0xf1b318cc06af5d71ULL; } -/* Public function, see header file for detailed documentation */ +/* See header for documentation. */ uint64_t astc::rand(uint64_t state[2]) { uint64_t s0 = state[0]; diff --git a/lib/astc-encoder/Source/astcenc_partition_tables.cpp b/lib/astc-encoder/Source/astcenc_partition_tables.cpp index 362e96be97..e7c421457e 100644 --- a/lib/astc-encoder/Source/astcenc_partition_tables.cpp +++ b/lib/astc-encoder/Source/astcenc_partition_tables.cpp @@ -24,10 +24,9 @@ /** * @brief Generate a canonical representation of a partition pattern. * - * The returned value stores two bits per texel, for up to 6x6x6 texels, where - * the two bits store the remapped texel index. Remapping ensures that we only - * match on the partition pattern, independent of the partition order generated - * by the hash. + * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store + * the remapped texel index. Remapping ensures that we only match on the partition pattern, + * independent of the partition order generated by the hash. * * @param texel_count The number of texels in the block. * @param partition_of_texel The partition assignments, in hash order. @@ -44,13 +43,13 @@ static void generate_canonical_partitioning( bit_pattern[i] = 0; } - // Store a mapping to reorder the raw partitions so that the the partitions - // are ordered such that the lowest texel index in partition N is smaller - // than the lowest texel index in partition N + 1. - int mapped_index[4]; + // Store a mapping to reorder the raw partitions so that the the partitions are ordered such + // that the lowest texel index in partition N is smaller than the lowest texel index in + // partition N + 1. 
+ int mapped_index[BLOCK_MAX_PARTITIONS]; int map_weight_count = 0; - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) { mapped_index[i] = -1; } @@ -99,18 +98,15 @@ static bool compare_canonical_partitionings( */ static void remove_duplicate_partitionings( int texel_count, - partition_info pt[PARTITION_COUNT] + partition_info pt[BLOCK_MAX_PARTITIONINGS] ) { - uint64_t bit_patterns[PARTITION_COUNT * 7]; + uint64_t bit_patterns[BLOCK_MAX_PARTITIONINGS * 7]; - for (int i = 0; i < PARTITION_COUNT; i++) + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) { generate_canonical_partitioning(texel_count, pt[i].partition_of_texel, bit_patterns + i * 7); - } - for (int i = 0; i < PARTITION_COUNT; i++) - { - for (int j = 0; j < i; j++) + for (unsigned int j = 0; j < i; j++) { if (compare_canonical_partitionings(bit_patterns + 7 * i, bit_patterns + 7 * j)) { @@ -236,13 +232,13 @@ static int select_partition( int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); - // apply the saw + // Apply the saw a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; - // remove some of the components if we are to output < 4 partitions. + // Remove some of the components if we are to output < 4 partitions. 
if (partition_count <= 3) { d = 0; @@ -300,12 +296,12 @@ static void generate_one_partition_info_entry( // Assign texels to partitions int texel_idx = 0; - int counts[4] { 0 }; - for (int z = 0; z < bsd.zdim; z++) + int counts[BLOCK_MAX_PARTITIONS] { 0 }; + for (unsigned int z = 0; z < bsd.zdim; z++) { - for (int y = 0; y < bsd.ydim; y++) + for (unsigned int y = 0; y < bsd.ydim; y++) { - for (int x = 0; x < bsd.xdim; x++) + for (unsigned int x = 0; x < bsd.xdim; x++) { uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); pi.texels_of_partition[part][counts[part]++] = texel_idx++; @@ -346,16 +342,16 @@ static void generate_one_partition_info_entry( pi.partition_count = 4; } - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) { pi.partition_texel_count[i] = counts[i]; pi.coverage_bitmaps[i] = 0ULL; } - int texels_to_process = bsd.kmeans_texel_count; - for (int i = 0; i < texels_to_process; i++) + unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + for (unsigned int i = 0; i < texels_to_process; i++) { - int idx = bsd.kmeans_texels[i]; + unsigned int idx = bsd.kmeans_texels[i]; pi.coverage_bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; } } @@ -365,9 +361,9 @@ void init_partition_tables( block_size_descriptor& bsd ) { partition_info *par_tab2 = bsd.partitions; - partition_info *par_tab3 = par_tab2 + PARTITION_COUNT; - partition_info *par_tab4 = par_tab3 + PARTITION_COUNT; - partition_info *par_tab1 = par_tab4 + PARTITION_COUNT; + partition_info *par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; + partition_info *par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS; + partition_info *par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS; generate_one_partition_info_entry(bsd, 1, 0, *par_tab1); for (int i = 0; i < 1024; i++) diff --git a/lib/astc-encoder/Source/astcenc_percentile_tables.cpp b/lib/astc-encoder/Source/astcenc_percentile_tables.cpp index 35d64262b2..8d9f4a2704 
100644 --- a/lib/astc-encoder/Source/astcenc_percentile_tables.cpp +++ b/lib/astc-encoder/Source/astcenc_percentile_tables.cpp @@ -18,8 +18,7 @@ /** * @brief Percentile data tables for different block encodings. * - * To reduce binary size the tables are stored using a packed differential - * encoding. + * To reduce binary size the tables are stored using a packed differential encoding. */ #include "astcenc_internal.h" @@ -28,8 +27,7 @@ /** * @brief Structure containing packed percentile metadata. * - * Note that percentile tables do not exist for 3D textures, so no zdim is - * stored as they are all known to be 2D. + * Note that percentile tables do not exist for 3D textures, so no zdim is stored. */ struct packed_percentile_table { @@ -1116,14 +1114,14 @@ static const packed_percentile_table *get_packed_table( /* See header for documentation. */ const float *get_2d_percentile_table( - int xdim, - int ydim + unsigned int xdim, + unsigned int ydim ) { float* unpacked_table = new float[2048]; const packed_percentile_table *apt = get_packed_table(xdim, ydim); // Set the default percentile - for (int i = 0; i < 2048; i++) + for (unsigned int i = 0; i < 2048; i++) { unpacked_table[i] = 1.0f; } @@ -1131,16 +1129,16 @@ const float *get_2d_percentile_table( // Populate the unpacked percentile values for (int i = 0; i < 2; i++) { - int itemcount = apt->itemcounts[i]; - int difscale = apt->difscales[i]; - int accum = apt->initial_percs[i]; + unsigned int itemcount = apt->itemcounts[i]; + unsigned int difscale = apt->difscales[i]; + unsigned int accum = apt->initial_percs[i]; const uint16_t *item_ptr = apt->items[i]; - for (int j = 0; j < itemcount; j++) + for (unsigned int j = 0; j < itemcount; j++) { uint16_t item = item_ptr[j]; - int idx = item & 0x7FF; - int weight = (item >> 11) & 0x1F; + unsigned int idx = item & 0x7FF; + unsigned int weight = (item >> 11) & 0x1F; accum += weight; unpacked_table[idx] = (float)accum / (float)difscale; } @@ -1152,10 +1150,10 @@ const 
float *get_2d_percentile_table( /* See header for documentation. */ bool is_legal_2d_block_size( - int xdim, - int ydim + unsigned int xdim, + unsigned int ydim ) { - int idx = (xdim << 8) | ydim; + unsigned int idx = (xdim << 8) | ydim; switch (idx) { case 0x0404: @@ -1180,11 +1178,11 @@ bool is_legal_2d_block_size( /* See header for documentation. */ bool is_legal_3d_block_size( - int xdim, - int ydim, - int zdim + unsigned int xdim, + unsigned int ydim, + unsigned int zdim ) { - int idx = (xdim << 16) | (ydim << 8) | zdim; + unsigned int idx = (xdim << 16) | (ydim << 8) | zdim; switch (idx) { case 0x030303: diff --git a/lib/astc-encoder/Source/astcenc_pick_best_endpoint_format.cpp b/lib/astc-encoder/Source/astcenc_pick_best_endpoint_format.cpp index c239cac438..06a334a246 100644 --- a/lib/astc-encoder/Source/astcenc_pick_best_endpoint_format.cpp +++ b/lib/astc-encoder/Source/astcenc_pick_best_endpoint_format.cpp @@ -50,10 +50,9 @@ /** * @brief Compute cumulative error weight of each partition. * - * The cumulative error weight is used to determine the relative importance of - * each partiton when deciding how to quantize colors, as not all partitions - * are equal. For example, some partitions will have far fewer texels than - * others in the same block. + * The cumulative error weight is used to determine the relative importance of each partiton when + * deciding how to quantize colors, as not all partitions are equal. For example, some partitions + * will have far fewer texels than others in the same block. * * @param ewb The block error weights. * @param pi The partiion info. 
@@ -62,7 +61,7 @@ static void compute_partition_error_color_weightings( const error_weight_block& ewb, const partition_info& pi, - partition_metrics pm[4] + partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { int partition_count = pi.partition_count; promise(partition_count > 0); @@ -88,10 +87,10 @@ static void compute_partition_error_color_weightings( /** * @brief Compute the errors of the endpoint line options for one partition. * - * Uncorrelated data assumes storing completely independent RGBA channels for - * each endpoint. Same chroma data assumes storing RGBA endpoints which pass - * though the origin (LDR only). RGBL data assumes storing RGB + lumashift (HDR - * only). Luminance error assumes storing RGB channels as a single value. + * Uncorrelated data assumes storing completely independent RGBA channels for each endpoint. Same + * chroma data assumes storing RGBA endpoints which pass though the origin (LDR only). RGBL data + * assumes storing RGB + lumashift (HDR only). Luminance error assumes storing RGB channels as a + * single value. * * * @param pi The partition info data. 
@@ -145,7 +144,7 @@ static void compute_error_squared_rgb_single_partition( vfloat4 ews = ewb.error_weights[tix]; // Compute the error that arises from just ditching alpha - float default_alpha = imageblock_default_alpha(&blk); + float default_alpha = blk.get_default_alpha(); float omalpha = point.lane<3>() - default_alpha; a_drop_err += omalpha * omalpha * ews.lane<3>(); @@ -194,7 +193,7 @@ static void compute_encoding_choice_errors( const partition_info& pi, const error_weight_block& ewb, const endpoints& ep, - encoding_choice_errors eci[4]) + encoding_choice_errors eci[BLOCK_MAX_PARTITIONS]) { int partition_count = pi.partition_count; int texels_per_block = bsd.texel_count; @@ -202,7 +201,7 @@ static void compute_encoding_choice_errors( promise(partition_count > 0); promise(texels_per_block > 0); - partition_metrics pms[4]; + partition_metrics pms[BLOCK_MAX_PARTITIONS]; compute_avgs_and_dirs_3_comp(pi, blk, ewb, 3, pms); @@ -358,9 +357,8 @@ static void compute_color_error_for_every_integer_count_and_quant_level( float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f; float range_upper_limit_alpha = encode_hdr_alpha ? 
61440.0f : 65535.0f; - // it is possible to get endpoint colors significantly outside [0,upper-limit] - // even if the input data are safely contained in [0,upper-limit]; - // we need to add an error term for this situation, + // It is possible to get endpoint colors significantly outside [0,upper-limit] even if the + // input data are safely contained in [0,upper-limit]; we need to add an error term for this vfloat4 ep0_range_error_high; vfloat4 ep1_range_error_high; vfloat4 ep0_range_error_low; @@ -387,7 +385,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level( if (encode_hdr_rgb) { - // collect some statistics + // Collect some statistics float af, cf; if (ep1.lane<0>() > ep1.lane<1>() && ep1.lane<0>() > ep1.lane<2>()) { @@ -405,18 +403,18 @@ static void compute_color_error_for_every_integer_count_and_quant_level( cf = ep1.lane<2>() - ep0.lane<2>(); } - float bf = af - ep1_min; // estimate of color-component spread in high endpoint color + // Estimate of color-component spread in high endpoint color + float bf = af - ep1_min; vfloat4 prd = (ep1 - vfloat4(cf)).swz<0, 1, 2>(); vfloat4 pdif = prd - ep0.swz<0, 1, 2>(); - // estimate of color-component spread in low endpoint color + // Estimate of color-component spread in low endpoint color float df = hmax_s(abs(pdif)); int b = (int)bf; int c = (int)cf; int d = (int)df; - // determine which one of the 6 submodes is likely to be used in - // case of an RGBO-mode + // Determine which one of the 6 submodes is likely to be used in case of an RGBO-mode int rgbo_mode = 5; // 7 bits per component // mode 4: 8 7 6 if (b < 32768 && c < 16384) @@ -448,8 +446,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level( rgbo_mode = 0; } - // determine which one of the 9 submodes is likely to be used in - // case of an RGB-mode. + // Determine which one of the 9 submodes is likely to be used in case of an RGB-mode. 
int rgb_mode = 8; // 8 bits per component, except 7 bits for blue // mode 0: 9 7 6 7 @@ -503,8 +500,8 @@ static void compute_color_error_for_every_integer_count_and_quant_level( static const float rgbo_error_scales[6] { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; static const float rgb_error_scales[9] { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; - float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined .... - float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined .... + float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // Empirically determined .... + float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // Empirically determined .... float lum_high = hadd_rgb_s(ep1) * (1.0f / 3.0f); @@ -512,9 +509,9 @@ static void compute_color_error_for_every_integer_count_and_quant_level( float lumdif = lum_high - lum_low; float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f; - mode23mult *= 0.0005f; // empirically determined .... + mode23mult *= 0.0005f; // Empirically determined .... - // pick among the available HDR endpoint modes + // Pick among the available HDR endpoint modes for (int i = 0; i < 8; i++) { best_error[i][3] = 1e30f; @@ -529,33 +526,32 @@ static void compute_color_error_for_every_integer_count_and_quant_level( for (int i = 8; i < 21; i++) { - // base_quant_error should depend on the scale-factor that would be used - // during actual encode of the color value. 
+ // The base_quant_error should depend on the scale-factor that would be used during + // actual encode of the color value float base_quant_error = baseline_quant_error[i] * static_cast(partition_size); float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f; float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f; float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; - // for 8 integers, we have two encodings: one with HDR alpha and another one - // with LDR alpha. + // For 8 integers, we have two encodings: one with HDR A and another one with LDR A float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error; best_error[i][3] = full_hdr_rgba_error; format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA; - // for 6 integers, we have one HDR-RGB encoding + // For 6 integers, we have one HDR-RGB encoding float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci.alpha_drop_error; best_error[i][2] = full_hdr_rgb_error; format_of_choice[i][2] = FMT_HDR_RGB; - // for 4 integers, we have one HDR-RGB-Scale encoding + // For 4 integers, we have one HDR-RGB-Scale encoding float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci.alpha_drop_error + eci.rgb_luma_error; best_error[i][1] = hdr_rgb_scale_error; format_of_choice[i][1] = FMT_HDR_RGB_SCALE; - // for 2 integers, we assume luminance-with-large-range + // For 2 integers, we assume luminance-with-large-range float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci.alpha_drop_error + eci.luminance_error; best_error[i][0] = hdr_luminance_error; format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE; @@ -586,7 +582,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level( float error_scale_bc_rgb = eci.can_blue_contract ? 
0.5f : 1.0f; float error_scale_oe_rgb = eci.can_offset_encode ? 0.25f : 1.0f; - // pick among the available LDR endpoint modes + // Pick among the available LDR endpoint modes for (int i = 4; i < 21; i++) { // Offset encoding not possible at higher quant levels @@ -681,32 +677,35 @@ static void one_partition_find_best_combination_for_bitcount( const float best_combined_error[21][4], const int best_combined_format[21][4], int bits_available, - int& best_quant_level, + quant_method& best_quant_level, int& best_format, float& best_error ) { - int best_integer_count = -1; + int best_integer_count = 0; float best_integer_count_error = 1e20f; - for (int i = 0; i < 4; i++) + + for (int integer_count = 1; integer_count <= 4; integer_count++) { - // compute the quantization level for a given number of integers and a given number of bits. - int quant_level = quant_mode_table[i + 1][bits_available]; + // Compute the quantization level for a given number of integers and a given number of bits + int quant_level = quant_mode_table[integer_count][bits_available]; + // Don't have enough bits to represent a given endpoint format at all! if (quant_level == -1) { - continue; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
+ continue; } - if (best_combined_error[quant_level][i] < best_integer_count_error) + float integer_count_error = best_combined_error[quant_level][integer_count - 1]; + if (integer_count_error < best_integer_count_error) { - best_integer_count_error = best_combined_error[quant_level][i]; - best_integer_count = i; + best_integer_count_error = integer_count_error; + best_integer_count = integer_count - 1; } } int ql = quant_mode_table[best_integer_count + 1][bits_available]; - best_quant_level = ql; + best_quant_level = (quant_method)ql; best_format = FMT_LUMINANCE; best_error = best_integer_count_error; @@ -779,8 +778,8 @@ static void two_partitions_find_best_combination_for_bitcount( float best_combined_error[21][7], int best_combined_format[21][7][2], int bits_available, - int& best_quant_level, - int& best_quant_level_mod, + quant_method& best_quant_level, + quant_method& best_quant_level_mod, int* best_formats, float& best_error ) { @@ -789,16 +788,16 @@ static void two_partitions_find_best_combination_for_bitcount( for (int integer_count = 2; integer_count <= 8; integer_count++) { - // compute the quantization level for a given number of integers and a given number of bits. + // Compute the quantization level for a given number of integers and a given number of bits int quant_level = quant_mode_table[integer_count][bits_available]; + // Don't have enough bits to represent a given endpoint format at all! if (quant_level == -1) { - break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
+ break; } float integer_count_error = best_combined_error[quant_level][integer_count - 2]; - if (integer_count_error < best_integer_count_error) { best_integer_count_error = integer_count_error; @@ -809,8 +808,8 @@ static void two_partitions_find_best_combination_for_bitcount( int ql = quant_mode_table[best_integer_count][bits_available]; int ql_mod = quant_mode_table[best_integer_count][bits_available + 2]; - best_quant_level = ql; - best_quant_level_mod = ql_mod; + best_quant_level = (quant_method)ql; + best_quant_level_mod = (quant_method)ql_mod; best_error = best_integer_count_error; if (ql >= 0) { @@ -902,8 +901,8 @@ static void three_partitions_find_best_combination_for_bitcount( const float best_combined_error[21][10], const int best_combined_format[21][10][3], int bits_available, - int& best_quant_level, - int& best_quant_level_mod, + quant_method& best_quant_level, + quant_method& best_quant_level_mod, int* best_formats, float& best_error ) { @@ -912,16 +911,16 @@ static void three_partitions_find_best_combination_for_bitcount( for (int integer_count = 3; integer_count <= 9; integer_count++) { - // compute the quantization level for a given number of integers and a given number of bits. + // Compute the quantization level for a given number of integers and a given number of bits int quant_level = quant_mode_table[integer_count][bits_available]; + // Don't have enough bits to represent a given endpoint format at all! if (quant_level == -1) { - break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
+ break; } float integer_count_error = best_combined_error[quant_level][integer_count - 3]; - if (integer_count_error < best_integer_count_error) { best_integer_count_error = integer_count_error; @@ -932,8 +931,8 @@ static void three_partitions_find_best_combination_for_bitcount( int ql = quant_mode_table[best_integer_count][bits_available]; int ql_mod = quant_mode_table[best_integer_count][bits_available + 5]; - best_quant_level = ql; - best_quant_level_mod = ql_mod; + best_quant_level = (quant_method)ql; + best_quant_level_mod = (quant_method)ql_mod; best_error = best_integer_count_error; if (ql >= 0) { @@ -1036,8 +1035,8 @@ static void four_partitions_find_best_combination_for_bitcount( const float best_combined_error[21][13], const int best_combined_format[21][13][4], int bits_available, - int& best_quant_level, - int& best_quant_level_mod, + quant_method& best_quant_level, + quant_method& best_quant_level_mod, int* best_formats, float& best_error ) { @@ -1046,16 +1045,16 @@ static void four_partitions_find_best_combination_for_bitcount( for (int integer_count = 4; integer_count <= 9; integer_count++) { - // compute the quantization level for a given number of integers and a given number of bits. + // Compute the quantization level for a given number of integers and a given number of bits int quant_level = quant_mode_table[integer_count][bits_available]; + // Don't have enough bits to represent a given endpoint format at all! if (quant_level == -1) { - break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
+ break; } float integer_count_error = best_combined_error[quant_level][integer_count - 4]; - if (integer_count_error < best_integer_count_error) { best_integer_count_error = integer_count_error; @@ -1066,8 +1065,8 @@ static void four_partitions_find_best_combination_for_bitcount( int ql = quant_mode_table[best_integer_count][bits_available]; int ql_mod = quant_mode_table[best_integer_count][bits_available + 8]; - best_quant_level = ql; - best_quant_level_mod = ql_mod; + best_quant_level = (quant_method)ql; + best_quant_level_mod = (quant_method)ql_mod; best_error = best_integer_count_error; if (ql >= 0) { @@ -1086,7 +1085,7 @@ static void four_partitions_find_best_combination_for_bitcount( } /* See header for documentation. */ -void determine_optimal_set_of_endpoint_formats_to_use( +unsigned int compute_ideal_endpoint_formats( const block_size_descriptor& bsd, const partition_info& pi, const imageblock& blk, @@ -1095,12 +1094,12 @@ void determine_optimal_set_of_endpoint_formats_to_use( // bitcounts and errors computed for the various quantization methods const int* qwt_bitcounts, const float* qwt_errors, - int tune_candidate_limit, + unsigned int tune_candidate_limit, // output data - int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4], + int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], - int quant_level[TUNE_MAX_TRIAL_CANDIDATES], - int quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES] + quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES], + quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES] ) { int partition_count = pi.partition_count; @@ -1110,19 +1109,18 @@ void determine_optimal_set_of_endpoint_formats_to_use( int encode_hdr_rgb = blk.rgb_lns[0]; int encode_hdr_alpha = blk.alpha_lns[0]; - // call a helper function to compute the errors that result from various - // encoding choices (such as using luminance instead of RGB, discarding Alpha, - // using RGB-scale in place of two 
separate RGB endpoints and so on) - encoding_choice_errors eci[4]; + // Compute the errors that result from various encoding choices (such as using luminance instead + // of RGB, discarding Alpha, using RGB-scale in place of two separate RGB endpoints and so on) + encoding_choice_errors eci[BLOCK_MAX_PARTITIONS]; compute_encoding_choice_errors(bsd, blk, pi, ewb, ep, eci); - // for each partition, compute the error weights to apply for that partition. - partition_metrics pms[4]; + // For each partition, compute the error weights to apply for that partition + partition_metrics pms[BLOCK_MAX_PARTITIONS]; compute_partition_error_color_weightings(ewb, pi, pms); - float best_error[4][21][4]; - int format_of_choice[4][21][4]; + float best_error[BLOCK_MAX_PARTITIONS][21][4]; + int format_of_choice[BLOCK_MAX_PARTITIONS][21][4]; for (int i = 0; i < partition_count; i++) { compute_color_error_for_every_integer_count_and_quant_level( @@ -1131,27 +1129,27 @@ void determine_optimal_set_of_endpoint_formats_to_use( format_of_choice[i]); } - alignas(ASTCENC_VECALIGN) float errors_of_best_combination[MAX_WEIGHT_MODES]; - alignas(ASTCENC_VECALIGN) int best_quant_levels[MAX_WEIGHT_MODES]; - int best_quant_levels_mod[MAX_WEIGHT_MODES]; - int best_ep_formats[MAX_WEIGHT_MODES][4]; + alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES]; + alignas(ASTCENC_VECALIGN) quant_method best_quant_levels[WEIGHTS_MAX_BLOCK_MODES]; + quant_method best_quant_levels_mod[WEIGHTS_MAX_BLOCK_MODES]; + int best_ep_formats[WEIGHTS_MAX_BLOCK_MODES][4]; - // have to ensure that the "overstep" of the last iteration in the vectorized - // loop will contain data that will never be picked as best candidate + // Ensure that the "overstep" of the last iteration in the vectorized loop will contain data + // that will never be picked as best candidate const int packed_mode_count = bsd.block_mode_count; const int packed_mode_count_simd_up = 
round_up_to_simd_multiple_vla(packed_mode_count); for (int i = packed_mode_count; i < packed_mode_count_simd_up; ++i) { errors_of_best_combination[i] = 1e30f; - best_quant_levels[i] = 0; - best_quant_levels_mod[i] = 0; + best_quant_levels[i] = QUANT_2; + best_quant_levels_mod[i] = QUANT_2; } - // code for the case where the block contains 1 partition + // The block contains 1 partition if (partition_count == 1) { float error_of_best_combination; - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -1168,7 +1166,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( best_quant_levels_mod[i] = best_quant_levels[i]; } } - // code for the case where the block contains 2 partitions + // The block contains 2 partitions else if (partition_count == 2) { float combined_best_error[21][7]; @@ -1178,7 +1176,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -1195,7 +1193,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } - // code for the case where the block contains 3 partitions + // The block contains 3 partitions else if (partition_count == 3) { float combined_best_error[21][10]; @@ -1204,7 +1202,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( three_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -1221,7 +1219,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( errors_of_best_combination[i] = error_of_best_combination + 
qwt_errors[i]; } } - // code for the case where the block contains 4 partitions + // The block contains 4 partitions else if (partition_count == 4) { float combined_best_error[21][13]; @@ -1230,7 +1228,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( four_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -1250,24 +1248,23 @@ void determine_optimal_set_of_endpoint_formats_to_use( // Go through the results and pick the best candidate modes int best_error_weights[TUNE_MAX_TRIAL_CANDIDATES]; - for (int i = 0; i < tune_candidate_limit; i++) + for (unsigned int i = 0; i < tune_candidate_limit; i++) { vint vbest_error_index(-1); vfloat vbest_ep_error(1e30f); vint lane_ids = vint::lane_id(); - for (int j = 0; j < bsd.block_mode_count; j += ASTCENC_SIMD_WIDTH) + for (unsigned int j = 0; j < bsd.block_mode_count; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(&errors_of_best_combination[j]); vmask mask1 = err < vbest_ep_error; - vmask mask2 = vint(&best_quant_levels[j]) > vint(4); + vmask mask2 = vint((int*)(&best_quant_levels[j])) > vint(4); vmask mask = mask1 & mask2; vbest_ep_error = select(vbest_ep_error, err, mask); vbest_error_index = select(vbest_error_index, lane_ids, mask); lane_ids = lane_ids + vint(ASTCENC_SIMD_WIDTH); } - // Pick best mode from the SIMD result. If multiple SIMD lanes have - // the best score, pick the one with the lowest index. 
+ // Pick best mode from the SIMD result, using lowest matching index to ensure invariance vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); vbest_error_index = hmin(vbest_error_index); @@ -1280,22 +1277,31 @@ void determine_optimal_set_of_endpoint_formats_to_use( { errors_of_best_combination[best_error_index] = 1e30f; } + // Early-out if no more candidates are valid + else + { + break; + } } - for (int i = 0; i < tune_candidate_limit; i++) + for (unsigned int i = 0; i < tune_candidate_limit; i++) { + if (best_error_weights[i] < 0) + { + return i; + } + block_mode[i] = best_error_weights[i]; - if (block_mode[i] >= 0) + quant_level[i] = best_quant_levels[best_error_weights[i]]; + assert(quant_level[i] >= 0 && quant_level[i] < 21); + quant_level_mod[i] = best_quant_levels_mod[best_error_weights[i]]; + for (int j = 0; j < partition_count; j++) { - quant_level[i] = best_quant_levels[best_error_weights[i]]; - assert(quant_level[i] >= 0 && quant_level[i] < 21); - quant_level_mod[i] = best_quant_levels_mod[best_error_weights[i]]; - for (int j = 0; j < partition_count; j++) - { - partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j]; - } + partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j]; } } + + return tune_candidate_limit; } #endif diff --git a/lib/astc-encoder/Source/astcenc_quantization.cpp b/lib/astc-encoder/Source/astcenc_quantization.cpp index 6fda47158d..aac628ca33 100644 --- a/lib/astc-encoder/Source/astcenc_quantization.cpp +++ b/lib/astc-encoder/Source/astcenc_quantization.cpp @@ -533,27 +533,26 @@ const uint8_t color_unquant_tables[21][256] = { } }; -// The quant_mode_table[integercount/2][bits] gives us the quantization -// level for a given integer count and number of bits that the integer may fit -// into. This is needed for color encoding and decoding. 
+// The quant_mode_table[integercount/2][bits] gives us the quantization level for a given integer +// count and number of bits that the integer may fit into. int8_t quant_mode_table[17][128]; /* See header for documentation. */ -void build_quant_mode_table() +void init_quant_mode_table() { - for (int i = 0; i <= 16; i++) + for (unsigned int i = 0; i <= 16; i++) { - for (int j = 0; j < 128; j++) + for (unsigned int j = 0; j < 128; j++) { quant_mode_table[i][j] = -1; } } - for (int i = 0; i < 21; i++) + for (unsigned int i = 0; i < 21; i++) { - for (int j = 1; j <= 16; j++) + for (unsigned int j = 1; j <= 16; j++) { - int p = get_ise_sequence_bitcount(2 * j, (quant_method)i); + unsigned int p = get_ise_sequence_bitcount(2 * j, (quant_method)i); if (p < 128) { quant_mode_table[j][p] = i; @@ -564,7 +563,7 @@ void build_quant_mode_table() for (int i = 0; i <= 16; i++) { int largest_value_so_far = -1; - for (int j = 0; j < 128; j++) + for (unsigned int j = 0; j < 128; j++) { if (quant_mode_table[i][j] > largest_value_so_far) { diff --git a/lib/astc-encoder/Source/astcenc_symbolic_physical.cpp b/lib/astc-encoder/Source/astcenc_symbolic_physical.cpp index fe9606f8dc..c42b5d50ac 100644 --- a/lib/astc-encoder/Source/astcenc_symbolic_physical.cpp +++ b/lib/astc-encoder/Source/astcenc_symbolic_physical.cpp @@ -21,13 +21,13 @@ #include "astcenc_internal.h" -#include +#include /** * @brief Write up to 8 bits at an arbitrary bit offset. * - * The stored value is at most 8 bits, but can be stored at an offset of - * between 0 and 7 bits so may span two separate bytes in memory. + * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so + * may span two separate bytes in memory. * * @param value The value to write. * @param bitcount The number of bits to write, starting from LSB. @@ -57,8 +57,8 @@ static inline void write_bits( /** * @brief Read up to 8 bits at an arbitrary bit offset. 
* - * The stored value is at most 8 bits, but can be stored at an offset of - * between 0 and 7 bits so may span two separate bytes in memory. + * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may + * span two separate bytes in memory. * * @param bitcount The number of bits to read. * @param bitoffset The bit offset to read from, between 0 and 7. @@ -101,19 +101,17 @@ void symbolic_to_physical( const symbolic_compressed_block& scb, physical_compressed_block& pcb ) { - if (scb.block_mode == -2) + // Constant color block using UNORM16 colors + if (scb.block_type == SYM_BTYPE_CONST_U16) { - // UNORM16 constant-color block. - // This encodes separate constant-color blocks. There is currently - // no attempt to coalesce them into larger void-extents. - + // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (int i = 0; i < 8; i++) + for (unsigned int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; } - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb.data[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb.data[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -122,19 +120,17 @@ void symbolic_to_physical( return; } - if (scb.block_mode == -1) + // Constant color block using FP16 colors + if (scb.block_type == SYM_BTYPE_CONST_F16) { - // FP16 constant-color block. - // This encodes separate constant-color blocks. There is currently - // no attempt to coalesce them into larger void-extents. 
- + // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (int i = 0; i < 8; i++) + for (unsigned int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; } - for (int i = 0; i < 4; i++) + for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb.data[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb.data[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -143,30 +139,21 @@ void symbolic_to_physical( return; } - int partition_count = scb.partition_count; + unsigned int partition_count = scb.partition_count; - // First, compress the weights. + // Compress the weights. // They are encoded as an ordinary integer-sequence, then bit-reversed - uint8_t weightbuf[16]; - for (int i = 0; i < 16; i++) - { - weightbuf[i] = 0; - } - - const decimation_table *const *dts = bsd.decimation_tables; + uint8_t weightbuf[16] { 0 }; - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - - int weight_count = dts[bm.decimation_mode]->weight_count; - quant_method weight_quant_method = (quant_method)bm.quant_mode; + const auto& bm = bsd.get_block_mode(scb.block_mode); + const auto& di = bsd.get_decimation_info(bm.decimation_mode); + int weight_count = di.weight_count; + quant_method weight_quant_method = bm.get_weight_quant_mode(); int is_dual_plane = bm.is_dual_plane; int real_weight_count = is_dual_plane ? 
2 * weight_count : weight_count; - int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, - (quant_method)weight_quant_method); + int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method); if (is_dual_plane) { @@ -174,7 +161,7 @@ void symbolic_to_physical( for (int i = 0; i < weight_count; i++) { weights[2 * i] = scb.weights[i]; - weights[2 * i + 1] = scb.weights[i + PLANE2_WEIGHTS_OFFSET]; + weights[2 * i + 1] = scb.weights[i + WEIGHTS_PLANE2_OFFSET]; } encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0); } @@ -197,18 +184,18 @@ void symbolic_to_physical( if (partition_count > 1) { write_bits(scb.partition_index, 6, 13, pcb.data); - write_bits(scb.partition_index >> 6, PARTITION_BITS - 6, 19, pcb.data); + write_bits(scb.partition_index >> 6, PARTITION_INDEX_BITS - 6, 19, pcb.data); if (scb.color_formats_matched) { - write_bits(scb.color_formats[0] << 2, 6, 13 + PARTITION_BITS, pcb.data); + write_bits(scb.color_formats[0] << 2, 6, 13 + PARTITION_INDEX_BITS, pcb.data); } else { // Check endpoint types for each partition to determine the lowest class present int low_class = 4; - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { int class_of_format = scb.color_formats[i] >> 2; low_class = astc::min(class_of_format, low_class); @@ -222,14 +209,14 @@ void symbolic_to_physical( int encoded_type = low_class + 1; int bitpos = 2; - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { int classbit_of_format = (scb.color_formats[i] >> 2) - low_class; encoded_type |= classbit_of_format << bitpos; bitpos++; } - for (int i = 0; i < partition_count; i++) + for (unsigned int i = 0; i < partition_count; i++) { int lowbits_of_format = scb.color_formats[i] & 3; encoded_type |= lowbits_of_format << bitpos; @@ -240,7 +227,7 @@ void symbolic_to_physical( int encoded_type_highpart = encoded_type >> 6; int encoded_type_highpart_size = 
(3 * partition_count) - 4; int encoded_type_highpart_pos = 128 - bits_for_weights - encoded_type_highpart_size; - write_bits(encoded_type_lowpart, 6, 13 + PARTITION_BITS, pcb.data); + write_bits(encoded_type_lowpart, 6, 13 + PARTITION_INDEX_BITS, pcb.data); write_bits(encoded_type_highpart, encoded_type_highpart_size, encoded_type_highpart_pos, pcb.data); below_weights_pos -= encoded_type_highpart_size; } @@ -259,9 +246,10 @@ void symbolic_to_physical( // Encode the color components uint8_t values_to_encode[32]; int valuecount_to_encode = 0; - for (int i = 0; i < scb.partition_count; i++) + for (unsigned int i = 0; i < scb.partition_count; i++) { int vals = 2 * (scb.color_formats[i] >> 2) + 2; + assert(vals <= 8); for (int j = 0; j < vals; j++) { values_to_encode[j + valuecount_to_encode] = scb.color_values[i][j]; @@ -269,7 +257,8 @@ void symbolic_to_physical( valuecount_to_encode += vals; } - encode_ise((quant_method)scb.color_quant_level, valuecount_to_encode, values_to_encode, pcb.data, (scb.partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + encode_ise(scb.get_color_quant_mode(), valuecount_to_encode, values_to_encode, pcb.data, + scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS); } /* See header for documentation. */ @@ -280,10 +269,7 @@ void physical_to_symbolic( ) { uint8_t bswapped[16]; - scb.error_block = 0; - - // Fetch the decimation tables - const decimation_table *const *dts = bsd.decimation_tables; + scb.block_type = SYM_BTYPE_NONCONST; // Extract header fields int block_mode = read_bits(11, 0, pcb.data); @@ -294,11 +280,11 @@ void physical_to_symbolic( // Check what format the data has if (block_mode & 0x200) { - scb.block_mode = -1; // floating-point + scb.block_type = SYM_BTYPE_CONST_F16; } else { - scb.block_mode = -2; // unorm16. 
+ scb.block_type = SYM_BTYPE_CONST_U16; } scb.partition_count = 0; @@ -314,7 +300,7 @@ void physical_to_symbolic( int rsvbits = read_bits(2, 10, pcb.data); if (rsvbits != 3) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } int vx_low_s = read_bits(8, 12, pcb.data) | (read_bits(5, 12 + 8, pcb.data) << 8); @@ -326,7 +312,7 @@ void physical_to_symbolic( if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } } else @@ -343,23 +329,24 @@ void physical_to_symbolic( if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } } return; } - const int packed_index = bsd.block_mode_packed_index[block_mode]; - if (packed_index < 0) + unsigned int packed_index = bsd.block_mode_packed_index[block_mode]; + if (packed_index == BLOCK_BAD_BLOCK_MODE) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; return; } - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const struct block_mode& bm = bsd.block_modes[packed_index]; - int weight_count = dts[bm.decimation_mode]->weight_count; + const auto& bm = bsd.get_block_mode(block_mode); + const auto& di = bsd.get_decimation_info(bm.decimation_mode); + + int weight_count = di.weight_count; quant_method weight_quant_method = (quant_method)bm.quant_mode; int is_dual_plane = bm.is_dual_plane; @@ -386,7 +373,7 @@ void physical_to_symbolic( for (int i = 0; i < weight_count; i++) { scb.weights[i] = indices[2 * i]; - scb.weights[i + PLANE2_WEIGHTS_OFFSET] = indices[2 * i + 1]; + scb.weights[i + WEIGHTS_PLANE2_OFFSET] = indices[2 * i + 1]; } } else @@ -396,13 +383,13 @@ void physical_to_symbolic( if (is_dual_plane && partition_count == 4) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } scb.color_formats_matched = 0; // Determine the format of each endpoint pair - int color_formats[4]; + int 
color_formats[BLOCK_MAX_PARTITIONS]; int encoded_type_highpart_size = 0; if (partition_count == 1) { @@ -413,7 +400,7 @@ void physical_to_symbolic( { encoded_type_highpart_size = (3 * partition_count) - 4; below_weights_pos -= encoded_type_highpart_size; - int encoded_type = read_bits(6, 13 + PARTITION_BITS, pcb.data) | (read_bits(encoded_type_highpart_size, below_weights_pos, pcb.data) << 6); + int encoded_type = read_bits(6, 13 + PARTITION_INDEX_BITS, pcb.data) | (read_bits(encoded_type_highpart_size, below_weights_pos, pcb.data) << 6); int baseclass = encoded_type & 0x3; if (baseclass == 0) { @@ -443,7 +430,7 @@ void physical_to_symbolic( bitpos += 2; } } - scb.partition_index = read_bits(6, 13, pcb.data) | (read_bits(PARTITION_BITS - 6, 19, pcb.data) << 6); + scb.partition_index = read_bits(6, 13, pcb.data) | (read_bits(PARTITION_INDEX_BITS - 6, 19, pcb.data) << 6); } for (int i = 0; i < partition_count; i++) @@ -461,11 +448,11 @@ void physical_to_symbolic( if (color_integer_count > 18) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } // Determine the color endpoint format to use - static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS }; + static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS }; int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size; if (is_dual_plane) { @@ -478,15 +465,15 @@ void physical_to_symbolic( } int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits]; - scb.color_quant_level = color_quant_level; - if (color_quant_level < 4) + scb.quant_mode = (quant_method)color_quant_level; + if (color_quant_level < QUANT_6) { - scb.error_block = 1; + scb.block_type = SYM_BTYPE_ERROR; } // Unpack the integer color values and assign to endpoints uint8_t values_to_decode[32]; - 
decode_ise((quant_method)color_quant_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + decode_ise((quant_method)color_quant_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS)); int valuecount_to_decode = 0; for (int i = 0; i < partition_count; i++) diff --git a/lib/astc-encoder/Source/astcenc_vecmathlib.h b/lib/astc-encoder/Source/astcenc_vecmathlib.h index 5c0f941da5..bcd2a51d76 100644 --- a/lib/astc-encoder/Source/astcenc_vecmathlib.h +++ b/lib/astc-encoder/Source/astcenc_vecmathlib.h @@ -148,7 +148,7 @@ * * @return The rounded value. */ -ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_8(int count) +ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int count) { return count & ~(8 - 1); } @@ -160,7 +160,7 @@ ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_8(int count) * * @return The rounded value. */ -ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_4(int count) +ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int count) { return count & ~(4 - 1); } @@ -174,7 +174,7 @@ ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_4(int count) * * @return The rounded value. */ -ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_vla(int count) +ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int count) { return count & ~(ASTCENC_SIMD_WIDTH - 1); } @@ -188,7 +188,7 @@ ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_vla(int count) * * @return The rounded value. 
*/ -ASTCENC_SIMD_INLINE int round_up_to_simd_multiple_vla(int count) +ASTCENC_SIMD_INLINE unsigned int round_up_to_simd_multiple_vla(unsigned int count) { int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; return multiples * ASTCENC_SIMD_WIDTH; diff --git a/lib/astc-encoder/Source/astcenc_vecmathlib_neon_4.h b/lib/astc-encoder/Source/astcenc_vecmathlib_neon_4.h index 7600262757..5c215e07b6 100755 --- a/lib/astc-encoder/Source/astcenc_vecmathlib_neon_4.h +++ b/lib/astc-encoder/Source/astcenc_vecmathlib_neon_4.h @@ -148,8 +148,6 @@ struct vfloat4 /** * @brief Return a swizzled float 2. - * - * TODO: Implement using permutes. */ template ASTCENC_SIMD_INLINE vfloat4 swz() const { @@ -158,8 +156,6 @@ struct vfloat4 /** * @brief Return a swizzled float 3. - * - * TODO: Implement using permutes. */ template ASTCENC_SIMD_INLINE vfloat4 swz() const { @@ -168,8 +164,6 @@ struct vfloat4 /** * @brief Return a swizzled float 4. - * - * TODO: Implement using permutes. */ template ASTCENC_SIMD_INLINE vfloat4 swz() const { diff --git a/lib/astc-encoder/Source/astcenc_weight_align.cpp b/lib/astc-encoder/Source/astcenc_weight_align.cpp index 1edc1a2d00..5b27897ac8 100644 --- a/lib/astc-encoder/Source/astcenc_weight_align.cpp +++ b/lib/astc-encoder/Source/astcenc_weight_align.cpp @@ -22,26 +22,19 @@ * * This algorithm works as follows: * - we compute a complex number P as (cos s*i, sin s*i) for each weight, - * where i is the input value and s is a scaling factor based on the spacing - * between the weights. + * where i is the input value and s is a scaling factor based on the spacing between the weights. * - we then add together complex numbers for all the weights. * - we then compute the length and angle of the resulting sum. 
* * This should produce the following results: - * - perfect alignment results in a vector whose length is equal to the sum of - * lengths of all inputs + * - perfect alignment results in a vector whose length is equal to the sum of lengths of all inputs * - even distribution results in a vector of length 0. * - all samples identical results in perfect alignment for every scaling. * - * For each scaling factor within a given set, we compute an alignment factor - * from 0 to 1. This should then result in some scalings standing out as having - * particularly good alignment factors; we can use this to produce a set of - * candidate scale/shift values for various quantization levels; we should then - * actually try them and see what happens. - * - * Assuming N quantization steps, the scaling factor becomes s=2*PI*(N-1); we - * should probably have about 1 scaling factor for every 1/4 quantization step - * (perhaps 1/8 for low levels of quantization). + * For each scaling factor within a given set, we compute an alignment factor from 0 to 1. This + * should then result in some scalings standing out as having particularly good alignment factors; + * we can use this to produce a set of candidate scale/shift values for various quantization levels; + * we should then actually try them and see what happens. */ #include "astcenc_internal.h" @@ -51,36 +44,36 @@ #include #include -#define ANGULAR_STEPS 40 + +static constexpr unsigned int ANGULAR_STEPS { 40 }; + +// Store a reduced sin/cos table for 64 possible weight values; this causes slight quality loss +// compared to using sin() and cos() directly. Must be 2^N. +static constexpr unsigned int SINCOS_STEPS { 64 }; static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); -static int max_angular_steps_needed_for_quant_level[13]; +static unsigned int max_angular_steps_needed_for_quant_level[13]; -// Yes, the next-to-last entry is supposed to have the value 33. 
This because -// the 32-weight mode leaves a double-sized hole in the middle of the weight -// space, so we are better off matching 33 weights than 32. -static const int quantization_steps_for_level[13] = { +// The next-to-last entry is supposed to have the value 33. This because the 32-weight mode leaves a +// double-sized hole in the middle of the weight space, so we are better off matching 33 weights. +static const unsigned int quantization_steps_for_level[13] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 }; -// Store a reduced sin/cos table for 64 possible weight values; this causes -// slight quality loss compared to using sin() and cos() directly. Must be 2^N. -#define SINCOS_STEPS 64 - alignas(ASTCENC_VECALIGN) static float sin_table[SINCOS_STEPS][ANGULAR_STEPS]; alignas(ASTCENC_VECALIGN) static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; /* See header for documentation. */ void prepare_angular_tables() { - int max_angular_steps_needed_for_quant_steps[ANGULAR_STEPS + 1]; - for (int i = 0; i < ANGULAR_STEPS; i++) + unsigned int max_angular_steps_needed_for_quant_steps[ANGULAR_STEPS + 1]; + for (unsigned int i = 0; i < ANGULAR_STEPS; i++) { float angle_step = (float)(i + 1); - for (int j = 0; j < SINCOS_STEPS; j++) + for (unsigned int j = 0; j < SINCOS_STEPS; j++) { sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); @@ -89,7 +82,7 @@ void prepare_angular_tables() max_angular_steps_needed_for_quant_steps[i + 1] = astc::min(i + 1, ANGULAR_STEPS - 1); } - for (int i = 0; i < 13; i++) + for (unsigned int i = 0; i < 13; i++) { max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[quantization_steps_for_level[i]]; } @@ -105,19 +98,19 @@ void prepare_angular_tables() * @param[out] offsets The output angular offsets array. 
*/ static void compute_angular_offsets( - int sample_count, + unsigned int sample_count, const float* samples, const float* sample_weights, - int max_angular_steps, + unsigned int max_angular_steps, float* offsets ) { promise(sample_count > 0); promise(max_angular_steps > 0); - alignas(ASTCENC_VECALIGN) int isamplev[MAX_WEIGHTS_PER_BLOCK] { 0 }; + alignas(ASTCENC_VECALIGN) int isamplev[BLOCK_MAX_WEIGHTS] { 0 }; // Precompute isample; arrays are always allocated 64 elements long - for (int i = 0; i < sample_count; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < sample_count; i += ASTCENC_SIMD_WIDTH) { // Add 2^23 and interpreting bits extracts round-to-nearest int vfloat sample = loada(samples + i) * (SINCOS_STEPS - 1.0f) + vfloat(12582912.0f); @@ -128,12 +121,12 @@ static void compute_angular_offsets( // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max vfloat mult = vfloat(1.0f / (2.0f * astc::PI)); - for (int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) + for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) { vfloat anglesum_x = vfloat::zero(); vfloat anglesum_y = vfloat::zero(); - for (int j = 0; j < sample_count; j++) + for (unsigned int j = 0; j < sample_count; j++) { int isample = isamplev[j]; vfloat sample_weightv(sample_weights[j]); @@ -167,11 +160,11 @@ static void compute_angular_offsets( * @param[out] cut_high_weight_error Per angular step, the high weight cut error. 
*/ static void compute_lowest_and_highest_weight( - int sample_count, + unsigned int sample_count, const float* samples, const float* sample_weights, - int max_angular_steps, - int max_quant_steps, + unsigned int max_angular_steps, + unsigned int max_quant_steps, const float* offsets, int* lowest_weight, int* weight_span, @@ -185,7 +178,7 @@ static void compute_lowest_and_highest_weight( vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); // Arrays are ANGULAR_STEPS long, so always safe to run full vectors - for (int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) + for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) { vint minidx(128); vint maxidx(-128); @@ -194,7 +187,7 @@ static void compute_lowest_and_highest_weight( vfloat cut_high_weight_err = vfloat::zero(); vfloat offset = loada(&offsets[sp]); - for (int j = 0; j < sample_count; ++j) + for (unsigned int j = 0; j < sample_count; ++j) { vfloat wt = load1(&sample_weights[j]); vfloat sval = load1(&samples[j]) * rcp_stepsize - offset; @@ -232,9 +225,8 @@ static void compute_lowest_and_highest_weight( storea(minidx, &lowest_weight[sp]); storea(span, &weight_span[sp]); - // The cut_(lowest/highest)_weight_error indicate the error that - // results from forcing samples that should have had the weight value - // one step (up/down). + // The cut_(lowest/highest)_weight_error indicate the error that results from forcing + // samples that should have had the weight value one step (up/down). vfloat ssize = 1.0f / rcp_stepsize; vfloat errscale = ssize * ssize; storea(errval * errscale, &error[sp]); @@ -256,17 +248,17 @@ static void compute_lowest_and_highest_weight( * @param[out] high_value Per angular step, the highest weight value. 
*/ static void compute_angular_endpoints_for_quant_levels( - int sample_count, + unsigned int sample_count, const float* samples, const float* sample_weights, - int max_quant_level, + unsigned int max_quant_level, float low_value[12], float high_value[12] ) { - int max_quant_steps = quantization_steps_for_level[max_quant_level]; + unsigned int max_quant_steps = quantization_steps_for_level[max_quant_level]; alignas(ASTCENC_VECALIGN) float angular_offsets[ANGULAR_STEPS]; - int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quant_level]; + unsigned int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quant_level]; compute_angular_offsets(sample_count, samples, sample_weights, max_angular_steps, angular_offsets); alignas(ASTCENC_VECALIGN) int32_t lowest_weight[ANGULAR_STEPS]; @@ -280,15 +272,14 @@ static void compute_angular_endpoints_for_quant_levels( angular_offsets, lowest_weight, weight_span, error, cut_low_weight_error, cut_high_weight_error); - // For each quantization level, find the best error terms. Use packed - // vectors so data-dependent branches can become selects. This involves - // some integer to float casts, but the values are small enough so they - // never round the wrong way. + // For each quantization level, find the best error terms. Use packed vectors so data-dependent + // branches can become selects. This involves some integer to float casts, but the values are + // small enough so they never round the wrong way. 
vfloat4 best_results[40]; // Initialize the array to some safe defaults promise(max_quant_steps > 0); - for (int i = 0; i < (max_quant_steps + 4); i++) + for (unsigned int i = 0; i < (max_quant_steps + 4); i++) { // Lane<0> = Best error // Lane<1> = Best scale; -1 indicates no solution found @@ -297,7 +288,7 @@ static void compute_angular_endpoints_for_quant_levels( } promise(max_angular_steps > 0); - for (int i = 0; i < max_angular_steps; i++) + for (unsigned int i = 0; i < max_angular_steps; i++) { int idx_span = weight_span[i]; float error_cut_low = error[i] + cut_low_weight_error[i]; @@ -331,9 +322,9 @@ static void compute_angular_endpoints_for_quant_levels( best_results[idx_span - 2] = select(best_result, new_result, mask4); } - // If we get a better error for lower sample count then use the lower - // sample count's error for the higher sample count as well. - for (int i = 3; i <= max_quant_steps; i++) + // If we get a better error for lower sample count then use the lower sample count's error for + // the higher sample count as well. + for (unsigned int i = 3; i <= max_quant_steps; i++) { vfloat4 result = best_results[i]; vfloat4 prev_result = best_results[i - 1]; @@ -341,15 +332,13 @@ static void compute_angular_endpoints_for_quant_levels( best_results[i] = select(result, prev_result, mask); } - promise(max_quant_level >= 0); - for (int i = 0; i <= max_quant_level; i++) + for (unsigned int i = 0; i <= max_quant_level; i++) { - int q = quantization_steps_for_level[i]; + unsigned int q = quantization_steps_for_level[i]; int bsi = (int)best_results[q].lane<1>(); // Did we find anything? - // TODO: Can we do better than bsi = 0 here. We should at least - // propagate an error (and move the printf into the CLI). + // TODO: Can we do better than bsi = 0 here. We should at least propagate an error? 
#if !defined(NDEBUG) if (bsi < 0) { @@ -376,14 +365,14 @@ void compute_angular_endpoints_1plane( const block_size_descriptor& bsd, const float* decimated_quantized_weights, const float* decimated_weights, - float low_value[MAX_WEIGHT_MODES], - float high_value[MAX_WEIGHT_MODES] + float low_value[WEIGHTS_MAX_BLOCK_MODES], + float high_value[WEIGHTS_MAX_BLOCK_MODES] ) { - float low_values[MAX_DECIMATION_MODES][12]; - float high_values[MAX_DECIMATION_MODES][12]; + float low_values[WEIGHTS_MAX_DECIMATION_MODES][12]; + float high_values[WEIGHTS_MAX_DECIMATION_MODES][12]; promise(bsd.decimation_mode_count > 0); - for (int i = 0; i < bsd.decimation_mode_count; i++) + for (unsigned int i = 0; i < bsd.decimation_mode_count; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) @@ -394,13 +383,13 @@ void compute_angular_endpoints_1plane( int sample_count = bsd.decimation_tables[i]->weight_count; compute_angular_endpoints_for_quant_levels( sample_count, - decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, + decimated_quantized_weights + i * BLOCK_MAX_WEIGHTS, + decimated_weights + i * BLOCK_MAX_WEIGHTS, dm.maxprec_1plane, low_values[i], high_values[i]); } promise(bsd.block_mode_count > 0); - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { const block_mode& bm = bsd.block_modes[i]; if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) @@ -408,8 +397,8 @@ void compute_angular_endpoints_1plane( continue; } - int quant_mode = bm.quant_mode; - int decim_mode = bm.decimation_mode; + unsigned int quant_mode = bm.quant_mode; + unsigned int decim_mode = bm.decimation_mode; low_value[i] = low_values[decim_mode][quant_mode]; high_value[i] = high_values[decim_mode][quant_mode]; @@ -421,18 +410,18 @@ void compute_angular_endpoints_2planes( const 
block_size_descriptor& bsd, const float* decimated_quantized_weights, const float* decimated_weights, - float low_value1[MAX_WEIGHT_MODES], - float high_value1[MAX_WEIGHT_MODES], - float low_value2[MAX_WEIGHT_MODES], - float high_value2[MAX_WEIGHT_MODES] + float low_value1[WEIGHTS_MAX_BLOCK_MODES], + float high_value1[WEIGHTS_MAX_BLOCK_MODES], + float low_value2[WEIGHTS_MAX_BLOCK_MODES], + float high_value2[WEIGHTS_MAX_BLOCK_MODES] ) { - float low_values1[MAX_DECIMATION_MODES][12]; - float high_values1[MAX_DECIMATION_MODES][12]; - float low_values2[MAX_DECIMATION_MODES][12]; - float high_values2[MAX_DECIMATION_MODES][12]; + float low_values1[WEIGHTS_MAX_DECIMATION_MODES][12]; + float high_values1[WEIGHTS_MAX_DECIMATION_MODES][12]; + float low_values2[WEIGHTS_MAX_DECIMATION_MODES][12]; + float high_values2[WEIGHTS_MAX_DECIMATION_MODES][12]; promise(bsd.decimation_mode_count > 0); - for (int i = 0; i < bsd.decimation_mode_count; i++) + for (unsigned int i = 0; i < bsd.decimation_mode_count; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (dm.maxprec_2planes < 0 || !dm.percentile_hit) @@ -440,23 +429,23 @@ void compute_angular_endpoints_2planes( continue; } - int sample_count = bsd.decimation_tables[i]->weight_count; + unsigned int sample_count = bsd.decimation_tables[i]->weight_count; compute_angular_endpoints_for_quant_levels( sample_count, - decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, + decimated_quantized_weights + 2 * i * BLOCK_MAX_WEIGHTS, + decimated_weights + 2 * i * BLOCK_MAX_WEIGHTS, dm.maxprec_2planes, low_values1[i], high_values1[i]); compute_angular_endpoints_for_quant_levels( sample_count, - decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, + decimated_quantized_weights + (2 * i + 1) * BLOCK_MAX_WEIGHTS, + decimated_weights + (2 * i + 1) * BLOCK_MAX_WEIGHTS, dm.maxprec_2planes, low_values2[i], 
high_values2[i]); } promise(bsd.block_mode_count > 0); - for (int i = 0; i < bsd.block_mode_count; ++i) + for (unsigned int i = 0; i < bsd.block_mode_count; ++i) { const block_mode& bm = bsd.block_modes[i]; if (!bm.is_dual_plane || !bm.percentile_hit) @@ -464,8 +453,8 @@ void compute_angular_endpoints_2planes( continue; } - int quant_mode = bm.quant_mode; - int decim_mode = bm.decimation_mode; + unsigned int quant_mode = bm.quant_mode; + unsigned int decim_mode = bm.decimation_mode; low_value1[i] = low_values1[decim_mode][quant_mode]; high_value1[i] = high_values1[decim_mode][quant_mode]; diff --git a/lib/astc-encoder/Source/cmake_core.cmake b/lib/astc-encoder/Source/cmake_core.cmake index 6704caf692..a4a4a3c746 100644 --- a/lib/astc-encoder/Source/cmake_core.cmake +++ b/lib/astc-encoder/Source/cmake_core.cmake @@ -35,7 +35,6 @@ add_library(astc${CODEC}-${ISA_SIMD}-static astcenc_ideal_endpoints_and_weights.cpp astcenc_image.cpp astcenc_integer_sequence.cpp - astcenc_kmeans_partitioning.cpp astcenc_mathlib.cpp astcenc_mathlib_softfloat.cpp astcenc_partition_tables.cpp