@@ -110,8 +110,9 @@ public class AESFastEngine
110110 0x2f , 0x5e , 0xbc , 0x63 , 0xc6 , 0x97 , 0x35 , 0x6a , 0xd4 , 0xb3 , 0x7d , 0xfa , 0xef , 0xc5 , 0x91 };
111111
112112 // precomputed lookup tables for the round calculations
113- private static final int [] T0 =
113+ private static final int [] T =
114114 {
115+ // T0
115116 0xa56363c6 , 0x847c7cf8 , 0x997777ee , 0x8d7b7bf6 , 0x0df2f2ff ,
116117 0xbd6b6bd6 , 0xb16f6fde , 0x54c5c591 , 0x50303060 , 0x03010102 ,
117118 0xa96767ce , 0x7d2b2b56 , 0x19fefee7 , 0x62d7d7b5 , 0xe6abab4d ,
@@ -163,10 +164,9 @@ public class AESFastEngine
163164 0x8f8c8c03 , 0xf8a1a159 , 0x80898909 , 0x170d0d1a , 0xdabfbf65 ,
164165 0x31e6e6d7 , 0xc6424284 , 0xb86868d0 , 0xc3414182 , 0xb0999929 ,
165166 0x772d2d5a , 0x110f0f1e , 0xcbb0b07b , 0xfc5454a8 , 0xd6bbbb6d ,
166- 0x3a16162c };
167+ 0x3a16162c ,
167168
168- private static final int [] T1 =
169- {
169+ // T1
170170 0x6363c6a5 , 0x7c7cf884 , 0x7777ee99 , 0x7b7bf68d , 0xf2f2ff0d ,
171171 0x6b6bd6bd , 0x6f6fdeb1 , 0xc5c59154 , 0x30306050 , 0x01010203 ,
172172 0x6767cea9 , 0x2b2b567d , 0xfefee719 , 0xd7d7b562 , 0xabab4de6 ,
@@ -218,10 +218,9 @@ public class AESFastEngine
218218 0x8c8c038f , 0xa1a159f8 , 0x89890980 , 0x0d0d1a17 , 0xbfbf65da ,
219219 0xe6e6d731 , 0x424284c6 , 0x6868d0b8 , 0x414182c3 , 0x999929b0 ,
220220 0x2d2d5a77 , 0x0f0f1e11 , 0xb0b07bcb , 0x5454a8fc , 0xbbbb6dd6 ,
221- 0x16162c3a };
221+ 0x16162c3a ,
222222
223- private static final int [] T2 =
224- {
223+ // T2
225224 0x63c6a563 , 0x7cf8847c , 0x77ee9977 , 0x7bf68d7b , 0xf2ff0df2 ,
226225 0x6bd6bd6b , 0x6fdeb16f , 0xc59154c5 , 0x30605030 , 0x01020301 ,
227226 0x67cea967 , 0x2b567d2b , 0xfee719fe , 0xd7b562d7 , 0xab4de6ab ,
@@ -273,10 +272,9 @@ public class AESFastEngine
273272 0x8c038f8c , 0xa159f8a1 , 0x89098089 , 0x0d1a170d , 0xbf65dabf ,
274273 0xe6d731e6 , 0x4284c642 , 0x68d0b868 , 0x4182c341 , 0x9929b099 ,
275274 0x2d5a772d , 0x0f1e110f , 0xb07bcbb0 , 0x54a8fc54 , 0xbb6dd6bb ,
276- 0x162c3a16 };
275+ 0x162c3a16 ,
277276
278- private static final int [] T3 =
279- {
277+ // T3
280278 0xc6a56363 , 0xf8847c7c , 0xee997777 , 0xf68d7b7b , 0xff0df2f2 ,
281279 0xd6bd6b6b , 0xdeb16f6f , 0x9154c5c5 , 0x60503030 , 0x02030101 ,
282280 0xcea96767 , 0x567d2b2b , 0xe719fefe , 0xb562d7d7 , 0x4de6abab ,
@@ -330,8 +328,9 @@ public class AESFastEngine
330328 0x5a772d2d , 0x1e110f0f , 0x7bcbb0b0 , 0xa8fc5454 , 0x6dd6bbbb ,
331329 0x2c3a1616 };
332330
333- private static final int [] Tinv0 =
331+ private static final int [] Tinv =
334332 {
333+ // Tinv0
335334 0x50a7f451 , 0x5365417e , 0xc3a4171a , 0x965e273a , 0xcb6bab3b ,
336335 0xf1459d1f , 0xab58faac , 0x9303e34b , 0x55fa3020 , 0xf66d76ad ,
337336 0x9176cc88 , 0x254c02f5 , 0xfcd7e54f , 0xd7cb2ac5 , 0x80443526 ,
@@ -383,10 +382,9 @@ public class AESFastEngine
383382 0x81f3afca , 0x3ec468b9 , 0x2c342438 , 0x5f40a3c2 , 0x72c31d16 ,
384383 0x0c25e2bc , 0x8b493c28 , 0x41950dff , 0x7101a839 , 0xdeb30c08 ,
385384 0x9ce4b4d8 , 0x90c15664 , 0x6184cb7b , 0x70b632d5 , 0x745c6c48 ,
386- 0x4257b8d0 };
385+ 0x4257b8d0 ,
387386
388- private static final int [] Tinv1 =
389- {
387+ // Tinv1
390388 0xa7f45150 , 0x65417e53 , 0xa4171ac3 , 0x5e273a96 , 0x6bab3bcb ,
391389 0x459d1ff1 , 0x58faacab , 0x03e34b93 , 0xfa302055 , 0x6d76adf6 ,
392390 0x76cc8891 , 0x4c02f525 , 0xd7e54ffc , 0xcb2ac5d7 , 0x44352680 ,
@@ -438,10 +436,9 @@ public class AESFastEngine
438436 0xf3afca81 , 0xc468b93e , 0x3424382c , 0x40a3c25f , 0xc31d1672 ,
439437 0x25e2bc0c , 0x493c288b , 0x950dff41 , 0x01a83971 , 0xb30c08de ,
440438 0xe4b4d89c , 0xc1566490 , 0x84cb7b61 , 0xb632d570 , 0x5c6c4874 ,
441- 0x57b8d042 };
439+ 0x57b8d042 ,
442440
443- private static final int [] Tinv2 =
444- {
441+ // Tinv2
445442 0xf45150a7 , 0x417e5365 , 0x171ac3a4 , 0x273a965e , 0xab3bcb6b ,
446443 0x9d1ff145 , 0xfaacab58 , 0xe34b9303 , 0x302055fa , 0x76adf66d ,
447444 0xcc889176 , 0x02f5254c , 0xe54ffcd7 , 0x2ac5d7cb , 0x35268044 ,
@@ -493,10 +490,9 @@ public class AESFastEngine
493490 0xafca81f3 , 0x68b93ec4 , 0x24382c34 , 0xa3c25f40 , 0x1d1672c3 ,
494491 0xe2bc0c25 , 0x3c288b49 , 0x0dff4195 , 0xa8397101 , 0x0c08deb3 ,
495492 0xb4d89ce4 , 0x566490c1 , 0xcb7b6184 , 0x32d570b6 , 0x6c48745c ,
496- 0xb8d04257 };
493+ 0xb8d04257 ,
497494
498- private static final int [] Tinv3 =
499- {
495+ // Tinv3
500496 0x5150a7f4 , 0x7e536541 , 0x1ac3a417 , 0x3a965e27 , 0x3bcb6bab ,
501497 0x1ff1459d , 0xacab58fa , 0x4b9303e3 , 0x2055fa30 , 0xadf66d76 ,
502498 0x889176cc , 0xf5254c02 , 0x4ffcd7e5 , 0xc5d7cb2a , 0x26804435 ,
@@ -804,37 +800,49 @@ private void packBlock(
804800 private void encryptBlock (int [][] KW )
805801 {
806802 int r , r0 , r1 , r2 , r3 ;
807-
808- C0 ^= KW [0 ][0 ];
809- C1 ^= KW [0 ][1 ];
810- C2 ^= KW [0 ][2 ];
811- C3 ^= KW [0 ][3 ];
812803
804+ // XOR in round key 0 and keep the state in locals ("registerising"); this helps fast engine encrypt on x64, but hurts decrypt and AESEngine
805+ int C0 = this .C0 ^= KW [0 ][0 ];
806+ int C1 = this .C1 ^= KW [0 ][1 ];
807+ int C2 = this .C2 ^= KW [0 ][2 ];
808+ int C3 = this .C3 ^= KW [0 ][3 ];
809+
810+ /*
811+ * T holds T0 followed by its precomputed rotations T1/T2/T3 = rotr(T0, 24/16/8), indexed in 256-entry sections.
812+ *
813+ * Placing all precomputes in one array requires an offset (OR-ed in below as 256/512/768) for the
814+ * rotated lookups, but avoids additional array range checks on 3 more arrays (which on HotSpot are
815+ * more expensive than applying the offsets).
816+ */
813817 r = 1 ;
814818 while (r < ROUNDS - 1 )
815819 {
816- r0 = T0 [C0 &255 ] ^ T1 [( C1 >>8 )&255 ] ^ T2 [( C2 >>16 )&255 ] ^ T3 [( C3 >>24 )&255 ] ^ KW [r ][0 ];
817- r1 = T0 [C1 &255 ] ^ T1 [( C2 >>8 )&255 ] ^ T2 [( C3 >>16 )&255 ] ^ T3 [( C0 >>24 )&255 ] ^ KW [r ][1 ];
818- r2 = T0 [C2 &255 ] ^ T1 [( C3 >>8 )&255 ] ^ T2 [( C0 >>16 )&255 ] ^ T3 [( C1 >>24 )&255 ] ^ KW [r ][2 ];
819- r3 = T0 [C3 &255 ] ^ T1 [( C0 >>8 )&255 ] ^ T2 [( C1 >>16 )&255 ] ^ T3 [( C2 >>24 )&255 ] ^ KW [r ++][3 ];
820- C0 = T0 [r0 &255 ] ^ T1 [( r1 >>8 )&255 ] ^ T2 [( r2 >>16 )&255 ] ^ T3 [( r3 >>24 )&255 ] ^ KW [r ][0 ];
821- C1 = T0 [r1 &255 ] ^ T1 [( r2 >>8 )&255 ] ^ T2 [( r3 >>16 )&255 ] ^ T3 [( r0 >>24 )&255 ] ^ KW [r ][1 ];
822- C2 = T0 [r2 &255 ] ^ T1 [( r3 >>8 )&255 ] ^ T2 [( r0 >>16 )&255 ] ^ T3 [( r1 >>24 )&255 ] ^ KW [r ][2 ];
823- C3 = T0 [r3 &255 ] ^ T1 [( r0 >>8 )&255 ] ^ T2 [( r1 >>16 )&255 ] ^ T3 [( r2 >>24 )&255 ] ^ KW [r ++][3 ];
820+ r0 = T [C0 &255 ] ^ T [(( C1 >>8 )&255 )| 256 ] ^ T [(( C2 >>16 )&255 )| 512 ] ^ T [(( C3 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
821+ r1 = T [C1 &255 ] ^ T [(( C2 >>8 )&255 )| 256 ] ^ T [(( C3 >>16 )&255 )| 512 ] ^ T [(( C0 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
822+ r2 = T [C2 &255 ] ^ T [(( C3 >>8 )&255 )| 256 ] ^ T [(( C0 >>16 )&255 )| 512 ] ^ T [(( C1 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
823+ r3 = T [C3 &255 ] ^ T [(( C0 >>8 )&255 )| 256 ] ^ T [(( C1 >>16 )&255 )| 512 ] ^ T [(( C2 >>24 )&255 )| 768 ] ^ KW [r ++][3 ];
824+ C0 = T [r0 &255 ] ^ T [(( r1 >>8 )&255 )| 256 ] ^ T [(( r2 >>16 )&255 )| 512 ] ^ T [(( r3 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
825+ C1 = T [r1 &255 ] ^ T [(( r2 >>8 )&255 )| 256 ] ^ T [(( r3 >>16 )&255 )| 512 ] ^ T [(( r0 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
826+ C2 = T [r2 &255 ] ^ T [(( r3 >>8 )&255 )| 256 ] ^ T [(( r0 >>16 )&255 )| 512 ] ^ T [(( r1 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
827+ C3 = T [r3 &255 ] ^ T [(( r0 >>8 )&255 )| 256 ] ^ T [(( r1 >>16 )&255 )| 512 ] ^ T [(( r2 >>24 )&255 )| 768 ] ^ KW [r ++][3 ];
824828 }
825829
826- r0 = T0 [C0 &255 ] ^ T1 [( C1 >>8 )&255 ] ^ T2 [( C2 >>16 )&255 ] ^ T3 [( C3 >>24 )&255 ] ^ KW [r ][0 ];
827- r1 = T0 [C1 &255 ] ^ T1 [( C2 >>8 )&255 ] ^ T2 [( C3 >>16 )&255 ] ^ T3 [( C0 >>24 )&255 ] ^ KW [r ][1 ];
828- r2 = T0 [C2 &255 ] ^ T1 [( C3 >>8 )&255 ] ^ T2 [( C0 >>16 )&255 ] ^ T3 [( C1 >>24 )&255 ] ^ KW [r ][2 ];
829- r3 = T0 [C3 &255 ] ^ T1 [( C0 >>8 )&255 ] ^ T2 [( C1 >>16 )&255 ] ^ T3 [( C2 >>24 )&255 ] ^ KW [r ++][3 ];
830-
830+ r0 = T [C0 &255 ] ^ T [(( C1 >>8 )&255 )| 256 ] ^ T [(( C2 >>16 )&255 )| 512 ] ^ T [(( C3 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
831+ r1 = T [C1 &255 ] ^ T [(( C2 >>8 )&255 )| 256 ] ^ T [(( C3 >>16 )&255 )| 512 ] ^ T [(( C0 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
832+ r2 = T [C2 &255 ] ^ T [(( C3 >>8 )&255 )| 256 ] ^ T [(( C0 >>16 )&255 )| 512 ] ^ T [(( C1 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
833+ r3 = T [C3 &255 ] ^ T [(( C0 >>8 )&255 )| 256 ] ^ T [(( C1 >>16 )&255 )| 512 ] ^ T [(( C2 >>24 )&255 )| 768 ] ^ KW [r ++][3 ];
834+
831835 // the final round is built directly from byte lookups in S, so no further set of four tables is needed for it
832836
833837 C0 = (S [r0 &255 ]&255 ) ^ ((S [(r1 >>8 )&255 ]&255 )<<8 ) ^ ((S [(r2 >>16 )&255 ]&255 )<<16 ) ^ (S [(r3 >>24 )&255 ]<<24 ) ^ KW [r ][0 ];
834838 C1 = (S [r1 &255 ]&255 ) ^ ((S [(r2 >>8 )&255 ]&255 )<<8 ) ^ ((S [(r3 >>16 )&255 ]&255 )<<16 ) ^ (S [(r0 >>24 )&255 ]<<24 ) ^ KW [r ][1 ];
835839 C2 = (S [r2 &255 ]&255 ) ^ ((S [(r3 >>8 )&255 ]&255 )<<8 ) ^ ((S [(r0 >>16 )&255 ]&255 )<<16 ) ^ (S [(r1 >>24 )&255 ]<<24 ) ^ KW [r ][2 ];
836840 C3 = (S [r3 &255 ]&255 ) ^ ((S [(r0 >>8 )&255 ]&255 )<<8 ) ^ ((S [(r1 >>16 )&255 ]&255 )<<16 ) ^ (S [(r2 >>24 )&255 ]<<24 ) ^ KW [r ][3 ];
837841
842+ this .C0 = C0 ; // write the local state words back to the instance fields
843+ this .C1 = C1 ;
844+ this .C2 = C2 ;
845+ this .C3 = C3 ;
838846 }
839847
840848 private void decryptBlock (int [][] KW )
@@ -846,25 +854,25 @@ private void decryptBlock(int[][] KW)
846854 C2 ^= KW [ROUNDS ][2 ];
847855 C3 ^= KW [ROUNDS ][3 ];
848856
849- int r = ROUNDS -1 ;
850-
851- while (r >1 )
857+ int r = ROUNDS -1 ;
858+
859+ while (r >1 )
852860 {
853- r0 = Tinv0 [C0 &255 ] ^ Tinv1 [( C3 >>8 )&255 ] ^ Tinv2 [( C2 >>16 )&255 ] ^ Tinv3 [( C1 >>24 )&255 ] ^ KW [r ][0 ];
854- r1 = Tinv0 [C1 &255 ] ^ Tinv1 [( C0 >>8 )&255 ] ^ Tinv2 [( C3 >>16 )&255 ] ^ Tinv3 [( C2 >>24 )&255 ] ^ KW [r ][1 ];
855- r2 = Tinv0 [C2 &255 ] ^ Tinv1 [( C1 >>8 )&255 ] ^ Tinv2 [( C0 >>16 )&255 ] ^ Tinv3 [( C3 >>24 )&255 ] ^ KW [r ][2 ];
856- r3 = Tinv0 [C3 &255 ] ^ Tinv1 [( C2 >>8 )&255 ] ^ Tinv2 [( C1 >>16 )&255 ] ^ Tinv3 [( C0 >>24 )&255 ] ^ KW [r --][3 ];
857- C0 = Tinv0 [r0 &255 ] ^ Tinv1 [( r3 >>8 )&255 ] ^ Tinv2 [( r2 >>16 )&255 ] ^ Tinv3 [( r1 >>24 )&255 ] ^ KW [r ][0 ];
858- C1 = Tinv0 [r1 &255 ] ^ Tinv1 [( r0 >>8 )&255 ] ^ Tinv2 [( r3 >>16 )&255 ] ^ Tinv3 [( r2 >>24 )&255 ] ^ KW [r ][1 ];
859- C2 = Tinv0 [r2 &255 ] ^ Tinv1 [( r1 >>8 )&255 ] ^ Tinv2 [( r0 >>16 )&255 ] ^ Tinv3 [( r3 >>24 )&255 ] ^ KW [r ][2 ];
860- C3 = Tinv0 [r3 &255 ] ^ Tinv1 [( r2 >>8 )&255 ] ^ Tinv2 [( r1 >>16 )&255 ] ^ Tinv3 [( r0 >>24 )&255 ] ^ KW [r --][3 ];
861+ r0 = Tinv [C0 &255 ] ^ Tinv [(( C3 >>8 )&255 )| 256 ] ^ Tinv [(( C2 >>16 )&255 )| 512 ] ^ Tinv [(( C1 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
862+ r1 = Tinv [C1 &255 ] ^ Tinv [(( C0 >>8 )&255 )| 256 ] ^ Tinv [(( C3 >>16 )&255 )| 512 ] ^ Tinv [(( C2 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
863+ r2 = Tinv [C2 &255 ] ^ Tinv [(( C1 >>8 )&255 )| 256 ] ^ Tinv [(( C0 >>16 )&255 )| 512 ] ^ Tinv [(( C3 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
864+ r3 = Tinv [C3 &255 ] ^ Tinv [(( C2 >>8 )&255 )| 256 ] ^ Tinv [(( C1 >>16 )&255 )| 512 ] ^ Tinv [(( C0 >>24 )&255 )| 768 ] ^ KW [r --][3 ];
865+ C0 = Tinv [r0 &255 ] ^ Tinv [(( r3 >>8 )&255 )| 256 ] ^ Tinv [(( r2 >>16 )&255 )| 512 ] ^ Tinv [(( r1 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
866+ C1 = Tinv [r1 &255 ] ^ Tinv [(( r0 >>8 )&255 )| 256 ] ^ Tinv [(( r3 >>16 )&255 )| 512 ] ^ Tinv [(( r2 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
867+ C2 = Tinv [r2 &255 ] ^ Tinv [(( r1 >>8 )&255 )| 256 ] ^ Tinv [(( r0 >>16 )&255 )| 512 ] ^ Tinv [(( r3 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
868+ C3 = Tinv [r3 &255 ] ^ Tinv [(( r2 >>8 )&255 )| 256 ] ^ Tinv [(( r1 >>16 )&255 )| 512 ] ^ Tinv [(( r0 >>24 )&255 )| 768 ] ^ KW [r --][3 ];
861869 }
862870
863- r0 = Tinv0 [C0 &255 ] ^ Tinv1 [( C3 >>8 )&255 ] ^ Tinv2 [( C2 >>16 )&255 ] ^ Tinv3 [( C1 >>24 )&255 ] ^ KW [r ][0 ];
864- r1 = Tinv0 [C1 &255 ] ^ Tinv1 [( C0 >>8 )&255 ] ^ Tinv2 [( C3 >>16 )&255 ] ^ Tinv3 [( C2 >>24 )&255 ] ^ KW [r ][1 ];
865- r2 = Tinv0 [C2 &255 ] ^ Tinv1 [( C1 >>8 )&255 ] ^ Tinv2 [( C0 >>16 )&255 ] ^ Tinv3 [( C3 >>24 )&255 ] ^ KW [r ][2 ];
866- r3 = Tinv0 [C3 &255 ] ^ Tinv1 [( C2 >>8 )&255 ] ^ Tinv2 [( C1 >>16 )&255 ] ^ Tinv3 [( C0 >>24 )&255 ] ^ KW [r ][3 ];
867-
871+ r0 = Tinv [C0 &255 ] ^ Tinv [(( C3 >>8 )&255 )| 256 ] ^ Tinv [(( C2 >>16 )&255 )| 512 ] ^ Tinv [(( C1 >>24 )&255 )| 768 ] ^ KW [r ][0 ];
872+ r1 = Tinv [C1 &255 ] ^ Tinv [(( C0 >>8 )&255 )| 256 ] ^ Tinv [(( C3 >>16 )&255 )| 512 ] ^ Tinv [(( C2 >>24 )&255 )| 768 ] ^ KW [r ][1 ];
873+ r2 = Tinv [C2 &255 ] ^ Tinv [(( C1 >>8 )&255 )| 256 ] ^ Tinv [(( C0 >>16 )&255 )| 512 ] ^ Tinv [(( C3 >>24 )&255 )| 768 ] ^ KW [r ][2 ];
874+ r3 = Tinv [C3 &255 ] ^ Tinv [(( C2 >>8 )&255 )| 256 ] ^ Tinv [(( C1 >>16 )&255 )| 512 ] ^ Tinv [(( C0 >>24 )&255 )| 768 ] ^ KW [r ][3 ];
875+
868876 // the final round's table is a simple function of Si so we don't use a whole other four tables for it
869877
870878 C0 = (Si [r0 &255 ]&255 ) ^ ((Si [(r3 >>8 )&255 ]&255 )<<8 ) ^ ((Si [(r2 >>16 )&255 ]&255 )<<16 ) ^ (Si [(r1 >>24 )&255 ]<<24 ) ^ KW [0 ][0 ];
0 commit comments