@@ -7030,7 +7030,7 @@ void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, X
70307030
70317031// Helper function for AVX 512 CRC32
70327032// Compute CRC32 for < 256B buffers
7033- void MacroAssembler::kernel_crc32_avx512_256B (Register crc, Register buf, Register len, Register key , Register pos,
7033+ void MacroAssembler::kernel_crc32_avx512_256B (Register crc, Register buf, Register len, Register table , Register pos,
70347034 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
70357035 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup) {
70367036
@@ -7043,7 +7043,7 @@ void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Regist
70437043 jcc (Assembler::less, L_less_than_32);
70447044
70457045 // if there is, load the constants
7046- movdqu (xmm10, Address (key , 1 * 16 )); // rk1 and rk2 in xmm10
7046+ movdqu (xmm10, Address (table , 1 * 16 )); // rk1 and rk2 in xmm10
70477047 movdl (xmm0, crc); // get the initial crc value
70487048 movdqu (xmm7, Address (buf, pos, Address::times_1, 0 * 16 )); // load the plaintext
70497049 pxor (xmm7, xmm0);
@@ -7070,7 +7070,7 @@ void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Regist
70707070 pxor (xmm7, xmm0); // xor the initial crc value
70717071 addl (pos, 16 );
70727072 subl (len, 16 );
7073- movdqu (xmm10, Address (key , 1 * 16 )); // rk1 and rk2 in xmm10
7073+ movdqu (xmm10, Address (table , 1 * 16 )); // rk1 and rk2 in xmm10
70747074 jmp (L_get_last_two_xmms);
70757075
70767076 bind (L_less_than_16_left);
@@ -7190,12 +7190,17 @@ void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Regist
71907190* param crc register containing existing CRC (32-bit)
71917191* param buf register pointing to input byte buffer (byte*)
71927192* param len register containing number of bytes
7193+ * param table register containing the address of the crc or crc32c constant table
71937194* param tmp1 scratch register
71947195* param tmp2 scratch register
71957196* return rax result register
7197+ *
7198+ * This routine is identical for crc32c with the exception of the precomputed constant
7199+ * table which will be passed as the table argument. The calculation steps are
7200+ * the same for both variants.
71967201*/
7197- void MacroAssembler::kernel_crc32_avx512 (Register crc, Register buf, Register len, Register key , Register tmp1, Register tmp2) {
7198- assert_different_registers (crc, buf, len, key , tmp1, tmp2, rax);
7202+ void MacroAssembler::kernel_crc32_avx512 (Register crc, Register buf, Register len, Register table , Register tmp1, Register tmp2) {
7203+ assert_different_registers (crc, buf, len, table , tmp1, tmp2, rax, r12 );
71997204
72007205 Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
72017206 Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
@@ -7210,7 +7215,6 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
72107215 // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
72117216 // context for the registers used, where all instructions below are using 128-bit mode
72127217 // On EVEX without VL and BW, these instructions will all be AVX.
7213- lea (key, ExternalAddress (StubRoutines::x86::crc_table_avx512_addr ()));
72147218 notl (crc);
72157219 movl (pos, 0 );
72167220
@@ -7225,15 +7229,15 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
72257229 evmovdquq (xmm0, Address (buf, pos, Address::times_1, 0 * 64 ), Assembler::AVX_512bit);
72267230 evmovdquq (xmm4, Address (buf, pos, Address::times_1, 1 * 64 ), Assembler::AVX_512bit);
72277231 evpxorq (xmm0, xmm0, xmm10, Assembler::AVX_512bit);
7228- evbroadcasti32x4 (xmm10, Address (key , 2 * 16 ), Assembler::AVX_512bit); // zmm10 has rk3 and rk4
7232+ evbroadcasti32x4 (xmm10, Address (table , 2 * 16 ), Assembler::AVX_512bit); // zmm10 has rk3 and rk4
72297233
72307234 subl (len, 256 );
72317235 cmpl (len, 256 );
72327236 jcc (Assembler::less, L_fold_128_B_loop);
72337237
72347238 evmovdquq (xmm7, Address (buf, pos, Address::times_1, 2 * 64 ), Assembler::AVX_512bit);
72357239 evmovdquq (xmm8, Address (buf, pos, Address::times_1, 3 * 64 ), Assembler::AVX_512bit);
7236- evbroadcasti32x4 (xmm16, Address (key , 0 * 16 ), Assembler::AVX_512bit); // zmm16 has rk-1 and rk-2
7240+ evbroadcasti32x4 (xmm16, Address (table , 0 * 16 ), Assembler::AVX_512bit); // zmm16 has rk-1 and rk-2
72377241 subl (len, 256 );
72387242
72397243 bind (L_fold_256_B_loop);
@@ -7279,8 +7283,8 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
72797283 // at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
72807284 // the 128B of folded data is in 8 of the xmm registers : xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
72817285 bind (L_fold_128_B_register);
7282- evmovdquq (xmm16, Address (key , 5 * 16 ), Assembler::AVX_512bit); // multiply by rk9-rk16
7283- evmovdquq (xmm11, Address (key , 9 * 16 ), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0
7286+ evmovdquq (xmm16, Address (table , 5 * 16 ), Assembler::AVX_512bit); // multiply by rk9-rk16
7287+ evmovdquq (xmm11, Address (table , 9 * 16 ), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0
72847288 evpclmulqdq (xmm1, xmm0, xmm16, 0x01 , Assembler::AVX_512bit);
72857289 evpclmulqdq (xmm2, xmm0, xmm16, 0x10 , Assembler::AVX_512bit);
72867290 // save last that has no multiplicand
@@ -7289,7 +7293,7 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
72897293 evpclmulqdq (xmm5, xmm4, xmm11, 0x01 , Assembler::AVX_512bit);
72907294 evpclmulqdq (xmm6, xmm4, xmm11, 0x10 , Assembler::AVX_512bit);
72917295 // Needed later in reduction loop
7292- movdqu (xmm10, Address (key , 1 * 16 ));
7296+ movdqu (xmm10, Address (table , 1 * 16 ));
72937297 vpternlogq (xmm1, 0x96 , xmm2, xmm5, Assembler::AVX_512bit); // xor ABC
72947298 vpternlogq (xmm1, 0x96 , xmm6, xmm7, Assembler::AVX_512bit); // xor ABC
72957299
@@ -7305,7 +7309,7 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
73057309 jcc (Assembler::less, L_final_reduction_for_128);
73067310
73077311 bind (L_16B_reduction_loop);
7308- vpclmulqdq (xmm8, xmm7, xmm10, 0x1 );
7312+ vpclmulqdq (xmm8, xmm7, xmm10, 0x01 );
73097313 vpclmulqdq (xmm7, xmm7, xmm10, 0x10 );
73107314 vpxor (xmm7, xmm7, xmm8, Assembler::AVX_128bit);
73117315 movdqu (xmm0, Address (buf, pos, Address::times_1, 0 * 16 ));
@@ -7336,14 +7340,14 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
73367340 vpshufb (xmm2, xmm2, xmm0, Assembler::AVX_128bit);
73377341
73387342 blendvpb (xmm2, xmm2, xmm1, xmm0, Assembler::AVX_128bit);
7339- vpclmulqdq (xmm8, xmm7, xmm10, 0x1 );
7343+ vpclmulqdq (xmm8, xmm7, xmm10, 0x01 );
73407344 vpclmulqdq (xmm7, xmm7, xmm10, 0x10 );
73417345 vpxor (xmm7, xmm7, xmm8, Assembler::AVX_128bit);
73427346 vpxor (xmm7, xmm7, xmm2, Assembler::AVX_128bit);
73437347
73447348 bind (L_128_done);
73457349 // compute crc of a 128-bit value
7346- movdqu (xmm10, Address (key , 3 * 16 ));
7350+ movdqu (xmm10, Address (table , 3 * 16 ));
73477351 movdqu (xmm0, xmm7);
73487352
73497353 // 64b fold
@@ -7359,14 +7363,14 @@ void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register le
73597363 jmp (L_barrett);
73607364
73617365 bind (L_less_than_256);
7362- kernel_crc32_avx512_256B (crc, buf, len, key , pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup);
7366+ kernel_crc32_avx512_256B (crc, buf, len, table , pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup);
73637367
73647368 // barrett reduction
73657369 bind (L_barrett);
73667370 vpand (xmm7, xmm7, ExternalAddress (StubRoutines::x86::crc_by128_masks_avx512_addr () + 1 * 16 ), Assembler::AVX_128bit, tmp2);
73677371 movdqu (xmm1, xmm7);
73687372 movdqu (xmm2, xmm7);
7369- movdqu (xmm10, Address (key , 4 * 16 ));
7373+ movdqu (xmm10, Address (table , 4 * 16 ));
73707374
73717375 pclmulqdq (xmm7, xmm10, 0x0 );
73727376 pxor (xmm7, xmm2);
0 commit comments