From 39eecbf02aeb0451f02c3425eabb7f9559e58802 Mon Sep 17 00:00:00 2001 From: Goetz Lindenmaier Date: Mon, 18 Jul 2022 09:33:49 +0000 Subject: [PATCH] 8271078: jdk/incubator/vector/Float128VectorTests.java failed a subtest Backport-of: 25059b286d4a0026eb79942f96707d443ab9e65c --- src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp | 54 ++++++++++---------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index b08b4862134..b7fc7a1f9aa 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -139,8 +139,8 @@ class RegisterSaver { }; public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors); - static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors); + static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false); // Offsets into the register save area // Used by deoptimization when it is managing result register @@ -157,19 +157,19 @@ class RegisterSaver { static void restore_result_registers(MacroAssembler* masm); }; -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) { int off = 0; int num_xmm_regs = XMMRegisterImpl::number_of_registers; if (UseAVX < 3) { num_xmm_regs = num_xmm_regs/2; } #if COMPILER2_OR_JVMCI - if (save_vectors && UseAVX == 0) { - save_vectors = false; // vectors larger than 16 byte long are supported only with AVX + if (save_wide_vectors && UseAVX == 0) { + save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX } - assert(!save_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); + assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); #else - save_vectors = false; // vectors are generated only by C2 and JVMCI + save_wide_vectors = false; // vectors are generated only by C2 and JVMCI #endif // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated @@ -190,7 +190,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ __ push_CPU_state(); // Push a multiple of 16 bytes // push cpu state handles this on EVEX enabled targets - if (save_vectors) { + if (save_wide_vectors) { // Save upper half of YMM registers(0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { @@ -212,11 +212,12 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ } } else { if (VM_Version::supports_evex()) { - // Save upper bank of ZMM registers(16..31) for double/float usage + // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage int base_addr = XSAVE_AREA_UPPERBANK; off = 0; + int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; for (int n = 16; n < num_xmm_regs; n++) { - __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n)); + __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); } } } @@ -273,7 +274,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ } #if COMPILER2_OR_JVMCI - if (save_vectors) { + if (save_wide_vectors) { // Save upper half of YMM registers(0..15) off = ymm0_off; delta = ymm1_off - ymm0_off; @@ -336,7 +337,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ return map; } -void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) { int num_xmm_regs = XMMRegisterImpl::number_of_registers; if (UseAVX < 3) { num_xmm_regs = num_xmm_regs/2; @@ -347,18 +348,18 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve } #if COMPILER2_OR_JVMCI - if (restore_vectors) { + if (restore_wide_vectors) { assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); } #else - assert(!restore_vectors, "vectors are generated only by C2"); + assert(!restore_wide_vectors, "vectors are generated only by C2"); #endif __ vzeroupper(); // On EVEX enabled targets everything is handled in pop fpu state - if (restore_vectors) { + if (restore_wide_vectors) { // Restore upper half of YMM registers (0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { @@ -380,11 +381,12 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve } } else { if (VM_Version::supports_evex()) { - // Restore upper bank of ZMM registers(16..31) for double/float usage + // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage int base_addr = XSAVE_AREA_UPPERBANK; int off = 0; + int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; for (int n = 16; n < num_xmm_regs; n++) { - __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64))); + __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); } } } @@ -2863,7 +2865,7 @@ void SharedRuntime::generate_deopt_blob() { // Prolog for non exception case! // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); // Normal deoptimization. Save exec mode for unpack_frames. __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved @@ -2881,7 +2883,7 @@ void SharedRuntime::generate_deopt_blob() { // return address is the pc describes what bci to do re-execute at // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved __ jmp(cont); @@ -2900,7 +2902,7 @@ void SharedRuntime::generate_deopt_blob() { uncommon_trap_offset = __ pc() - start; // Save everything in sight. - RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); + RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); // fetch_unroll_info needs to call last_java_frame() __ set_last_Java_frame(noreg, noreg, NULL); @@ -2947,7 +2949,7 @@ void SharedRuntime::generate_deopt_blob() { __ push(0); // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); // Now it is safe to overwrite any register @@ -3383,7 +3385,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t address call_pc = NULL; int frame_size_in_words; bool cause_return = (poll_type == POLL_AT_RETURN); - bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); + bool save_wide_vectors = (poll_type == POLL_AT_VECTOR_LOOP); if (UseRTMLocking) { // Abort RTM transaction before calling runtime @@ -3398,7 +3400,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t } // Save registers, fpu state, and flags - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -3437,7 +3439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Exception pending - RegisterSaver::restore_live_registers(masm, save_vectors); + RegisterSaver::restore_live_registers(masm, save_wide_vectors); __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); @@ -3505,7 +3507,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t __ bind(no_adjust); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm, save_vectors); + RegisterSaver::restore_live_registers(masm, save_wide_vectors); __ ret(0); #ifdef ASSERT @@ -3545,7 +3547,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha int start = __ offset(); // No need to save vector registers since they are caller-saved anyway. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); int frame_complete = __ offset();