diff --git a/.github/scripts/gen-build-failure-report.sh b/.github/scripts/gen-build-failure-report.sh index fd3215fc7fe2d..c8336a5f7a1f9 100644 --- a/.github/scripts/gen-build-failure-report.sh +++ b/.github/scripts/gen-build-failure-report.sh @@ -24,12 +24,19 @@ # questions. # +# Import common utils +. report-utils.sh + GITHUB_STEP_SUMMARY="$1" BUILD_DIR="$(ls -d build/*)" # Send signal to the do-build action that we failed touch "$BUILD_DIR/build-failure" +# Collect hs_errs for build-time crashes, e.g. javac, jmod, jlink, CDS. +# These usually land in make/ +hs_err_files=$(ls make/hs_err*.log 2> /dev/null || true) + ( echo '### :boom: Build failure summary' echo '' @@ -46,6 +53,20 @@ touch "$BUILD_DIR/build-failure" echo '' echo '' + for hs_err in $hs_err_files; do + echo "
<details><summary>View HotSpot error log: "$hs_err"</summary>" + echo '' + echo '```' + echo "$hs_err:" + echo '' + cat "$hs_err" + echo '```' + echo '</details>'
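+ # Wrapping each log in a <details>/<summary> block keeps the step summary collapsed by default; + # truncate_summary (report-utils.sh) also uses the closing </details> tag as its cut point.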
+ echo '' + done + echo '' echo ':arrow_right: To see the entire test log, click the job in the list to the left. To download logs, see the `failure-logs` [artifact above](#artifacts).' ) >> $GITHUB_STEP_SUMMARY + +truncate_summary diff --git a/.github/scripts/gen-test-results.sh b/.github/scripts/gen-test-results.sh index 9e85eef4dc08d..6c6cbaa3740f6 100644 --- a/.github/scripts/gen-test-results.sh +++ b/.github/scripts/gen-test-results.sh @@ -24,6 +24,9 @@ # questions. # +# Import common utils +. report-utils.sh + GITHUB_STEP_SUMMARY="$1" test_suite_name=$(cat build/run-test-prebuilt/test-support/test-last-ids.txt) @@ -89,18 +92,6 @@ for test in $failures $errors; do fi done >> $GITHUB_STEP_SUMMARY -# With many failures, the summary can easily exceed 1024 kB, the limit set by Github -# Trim it down if so. -summary_size=$(wc -c < $GITHUB_STEP_SUMMARY) -if [[ $summary_size -gt 1000000 ]]; then - # Trim to below 1024 kB, and cut off after the last detail group - head -c 1000000 $GITHUB_STEP_SUMMARY | tac | sed -n -e '/<\/details>/,$ p' | tac > $GITHUB_STEP_SUMMARY.tmp - mv $GITHUB_STEP_SUMMARY.tmp $GITHUB_STEP_SUMMARY - ( - echo '' - echo ':x: **WARNING: Summary is too large and has been truncated.**' - echo '' - ) >> $GITHUB_STEP_SUMMARY -fi - echo ':arrow_right: To see the entire test log, click the job in the list to the left.' >> $GITHUB_STEP_SUMMARY + +truncate_summary diff --git a/.github/scripts/report-utils.sh b/.github/scripts/report-utils.sh new file mode 100644 index 0000000000000..da5b6c04b3cbe --- /dev/null +++ b/.github/scripts/report-utils.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +function truncate_summary() { + # With large hs_errs, the summary can easily exceed 1024 kB, the limit set by GitHub + # Trim it down if so.
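+ # (Mechanics of the trim below: head -c keeps the first ~1000 kB; the tac | sed | tac + # pipeline then drops everything after the last complete </details> group, so the + # truncated summary still ends with well-formed markup.)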
+ summary_size=$(wc -c < $GITHUB_STEP_SUMMARY) + if [[ $summary_size -gt 1000000 ]]; then + # Trim to below 1024 kB, and cut off after the last detail group + head -c 1000000 $GITHUB_STEP_SUMMARY | tac | sed -n -e '/<\/details>/,$ p' | tac > $GITHUB_STEP_SUMMARY.tmp + mv $GITHUB_STEP_SUMMARY.tmp $GITHUB_STEP_SUMMARY + ( + echo '' + echo ':x: **WARNING: Summary is too large and has been truncated.**' + echo '' + ) >> $GITHUB_STEP_SUMMARY + fi +} diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000000..f4c5e7e67cb46 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# JDK Vulnerabilities + +Please follow the process outlined in the [OpenJDK Vulnerability Policy](https://openjdk.org/groups/vulnerability/report) to disclose vulnerabilities in the JDK. diff --git a/doc/building.html b/doc/building.html index 707531553124b..c91d876246cde 100644 --- a/doc/building.html +++ b/doc/building.html @@ -614,10 +614,9 @@

<h3 id="clang">clang</h3>
 <code>--with-toolchain-type=clang</code>.</p>
 <h3 id="apple-xcode">Apple Xcode</h3>
 <p>The oldest supported version of Xcode is 13.0.</p>
-<p>You will need the Xcode command line developer tools to be able to
-build the JDK. (Actually, <em>only</em> the command line tools are
-needed, not the IDE.) The simplest way to install these is to run:</p>
-<pre><code>xcode-select --install</code></pre>
+<p>You will need to download Xcode either from the App Store or specific
+versions can be easily located via the <a
+href="https://xcodereleases.com">Xcode Releases</a> website.</p>
 <p>When updating Xcode, it is advisable to keep an older version for
 building the JDK. To use a specific version of Xcode you have multiple
 options:</p>
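As a companion to the documentation change above: a minimal sketch of pinning the JDK build to one specific Xcode installation. The `Xcode-15.4.app` path is a made-up example, and `xcode-select --switch` plus the `DEVELOPER_DIR` override are standard Apple tooling rather than anything introduced by this patch; the patch itself lists the supported options right below in building.md.

```
# Persistent: make one Xcode install the system-wide active developer directory.
sudo xcode-select --switch /Applications/Xcode-15.4.app

# Per-shell alternative: Apple's xcrun/xcodebuild honor DEVELOPER_DIR,
# which the JDK's configure toolchain detection ultimately relies on.
export DEVELOPER_DIR=/Applications/Xcode-15.4.app/Contents/Developer

bash configure
```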

diff --git a/doc/building.md b/doc/building.md index 51ac0cad7d98b..47ad9e7c72b4c 100644 --- a/doc/building.md +++ b/doc/building.md @@ -422,13 +422,9 @@ To use clang instead of gcc on Linux, use `--with-toolchain-type=clang`. The oldest supported version of Xcode is 13.0. -You will need the Xcode command line developer tools to be able to build the -JDK. (Actually, *only* the command line tools are needed, not the IDE.) The -simplest way to install these is to run: - -``` -xcode-select --install -``` +You will need to download Xcode either from the App Store or specific versions +can be easily located via the [Xcode Releases](https://xcodereleases.com) +website. When updating Xcode, it is advisable to keep an older version for building the JDK. To use a specific version of Xcode you have multiple options: diff --git a/make/conf/github-actions.conf b/make/conf/github-actions.conf index eca6c05033d88..a6b383daa8fd4 100644 --- a/make/conf/github-actions.conf +++ b/make/conf/github-actions.conf @@ -29,21 +29,21 @@ GTEST_VERSION=1.14.0 JTREG_VERSION=7.4+1 LINUX_X64_BOOT_JDK_EXT=tar.gz -LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_linux-x64_bin.tar.gz -LINUX_X64_BOOT_JDK_SHA256=41536f115668308ecf4eba92aaf6acaeb0936225828b741efd83b6173ba82963 +LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_linux-x64_bin.tar.gz +LINUX_X64_BOOT_JDK_SHA256=08fea92724127c6fa0f2e5ea0b07ff4951ccb1e2f22db3c21eebbd7347152a67 ALPINE_LINUX_X64_BOOT_JDK_EXT=tar.gz -ALPINE_LINUX_X64_BOOT_JDK_URL=https://github.com/adoptium/temurin22-binaries/releases/download/jdk-22.0.2%2B9/OpenJDK22U-jdk_x64_alpine-linux_hotspot_22.0.2_9.tar.gz -ALPINE_LINUX_X64_BOOT_JDK_SHA256=49f73414824b1a7c268a611225fa4d7ce5e25600201e0f1cd59f94d1040b5264 +ALPINE_LINUX_X64_BOOT_JDK_URL=https://github.com/adoptium/temurin23-binaries/releases/download/jdk-23%2B37/OpenJDK23U-jdk_x64_alpine-linux_hotspot_23_37.tar.gz +ALPINE_LINUX_X64_BOOT_JDK_SHA256=bff4c78f30d8d173e622bf2f40c36113df47337fc6d1ee5105ed2459841165aa MACOS_AARCH64_BOOT_JDK_EXT=tar.gz -MACOS_AARCH64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_macos-aarch64_bin.tar.gz -MACOS_AARCH64_BOOT_JDK_SHA256=3dab98730234e1a87aec14bcb8171d2cae101e96ff4eed1dab96abbb08e843fd +MACOS_AARCH64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_macos-aarch64_bin.tar.gz +MACOS_AARCH64_BOOT_JDK_SHA256=9527bf080a74ae6dca51df413aa826f0c011c6048885e4c8ad112172be8815f3 MACOS_X64_BOOT_JDK_EXT=tar.gz -MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_macos-x64_bin.tar.gz -MACOS_X64_BOOT_JDK_SHA256=e8b3ec7a7077711223d31156e771f11723cd7af31c2017f1bd2eda20855940fb +MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_macos-x64_bin.tar.gz +MACOS_X64_BOOT_JDK_SHA256=5c3a909fd2079d0e376dd43c85c4f7d02d08914866f196480bd47784b2a0121e WINDOWS_X64_BOOT_JDK_EXT=zip -WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_windows-x64_bin.zip -WINDOWS_X64_BOOT_JDK_SHA256=f2a9b9ab944e71a64637fcdc6b13a1188cf02d4eb9ecf71dc927e98b3e45f5dc +WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_windows-x64_bin.zip 
+WINDOWS_X64_BOOT_JDK_SHA256=cba5013874ba50cae543c86fe6423453816c77281e2751a8a9a633d966f1dc04 diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js index 30c45d4cde161..a85c20b2098ea 100644 --- a/make/conf/jib-profiles.js +++ b/make/conf/jib-profiles.js @@ -390,8 +390,8 @@ var getJibProfilesCommon = function (input, data) { }; }; - common.boot_jdk_version = "22"; - common.boot_jdk_build_number = "36"; + common.boot_jdk_version = "23"; + common.boot_jdk_build_number = "37"; common.boot_jdk_home = input.get("boot_jdk", "install_path") + "/jdk-" + common.boot_jdk_version + (input.build_os == "macosx" ? ".jdk/Contents/Home" : ""); diff --git a/make/conf/version-numbers.conf b/make/conf/version-numbers.conf index 1d47c2cddd001..055f9ca886618 100644 --- a/make/conf/version-numbers.conf +++ b/make/conf/version-numbers.conf @@ -37,6 +37,6 @@ DEFAULT_VERSION_DATE=2025-03-18 DEFAULT_VERSION_CLASSFILE_MAJOR=68 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`" DEFAULT_VERSION_CLASSFILE_MINOR=0 DEFAULT_VERSION_DOCS_API_SINCE=11 -DEFAULT_ACCEPTABLE_BOOT_VERSIONS="22 23 24" +DEFAULT_ACCEPTABLE_BOOT_VERSIONS="23 24" DEFAULT_JDK_SOURCE_TARGET_VERSION=24 DEFAULT_PROMOTED_VERSION_PRE=ea diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index 8dada3cec0a1d..ddb2c3e33e513 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -200,6 +200,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) ))) endif + ifeq ($(call check-jvm-feature, g1gc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU).ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + endif + SINGLE_AD_SRCFILE := $(ADLC_SUPPORT_DIR)/all-ad-src.ad INSERT_FILENAME_AWK_SCRIPT := \ diff --git a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java index 630d3a390d18a..426d0bb10ede1 100644 --- a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java +++ b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -273,7 +273,7 @@ private void outputFile(Path dstFile, String version, // link version-region-rules out.writeShort(builtZones.size()); for (Map.Entry entry : builtZones.entrySet()) { - int regionIndex = Arrays.binarySearch(regionArray, entry.getKey()); + int regionIndex = findRegionIndex(regionArray, entry.getKey()); int rulesIndex = rulesList.indexOf(entry.getValue()); out.writeShort(regionIndex); out.writeShort(rulesIndex); @@ -281,8 +281,8 @@ private void outputFile(Path dstFile, String version, // alias-region out.writeShort(links.size()); for (Map.Entry entry : links.entrySet()) { - int aliasIndex = Arrays.binarySearch(regionArray, entry.getKey()); - int regionIndex = Arrays.binarySearch(regionArray, entry.getValue()); + int aliasIndex = findRegionIndex(regionArray, entry.getKey()); + int regionIndex = findRegionIndex(regionArray, entry.getValue()); out.writeShort(aliasIndex); out.writeShort(regionIndex); } @@ -294,6 +294,14 @@ private void outputFile(Path dstFile, String version, } } + private static int findRegionIndex(String[] regionArray, String region) { + int index = Arrays.binarySearch(regionArray, region); + if (index < 0) { + throw new IllegalArgumentException("Unknown region: " + region); + } + return index; + } + /** Whether to output verbose messages. */ private boolean verbose; diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk index 0620549f05cd7..bf6ace6f97f7c 100644 --- a/make/modules/jdk.incubator.vector/Lib.gmk +++ b/make/modules/jdk.incubator.vector/Lib.gmk @@ -37,3 +37,21 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO TARGETS += $(BUILD_LIBJSVML) endif + +################################################################################ +## Build libsleef +################################################################################ + +ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true) + $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \ + NAME := sleef, \ + OPTIMIZATION := HIGH, \ + SRC := libsleef/lib, \ + EXTRA_SRC := libsleef/generated, \ + DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \ + DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \ + CFLAGS := -march=rv64gcv, \ + )) + + TARGETS += $(BUILD_LIBSLEEF) +endif diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 39eae43a287e7..c7cae54d14c0a 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2620,7 +2620,8 @@ static bool is_vector_bitwise_not_pattern(Node* n, Node* m) { bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { if (is_vshift_con_pattern(n, m) || is_vector_bitwise_not_pattern(n, m) || - is_valid_sve_arith_imm_pattern(n, m)) { + is_valid_sve_arith_imm_pattern(n, m) || + is_encode_and_store_pattern(n, m)) { mstack.push(m, Visit); return true; } @@ -2720,7 +2721,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, { Address addr = mem2address(opcode, base, index, scale, disp); if (addr.getMode() == Address::base_plus_offset) { - // Fix up any out-of-range offsets. + /* Fix up any out-of-range offsets. 
*/ assert_different_registers(rscratch1, base); assert_different_registers(rscratch1, reg); addr = __ legitimize_address(addr, size_in_memory, rscratch1); @@ -2761,11 +2762,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, int opcode, Register base, int index, int size, int disp) { if (index == -1) { - // Fix up any out-of-range offsets. - assert_different_registers(rscratch1, base); - Address addr = Address(base, disp); - addr = __ legitimize_address(addr, (1 << T), rscratch1); - (masm->*insn)(reg, T, addr); + (masm->*insn)(reg, T, Address(base, disp)); } else { assert(disp == 0, "unsupported address mode"); (masm->*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size))); @@ -2820,7 +2817,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -2828,7 +2825,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrsb, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -2836,7 +2833,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -2844,7 +2841,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -2852,7 +2849,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrshw, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); @@ -2860,7 +2857,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrsh, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); @@ -2868,7 +2865,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); @@ -2876,7 +2873,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); @@ -2884,7 +2881,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{ + enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -2892,7 +2889,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -2900,7 +2897,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldrsw, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -2908,7 +2905,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{ Register dst_reg = as_Register($dst$$reg); loadStore(masm, &MacroAssembler::ldr, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8); @@ -2916,7 +2913,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{ + enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(masm, &MacroAssembler::ldrs, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -2924,7 +2921,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{ + enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(masm, &MacroAssembler::ldrd, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8); @@ -2932,7 +2929,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strb(iRegI src, memory mem) %{ + enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{ Register src_reg = as_Register($src$$reg); loadStore(masm, &MacroAssembler::strb, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -2940,14 +2937,14 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strb0(memory mem) %{ + enc_class aarch64_enc_strb0(memory1 mem) %{ loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); %} // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strh(iRegI src, memory mem) %{ + enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{ Register src_reg = as_Register($src$$reg); loadStore(masm, &MacroAssembler::strh, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); @@ -2955,14 +2952,14 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strh0(memory mem) %{ + enc_class aarch64_enc_strh0(memory2 mem) %{ loadStore(masm, &MacroAssembler::strh, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2); %} // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strw(iRegI src, memory mem) %{ + enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{ Register src_reg = as_Register($src$$reg); loadStore(masm, &MacroAssembler::strw, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -2970,14 +2967,14 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strw0(memory mem) %{ + enc_class aarch64_enc_strw0(memory4 mem) %{ loadStore(masm, &MacroAssembler::strw, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); %} // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_str(iRegL src, memory mem) %{ + enc_class aarch64_enc_str(iRegL src, memory8 mem) %{ Register src_reg = as_Register($src$$reg); // we sometimes get asked to store the stack pointer into the // current thread -- we cannot do that directly on AArch64 @@ -2992,14 +2989,14 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_str0(memory mem) %{ + enc_class aarch64_enc_str0(memory8 mem) %{ loadStore(masm, &MacroAssembler::str, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8); %} // This encoding class is generated automatically from ad_encode.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strs(vRegF src, memory mem) %{ + enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(masm, &MacroAssembler::strs, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); @@ -3007,7 +3004,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strd(vRegD src, memory mem) %{ + enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(masm, &MacroAssembler::strd, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8); @@ -3015,7 +3012,7 @@ encode %{ // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strb0_ordered(memory mem) %{ + enc_class aarch64_enc_strb0_ordered(memory4 mem) %{ __ membar(Assembler::StoreStore); loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); @@ -3217,7 +3214,7 @@ encode %{ // synchronized read/update encodings - enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{ + enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{ Register dst_reg = as_Register($dst$$reg); Register base = as_Register($mem$$base); int index = $mem$$index; @@ -3245,7 +3242,7 @@ encode %{ } %} - enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{ + enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{ Register src_reg = as_Register($src$$reg); Register base = as_Register($mem$$base); int index = $mem$$index; @@ -4173,10 +4170,60 @@ operand immIU7() interface(CONST_INTER); %} -// Offset for immediate loads and stores +// Offset for scaled or unscaled immediate loads and stores operand immIOffset() %{ - predicate(n->get_int() >= -256 && n->get_int() <= 65520); + predicate(Address::offset_ok_for_immed(n->get_int(), 0)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset1() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 0)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset2() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset4() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 2)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset8() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset16() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 4)); match(ConI); op_cost(0); @@ -4194,6 +4241,56 @@ operand immLOffset() interface(CONST_INTER); %} +operand immLoffset1() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 0)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset2() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset4() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 2)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset8() +%{ + 
predicate(Address::offset_ok_for_immed(n->get_long(), 3)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset16() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 4)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // 5 bit signed long integer operand immL5() %{ @@ -5106,7 +5203,21 @@ operand indIndex(iRegP reg, iRegL lreg) %} %} -operand indOffI(iRegP reg, immIOffset off) +operand indOffI1(iRegP reg, immIOffset1 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI2(iRegP reg, immIOffset2 off) %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); @@ -5120,7 +5231,105 @@ operand indOffI(iRegP reg, immIOffset off) %} %} -operand indOffL(iRegP reg, immLOffset off) +operand indOffI4(iRegP reg, immIOffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI8(iRegP reg, immIOffset8 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI16(iRegP reg, immIOffset16 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL1(iRegP reg, immLoffset1 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL2(iRegP reg, immLoffset2 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL4(iRegP reg, immLoffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL8(iRegP reg, immLoffset8 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL16(iRegP reg, immLoffset16 off) %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); @@ -5496,7 +5705,10 @@ operand iRegL2P(iRegL reg) %{ interface(REG_INTER) %} -opclass vmem(indirect, indIndex, indOffI, indOffL, indOffIN, indOffLN); +opclass vmem2(indirect, indIndex, indOffI2, indOffL2); +opclass vmem4(indirect, indIndex, indOffI4, indOffL4); +opclass vmem8(indirect, indIndex, indOffI8, indOffL8); +opclass vmem16(indirect, indIndex, indOffI16, indOffL16); //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify @@ -5508,9 +5720,23 @@ opclass vmem(indirect, indIndex, indOffI, indOffL, indOffIN, indOffLN); // memory is used to define read/write location for load/store // instruction defs. 
we can turn a memory op into an Address -opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, - indOffLN, indirectX2P, indOffX2P); +opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P); + +opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P); + +opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); + +opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); + +// All of the memory operands. For the pipeline description. +opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, + indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); + // iRegIorL2I is used for src inputs in rules for 32 bit int (I) // operations. it allows the src to be either an iRegI or a (ConvL2I @@ -6212,7 +6438,7 @@ define %{ // Load Instructions // Load Byte (8 bit signed) -instruct loadB(iRegINoSp dst, memory mem) +instruct loadB(iRegINoSp dst, memory1 mem) %{ match(Set dst (LoadB mem)); predicate(!needs_acquiring_load(n)); @@ -6226,7 +6452,7 @@ instruct loadB(iRegINoSp dst, memory mem) %} // Load Byte (8 bit signed) into long -instruct loadB2L(iRegLNoSp dst, memory mem) +instruct loadB2L(iRegLNoSp dst, memory1 mem) %{ match(Set dst (ConvI2L (LoadB mem))); predicate(!needs_acquiring_load(n->in(1))); @@ -6240,7 +6466,7 @@ instruct loadB2L(iRegLNoSp dst, memory mem) %} // Load Byte (8 bit unsigned) -instruct loadUB(iRegINoSp dst, memory mem) +instruct loadUB(iRegINoSp dst, memory1 mem) %{ match(Set dst (LoadUB mem)); predicate(!needs_acquiring_load(n)); @@ -6254,7 +6480,7 @@ instruct loadUB(iRegINoSp dst, memory mem) %} // Load Byte (8 bit unsigned) into long -instruct loadUB2L(iRegLNoSp dst, memory mem) +instruct loadUB2L(iRegLNoSp dst, memory1 mem) %{ match(Set dst (ConvI2L (LoadUB mem))); predicate(!needs_acquiring_load(n->in(1))); @@ -6268,7 +6494,7 @@ instruct loadUB2L(iRegLNoSp dst, memory mem) %} // Load Short (16 bit signed) -instruct loadS(iRegINoSp dst, memory mem) +instruct loadS(iRegINoSp dst, memory2 mem) %{ match(Set dst (LoadS mem)); predicate(!needs_acquiring_load(n)); @@ -6282,7 +6508,7 @@ instruct loadS(iRegINoSp dst, memory mem) %} // Load Short (16 bit signed) into long -instruct loadS2L(iRegLNoSp dst, memory mem) +instruct loadS2L(iRegLNoSp dst, memory2 mem) %{ match(Set dst (ConvI2L (LoadS mem))); predicate(!needs_acquiring_load(n->in(1))); @@ -6296,7 +6522,7 @@ instruct loadS2L(iRegLNoSp dst, memory mem) %} // Load Char (16 bit unsigned) -instruct loadUS(iRegINoSp dst, memory mem) +instruct loadUS(iRegINoSp dst, memory2 mem) %{ match(Set dst (LoadUS mem)); predicate(!needs_acquiring_load(n)); @@ -6310,7 +6536,7 @@ instruct loadUS(iRegINoSp dst, memory mem) %} // Load 
Short/Char (16 bit unsigned) into long -instruct loadUS2L(iRegLNoSp dst, memory mem) +instruct loadUS2L(iRegLNoSp dst, memory2 mem) %{ match(Set dst (ConvI2L (LoadUS mem))); predicate(!needs_acquiring_load(n->in(1))); @@ -6324,7 +6550,7 @@ instruct loadUS2L(iRegLNoSp dst, memory mem) %} // Load Integer (32 bit signed) -instruct loadI(iRegINoSp dst, memory mem) +instruct loadI(iRegINoSp dst, memory4 mem) %{ match(Set dst (LoadI mem)); predicate(!needs_acquiring_load(n)); @@ -6338,7 +6564,7 @@ instruct loadI(iRegINoSp dst, memory mem) %} // Load Integer (32 bit signed) into long -instruct loadI2L(iRegLNoSp dst, memory mem) +instruct loadI2L(iRegLNoSp dst, memory4 mem) %{ match(Set dst (ConvI2L (LoadI mem))); predicate(!needs_acquiring_load(n->in(1))); @@ -6352,7 +6578,7 @@ instruct loadI2L(iRegLNoSp dst, memory mem) %} // Load Integer (32 bit unsigned) into long -instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +instruct loadUI2L(iRegLNoSp dst, memory4 mem, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load())); @@ -6366,7 +6592,7 @@ instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) %} // Load Long (64 bit signed) -instruct loadL(iRegLNoSp dst, memory mem) +instruct loadL(iRegLNoSp dst, memory8 mem) %{ match(Set dst (LoadL mem)); predicate(!needs_acquiring_load(n)); @@ -6380,7 +6606,7 @@ instruct loadL(iRegLNoSp dst, memory mem) %} // Load Range -instruct loadRange(iRegINoSp dst, memory mem) +instruct loadRange(iRegINoSp dst, memory4 mem) %{ match(Set dst (LoadRange mem)); @@ -6393,7 +6619,7 @@ instruct loadRange(iRegINoSp dst, memory mem) %} // Load Pointer -instruct loadP(iRegPNoSp dst, memory mem) +instruct loadP(iRegPNoSp dst, memory8 mem) %{ match(Set dst (LoadP mem)); predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0)); @@ -6407,10 +6633,10 @@ instruct loadP(iRegPNoSp dst, memory mem) %} // Load Compressed Pointer -instruct loadN(iRegNNoSp dst, memory mem) +instruct loadN(iRegNNoSp dst, memory4 mem) %{ match(Set dst (LoadN mem)); - predicate(!needs_acquiring_load(n)); + predicate(!needs_acquiring_load(n) && n->as_Load()->barrier_data() == 0); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} @@ -6421,7 +6647,7 @@ instruct loadN(iRegNNoSp dst, memory mem) %} // Load Klass Pointer -instruct loadKlass(iRegPNoSp dst, memory mem) +instruct loadKlass(iRegPNoSp dst, memory8 mem) %{ match(Set dst (LoadKlass mem)); predicate(!needs_acquiring_load(n)); @@ -6435,7 +6661,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem) %} // Load Narrow Klass Pointer -instruct loadNKlass(iRegNNoSp dst, memory mem) +instruct loadNKlass(iRegNNoSp dst, memory4 mem) %{ match(Set dst (LoadNKlass mem)); predicate(!needs_acquiring_load(n)); @@ -6449,7 +6675,7 @@ instruct loadNKlass(iRegNNoSp dst, memory mem) %} // Load Float -instruct loadF(vRegF dst, memory mem) +instruct loadF(vRegF dst, memory4 mem) %{ match(Set dst (LoadF mem)); predicate(!needs_acquiring_load(n)); @@ -6463,7 +6689,7 @@ instruct loadF(vRegF dst, memory mem) %} // Load Double -instruct loadD(vRegD dst, memory mem) +instruct loadD(vRegD dst, memory8 mem) %{ match(Set dst (LoadD mem)); predicate(!needs_acquiring_load(n)); @@ -6667,7 +6893,7 @@ instruct loadConD(vRegD dst, immD con) %{ // Store Instructions // Store CMS card-mark Immediate -instruct storeimmCM0(immI0 zero, memory mem) +instruct storeimmCM0(immI0 zero, memory1 mem) %{ match(Set mem (StoreCM mem zero)); @@ -6682,7 +6908,7 @@ instruct 
storeimmCM0(immI0 zero, memory mem) // Store CMS card-mark Immediate with intervening StoreStore // needed when using CMS with no conditional card marking -instruct storeimmCM0_ordered(immI0 zero, memory mem) +instruct storeimmCM0_ordered(immI0 zero, memory1 mem) %{ match(Set mem (StoreCM mem zero)); @@ -6697,7 +6923,7 @@ instruct storeimmCM0_ordered(immI0 zero, memory mem) %} // Store Byte -instruct storeB(iRegIorL2I src, memory mem) +instruct storeB(iRegIorL2I src, memory1 mem) %{ match(Set mem (StoreB mem src)); predicate(!needs_releasing_store(n)); @@ -6711,7 +6937,7 @@ instruct storeB(iRegIorL2I src, memory mem) %} -instruct storeimmB0(immI0 zero, memory mem) +instruct storeimmB0(immI0 zero, memory1 mem) %{ match(Set mem (StoreB mem zero)); predicate(!needs_releasing_store(n)); @@ -6725,7 +6951,7 @@ instruct storeimmB0(immI0 zero, memory mem) %} // Store Char/Short -instruct storeC(iRegIorL2I src, memory mem) +instruct storeC(iRegIorL2I src, memory2 mem) %{ match(Set mem (StoreC mem src)); predicate(!needs_releasing_store(n)); @@ -6738,7 +6964,7 @@ instruct storeC(iRegIorL2I src, memory mem) ins_pipe(istore_reg_mem); %} -instruct storeimmC0(immI0 zero, memory mem) +instruct storeimmC0(immI0 zero, memory2 mem) %{ match(Set mem (StoreC mem zero)); predicate(!needs_releasing_store(n)); @@ -6753,7 +6979,7 @@ instruct storeimmC0(immI0 zero, memory mem) // Store Integer -instruct storeI(iRegIorL2I src, memory mem) +instruct storeI(iRegIorL2I src, memory4 mem) %{ match(Set mem(StoreI mem src)); predicate(!needs_releasing_store(n)); @@ -6766,7 +6992,7 @@ instruct storeI(iRegIorL2I src, memory mem) ins_pipe(istore_reg_mem); %} -instruct storeimmI0(immI0 zero, memory mem) +instruct storeimmI0(immI0 zero, memory4 mem) %{ match(Set mem(StoreI mem zero)); predicate(!needs_releasing_store(n)); @@ -6780,7 +7006,7 @@ instruct storeimmI0(immI0 zero, memory mem) %} // Store Long (64 bit signed) -instruct storeL(iRegL src, memory mem) +instruct storeL(iRegL src, memory8 mem) %{ match(Set mem (StoreL mem src)); predicate(!needs_releasing_store(n)); @@ -6794,7 +7020,7 @@ instruct storeL(iRegL src, memory mem) %} // Store Long (64 bit signed) -instruct storeimmL0(immL0 zero, memory mem) +instruct storeimmL0(immL0 zero, memory8 mem) %{ match(Set mem (StoreL mem zero)); predicate(!needs_releasing_store(n)); @@ -6808,7 +7034,7 @@ instruct storeimmL0(immL0 zero, memory mem) %} // Store Pointer -instruct storeP(iRegP src, memory mem) +instruct storeP(iRegP src, memory8 mem) %{ match(Set mem (StoreP mem src)); predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); @@ -6822,7 +7048,7 @@ instruct storeP(iRegP src, memory mem) %} // Store Pointer -instruct storeimmP0(immP0 zero, memory mem) +instruct storeimmP0(immP0 zero, memory8 mem) %{ match(Set mem (StoreP mem zero)); predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); @@ -6836,10 +7062,10 @@ instruct storeimmP0(immP0 zero, memory mem) %} // Store Compressed Pointer -instruct storeN(iRegN src, memory mem) +instruct storeN(iRegN src, memory4 mem) %{ match(Set mem (StoreN mem src)); - predicate(!needs_releasing_store(n)); + predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} @@ -6849,10 +7075,10 @@ instruct storeN(iRegN src, memory mem) ins_pipe(istore_reg_mem); %} -instruct storeImmN0(immN0 zero, memory mem) +instruct storeImmN0(immN0 zero, memory4 mem) %{ match(Set mem (StoreN mem zero)); - 
predicate(!needs_releasing_store(n)); + predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# compressed ptr" %} @@ -6863,7 +7089,7 @@ instruct storeImmN0(immN0 zero, memory mem) %} // Store Float -instruct storeF(vRegF src, memory mem) +instruct storeF(vRegF src, memory4 mem) %{ match(Set mem (StoreF mem src)); predicate(!needs_releasing_store(n)); @@ -6880,7 +7106,7 @@ instruct storeF(vRegF src, memory mem) // implement storeImmF0 and storeFImmPacked // Store Double -instruct storeD(vRegD src, memory mem) +instruct storeD(vRegD src, memory8 mem) %{ match(Set mem (StoreD mem src)); predicate(!needs_releasing_store(n)); @@ -6894,7 +7120,7 @@ instruct storeD(vRegD src, memory mem) %} // Store Compressed Klass Pointer -instruct storeNKlass(iRegN src, memory mem) +instruct storeNKlass(iRegN src, memory4 mem) %{ predicate(!needs_releasing_store(n)); match(Set mem (StoreNKlass mem src)); @@ -6913,7 +7139,7 @@ instruct storeNKlass(iRegN src, memory mem) // prefetch instructions // Must be safe to execute with invalid address (cannot fault). -instruct prefetchalloc( memory mem ) %{ +instruct prefetchalloc( memory8 mem ) %{ match(PrefetchAllocation mem); ins_cost(INSN_COST); @@ -7086,6 +7312,7 @@ instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "ldarw $dst, $mem\t# compressed ptr" %} @@ -7253,6 +7480,7 @@ instruct storeimmP0_volatile(immP0 zero, /* sync_memory*/indirect mem) instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) %{ match(Set mem (StoreN mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "stlrw $src, $mem\t# compressed ptr" %} @@ -7265,6 +7493,7 @@ instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) instruct storeimmN0_volatile(immN0 zero, /* sync_memory*/indirect mem) %{ match(Set mem (StoreN mem zero)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "stlrw zr, $mem\t# compressed ptr" %} @@ -7482,7 +7711,7 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ ins_pipe(pipe_class_default); %} -instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ +instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{ match(Set dst (PopCountI (LoadI mem))); effect(TEMP tmp); ins_cost(INSN_COST * 13); @@ -7523,7 +7752,7 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ ins_pipe(pipe_class_default); %} -instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ +instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{ match(Set dst (PopCountL (LoadL mem))); effect(TEMP tmp); ins_cost(INSN_COST * 13); @@ -8061,6 +8290,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8175,7 +8405,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + 
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); @@ -8280,6 +8510,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -8389,7 +8620,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -8501,6 +8732,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8620,7 +8852,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL // This pattern is generated automatically from cas.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(KILL cr); @@ -8681,6 +8913,7 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{ %} instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); ins_cost(2 * VOLATILE_REF_COST); format %{ "atomic_xchgw $prev, $newv, [$mem]" %} @@ -8724,7 +8957,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{ %} instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); ins_cost(VOLATILE_REF_COST); format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %} @@ -16672,7 +16905,7 @@ instruct compressBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, ins_pipe(pipe_slow); %} -instruct compressBitsI_memcon(iRegINoSp dst, memory mem, immI mask, +instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask, vRegF tdst, vRegF tsrc, vRegF tmask) %{ match(Set dst (CompressBits (LoadI mem) mask)); effect(TEMP tdst, TEMP tsrc, TEMP tmask); @@ -16709,7 +16942,7 @@ instruct compressBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask, ins_pipe(pipe_slow); %} -instruct compressBitsL_memcon(iRegLNoSp dst, memory mem, immL mask, +instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask, vRegF tdst, vRegF tsrc, vRegF tmask) %{ match(Set dst (CompressBits (LoadL mem) mask)); effect(TEMP tdst, TEMP tsrc, TEMP tmask); @@ -16746,7 +16979,7 @@ instruct expandBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, ins_pipe(pipe_slow); %} -instruct expandBitsI_memcon(iRegINoSp dst, memory mem, immI mask, +instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask, vRegF tdst, vRegF tsrc, vRegF tmask) %{ match(Set dst (ExpandBits (LoadI mem) mask)); effect(TEMP tdst, TEMP tsrc, TEMP tmask); @@ -16784,7 +17017,7 @@ instruct expandBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask, %} -instruct expandBitsL_memcon(iRegINoSp dst, memory mem, immL mask, +instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask, vRegF tdst, vRegF tsrc, vRegF tmask) %{ match(Set dst (ExpandBits (LoadL mem) mask)); effect(TEMP tdst, TEMP tsrc, TEMP tmask); diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index cdbc4103df89a..0d3a240cecfd3 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -345,7 +345,7 @@ source %{ // ------------------------------ Vector load/store ---------------------------- // Load Vector (16 bits) -instruct loadV2(vReg dst, vmem mem) %{ +instruct loadV2(vReg dst, vmem2 mem) %{ predicate(n->as_LoadVector()->memory_size() == 2); match(Set dst (LoadVector mem)); format %{ "loadV2 $dst, $mem\t# vector (16 bits)" %} @@ -354,7 +354,7 @@ instruct loadV2(vReg dst, vmem mem) %{ %} // Store Vector (16 bits) -instruct storeV2(vReg src, vmem mem) %{ +instruct storeV2(vReg src, vmem2 mem) %{ predicate(n->as_StoreVector()->memory_size() == 2); match(Set mem (StoreVector mem src)); format %{ "storeV2 $mem, 
$src\t# vector (16 bits)" %} @@ -363,7 +363,7 @@ instruct storeV2(vReg src, vmem mem) %{ %} // Load Vector (32 bits) -instruct loadV4(vReg dst, vmem mem) %{ +instruct loadV4(vReg dst, vmem4 mem) %{ predicate(n->as_LoadVector()->memory_size() == 4); match(Set dst (LoadVector mem)); format %{ "loadV4 $dst, $mem\t# vector (32 bits)" %} @@ -372,7 +372,7 @@ instruct loadV4(vReg dst, vmem mem) %{ %} // Store Vector (32 bits) -instruct storeV4(vReg src, vmem mem) %{ +instruct storeV4(vReg src, vmem4 mem) %{ predicate(n->as_StoreVector()->memory_size() == 4); match(Set mem (StoreVector mem src)); format %{ "storeV4 $mem, $src\t# vector (32 bits)" %} @@ -381,7 +381,7 @@ instruct storeV4(vReg src, vmem mem) %{ %} // Load Vector (64 bits) -instruct loadV8(vReg dst, vmem mem) %{ +instruct loadV8(vReg dst, vmem8 mem) %{ predicate(n->as_LoadVector()->memory_size() == 8); match(Set dst (LoadVector mem)); format %{ "loadV8 $dst, $mem\t# vector (64 bits)" %} @@ -390,7 +390,7 @@ instruct loadV8(vReg dst, vmem mem) %{ %} // Store Vector (64 bits) -instruct storeV8(vReg src, vmem mem) %{ +instruct storeV8(vReg src, vmem8 mem) %{ predicate(n->as_StoreVector()->memory_size() == 8); match(Set mem (StoreVector mem src)); format %{ "storeV8 $mem, $src\t# vector (64 bits)" %} @@ -399,7 +399,7 @@ instruct storeV8(vReg src, vmem mem) %{ %} // Load Vector (128 bits) -instruct loadV16(vReg dst, vmem mem) %{ +instruct loadV16(vReg dst, vmem16 mem) %{ predicate(n->as_LoadVector()->memory_size() == 16); match(Set dst (LoadVector mem)); format %{ "loadV16 $dst, $mem\t# vector (128 bits)" %} @@ -408,7 +408,7 @@ instruct loadV16(vReg dst, vmem mem) %{ %} // Store Vector (128 bits) -instruct storeV16(vReg src, vmem mem) %{ +instruct storeV16(vReg src, vmem16 mem) %{ predicate(n->as_StoreVector()->memory_size() == 16); match(Set mem (StoreVector mem src)); format %{ "storeV16 $mem, $src\t# vector (128 bits)" %} diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 020a75b51fa8f..99708e9ef317d 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -338,7 +338,7 @@ dnl VECTOR_LOAD_STORE($1, $2, $3, $4, $5 ) dnl VECTOR_LOAD_STORE(type, nbytes, arg_name, nbits, size) define(`VECTOR_LOAD_STORE', ` // ifelse(load, $1, Load, Store) Vector ($4 bits) -instruct $1V$2(vReg $3, vmem mem) %{ +instruct $1V$2(vReg $3, vmem$2 mem) %{ predicate(`n->as_'ifelse(load, $1, Load, Store)Vector()->memory_size() == $2); match(Set ifelse(load, $1, dst (LoadVector mem), mem (StoreVector mem src))); format %{ "$1V$2 ifelse(load, $1, `$dst, $mem', `$mem, $src')\t# vector ($4 bits)" %} diff --git a/src/hotspot/cpu/aarch64/ad_encode.m4 b/src/hotspot/cpu/aarch64/ad_encode.m4 index e3d8ea661b60a..008dbd2c9369c 100644 --- a/src/hotspot/cpu/aarch64/ad_encode.m4 +++ b/src/hotspot/cpu/aarch64/ad_encode.m4 @@ -34,7 +34,7 @@ define(access, ` define(load,` // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl + enc_class aarch64_enc_$2($1 dst, memory$5 mem) %{dnl access(dst,$2,$3,$4,$5)')dnl load(iRegI,ldrsbw,,,1) load(iRegI,ldrsb,,,1) @@ -53,12 +53,12 @@ load(vRegD,ldrd,Float,,8) define(STORE,` // This encoding class is generated automatically from ad_encode.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_$2($1 src, memory mem) %{dnl + enc_class aarch64_enc_$2($1 src, memory$5 mem) %{dnl access(src,$2,$3,$4,$5)')dnl define(STORE0,` // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_$2`'0(memory mem) %{ + enc_class aarch64_enc_$2`'0(memory$4 mem) %{ choose(masm,zr,$2,$mem->opcode(), as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp,$4)')dnl STORE(iRegI,strb,,,1) @@ -82,7 +82,7 @@ STORE(vRegD,strd,Float,,8) // This encoding class is generated automatically from ad_encode.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE - enc_class aarch64_enc_strb0_ordered(memory mem) %{ + enc_class aarch64_enc_strb0_ordered(memory4 mem) %{ __ membar(Assembler::StoreStore); loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1); diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 5e116d82761ac..1385366d8793b 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1168,8 +1168,8 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { - __ ldrb(rscratch1, Address(op->klass()->as_register(), - InstanceKlass::init_state_offset())); + __ lea(rscratch1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + __ ldarb(rscratch1, rscratch1); __ cmpw(rscratch1, InstanceKlass::fully_initialized); add_debug_info_for_null_check_here(op->stub()->info()); __ br(Assembler::NE, *op->stub()->entry()); diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp index dabafb9288b83..4bd509880f29c 100644 --- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -64,31 +64,4 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ emit_int32(0); // nmethod guard value } -int C2HandleAnonOMOwnerStub::max_size() const { - // Max size of stub has been determined by testing with 0, in which case - // C2CodeStubList::emit() will throw an assertion and report the actual size that - // is needed. - return 24; -} - -void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { - __ bind(entry()); - Register mon = monitor(); - Register t = tmp(); - assert(t != noreg, "need tmp register"); - - // Fix owner to be the current thread. - __ str(rthread, Address(mon, ObjectMonitor::owner_offset())); - - // Pop owner object from lock-stack. 
- __ ldrw(t, Address(rthread, JavaThread::lock_stack_top_offset())); - __ subw(t, t, oopSize); -#ifdef ASSERT - __ str(zr, Address(rthread, t)); -#endif - __ strw(t, Address(rthread, JavaThread::lock_stack_top_offset())); - - __ b(continuation()); -} - #undef __ diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index b4c12ecd4a849..62831ee72ba05 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmpReg; + Register owner_addr = tmpReg; Register tmp = tmp2Reg; Label cont; Label object_has_monitor; Label count, no_count; + Label unlocked; assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight"); assert_different_registers(oop, box, tmp, disp_hdr); @@ -204,14 +206,40 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe b(cont); bind(notRecursive); + + // Compute owner address. + lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); + + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset())); - ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. - cmp(rscratch1, zr); // Sets flags for result - cbnz(rscratch1, cont); - // need a release store here - lea(tmp, Address(tmp, ObjectMonitor::owner_offset())); - stlr(zr, tmp); // set unowned + ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(rscratch1, rscratch1, tmpReg); + cmp(rscratch1, zr); + br(Assembler::EQ, cont); // If so we are done. + + // Check if there is a successor. + ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path + b(cont); + + bind(unlocked); + cmp(zr, zr); // Set Flag to EQ => fast path + + // Intentional fall-through bind(cont); // flag == EQ indicates success @@ -498,33 +526,41 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi bind(not_recursive); - Label release; const Register t2_owner_addr = t2; // Compute owner address. lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, t2_owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset())); ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset())); orr(rscratch1, rscratch1, t3_t); cmp(rscratch1, zr); - br(Assembler::EQ, release); + br(Assembler::EQ, unlocked); // If so we are done. - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. 
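
The rewritten fast_unlock above releases the owner field first and then issues a StoreLoad fence before inspecting the entry lists and the successor. A minimal std::atomic model of that decision sequence, with a hypothetical struct standing in for ObjectMonitor:

  #include <atomic>

  struct MonitorModel {                 // hypothetical stand-in for ObjectMonitor
    std::atomic<void*> owner;
    std::atomic<void*> entry_list;
    std::atomic<void*> cxq;
    std::atomic<void*> succ;
  };

  // Returns true if the unlock completed on the fast path.
  bool fast_unlock(MonitorModel* m, void** unlocked_inflated_monitor_slot) {
    m->owner.store(nullptr, std::memory_order_release);   // stlr zr, [owner]
    std::atomic_thread_fence(std::memory_order_seq_cst);  // StoreLoad; avoids stranding
    if (m->entry_list.load(std::memory_order_relaxed) == nullptr &&
        m->cxq.load(std::memory_order_relaxed) == nullptr) {
      return true;                                        // no waiters: done
    }
    if (m->succ.load(std::memory_order_relaxed) != nullptr) {
      return true;                                        // a successor will take over
    }
    // Otherwise record the monitor so the runtime can try to reacquire it,
    // cf. SharedRuntime::monitor_exit_helper.
    *unlocked_inflated_monitor_slot = m;
    return false;                                         // slow path
  }
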
- // Write the thread to the owner field so the runtime knows the owner. - str(rthread, Address(t2_owner_addr)); - b(slow_path); + // Check if there is a successor. + ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); // If so we are done. - bind(release); - // Set owner to null. - // Release to satisfy the JMM - stlr(zr, t2_owner_addr); + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path + b(slow_path); } bind(unlocked); decrement(Address(rthread, JavaThread::held_monitor_count_offset())); + cmp(zr, zr); // Set Flags to EQ => fast path #ifdef ASSERT // Check that unlocked label is reached with Flags == EQ. diff --git a/src/hotspot/cpu/aarch64/cas.m4 b/src/hotspot/cpu/aarch64/cas.m4 index f8aac0c4939fa..7e13e153db18a 100644 --- a/src/hotspot/cpu/aarch64/cas.m4 +++ b/src/hotspot/cpu/aarch64/cas.m4 @@ -45,7 +45,9 @@ define(`CAS_INSN', // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));), + $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);), $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), + $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));), `dnl') match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); @@ -122,7 +124,9 @@ define(`CAS_INSN3', // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));), + $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);), $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), + $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));), `dnl') match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index d02038b6e9193..b978c350ce131 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -38,7 +38,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -95,6 +98,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ pop(saved_regs, sp); } +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register thread, const Register value, const Register temp1, const Register temp2) { + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) 
+ __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address) + __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ sub(temp1, temp1, wordSize); // temp1 := next index + __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index + __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address + __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldrw(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp1, in_progress); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ cbz(pre_val, done); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp1, tmp2); + __ b(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -115,43 +166,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert_different_registers(obj, pre_val, tmp1, tmp2); assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ ldrw(tmp1, in_progress); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ ldrb(tmp1, in_progress); - } + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is not active (*(mark queue active address) == 0), jump to done __ cbzw(tmp1, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ cbz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ ldr(tmp1, index); // tmp := *index_adr - __ cbz(tmp1, runtime); // tmp == 0? 
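
generate_queue_test_and_insertion above factors out the push that both the SATB and the dirty-card queue use: the index counts down in word-sized steps and index == 0 means the buffer is full. The same logic in plain C++, with illustrative field names:

  #include <cstddef>
  #include <cstdint>

  // Hypothetical thread-local queue state; in HotSpot these live at
  // G1ThreadLocalData offsets off the thread register.
  struct QueueModel {
    size_t     index;   // byte offset of the next free slot; 0 means full
    uintptr_t* buffer;  // base address of the buffer
  };

  // Returns false when the buffer is full and the runtime must drain it.
  bool try_enqueue(QueueModel* q, uintptr_t value) {
    if (q->index == 0) {
      return false;                 // cbz -> runtime
    }
    q->index -= sizeof(uintptr_t);  // sub temp1, temp1, wordSize
    *reinterpret_cast<uintptr_t*>(
        reinterpret_cast<char*>(q->buffer) + q->index) = value;
    return true;
  }
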
- // If yes, goto runtime - - __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize - __ str(tmp1, index); // *index_adr := tmp - __ ldr(tmp2, buffer); - __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr - - // Record the previous value - __ str(pre_val, Address(tmp1, 0)); - __ b(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime); __ bind(runtime); @@ -182,6 +200,50 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, } +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp1, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { + // Does store cross heap regions? + __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value + __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) + __ cbz(tmp1, done); + // Crosses regions, storing null? + if (new_val_may_be_null) { + __ cbz(new_val, done); + } + // Storing region crossing non-null, is card young? + __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base + __ load_byte_map_base(tmp2); // tmp2 := card table base address + __ add(tmp1, tmp1, tmp2); // tmp1 := card address + __ ldrb(tmp2, Address(tmp1)); // tmp2 := card + __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val? +} + +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + __ membar(Assembler::StoreLoad); // StoreLoad membar + __ ldrb(tmp2, Address(tmp1)); // tmp2 := card + __ cbzw(tmp2, done); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. + STATIC_ASSERT(CardTable::dirty_card_val() == 0); + __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + thread, tmp1, tmp2, rscratch1); + __ b(done); +} + void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, @@ -194,70 +256,116 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); - Label done; Label runtime; - // Does store cross heap regions? + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done + __ br(Assembler::EQ, done); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime); - __ eor(tmp1, store_addr, new_val); - __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); - __ cbz(tmp1, done); + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(store_addr); + __ push(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread); + __ pop(saved, sp); - // crosses regions, storing null? 
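
generate_post_barrier_fast_path above filters out the common cases before anything is written to the card table: same-region stores, null stores, and already-young cards. Approximately, in C++, with placeholder constants rather than the real G1 values:

  #include <cstdint>

  // Placeholder card-table state; real values come from CardTable/G1HeapRegion.
  uint8_t* card_table_base = nullptr;
  unsigned card_shift      = 9;      // illustrative: 512-byte cards
  unsigned log_region_size = 21;     // illustrative: G1HeapRegion::LogOfHRGrainBytes
  constexpr uint8_t young_card = 2;  // illustrative g1_young_card_val()

  // Returns true only when the slow path (dirty + enqueue) is needed.
  bool post_barrier_fast_path(uintptr_t store_addr, uintptr_t new_val) {
    if (((store_addr ^ new_val) >> log_region_size) == 0) {
      return false;               // store does not cross heap regions
    }
    if (new_val == 0) {
      return false;               // storing null needs no remembered-set entry
    }
    uint8_t* card = card_table_base + (store_addr >> card_shift);
    return *card != young_card;   // young cards need no further work
  }
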
+ __ bind(done); +} - __ cbz(new_val, done); +#if defined(COMPILER2) - // storing region crossing non-null, is card already dirty? +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mov(c_rarg0, arg); + } + __ mov(c_rarg1, rthread); + __ mov(rscratch1, runtime_path); + __ blr(rscratch1); +} - const Register card_addr = tmp1; +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert(thread == rthread, "must be"); + assert_different_registers(obj, pre_val, tmp1, tmp2); + assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - __ lsr(card_addr, store_addr, CardTable::card_shift()); + stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); - // get the address of the card - __ load_byte_map_base(tmp2); - __ add(card_addr, card_addr, tmp2); - __ ldrb(tmp2, Address(card_addr)); - __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); - __ br(Assembler::EQ, done); + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ cbnzw(tmp1, *stub->entry()); - assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + __ bind(*stub->continuation()); +} - __ membar(Assembler::StoreLoad); +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); - __ ldrb(tmp2, Address(card_addr)); - __ cbzw(tmp2, done); + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. 
+ __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ b(*stub->continuation()); +} - __ strb(zr, Address(card_addr)); +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + assert(thread == rthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, + rscratch1); + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg + && tmp2 != noreg, "expecting a register"); - __ ldr(rscratch1, queue_index); - __ cbz(rscratch1, runtime); - __ sub(rscratch1, rscratch1, wordSize); - __ str(rscratch1, queue_index); + stub->initialize_registers(thread, tmp1, tmp2); - __ ldr(tmp2, buffer); - __ str(card_addr, Address(tmp2, rscratch1)); - __ b(done); + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ br(Assembler::NE, *stub->entry()); - __ bind(runtime); - // save the live input values - RegSet saved = RegSet::of(store_addr); - __ push(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ pop(saved, sp); + __ bind(*stub->continuation()); +} - __ bind(done); +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. 
+ Register tmp2 = stub->tmp2(); + assert(stub->tmp3() == noreg, "not needed in this platform"); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ b(*stub->continuation()); } +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2) { bool on_oop = is_reference_type(type); diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp index 7b4bc8cdc49de..4baa18cb94544 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp @@ -33,6 +33,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -69,6 +71,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2); }; diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad new file mode 100644 index 0000000000000..081a67d68807b --- /dev/null +++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad @@ -0,0 +1,680 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_aarch64.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, rthread, tmp1, tmp2, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub); +} + +%} + +// BEGIN This section of the file is automatically generated. Do not edit -------------- + +// This section is generated from g1_aarch64.m4 + + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ str($src$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
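
The write_barrier_pre/write_barrier_post helpers above return immediately when needs_barrier(node) is false, so every pattern below can call them unconditionally; which work remains is encoded in the node's barrier data bits. A sketch of how such bit tests read, with made-up bit values (the real constants are defined in g1BarrierSetC2.hpp):

  // Illustrative flag values only; do not rely on these numbers.
  constexpr int G1C2BarrierPre         = 1;  // SATB pre-barrier required
  constexpr int G1C2BarrierPost        = 2;  // card-table post-barrier required
  constexpr int G1C2BarrierPostNotNull = 4;  // new value statically non-null

  bool needs_pre(int barrier_data)  { return (barrier_data & G1C2BarrierPre) != 0; }
  bool needs_post(int barrier_data) { return (barrier_data & G1C2BarrierPost) != 0; }
  bool post_val_not_null(int barrier_data) {
    return (barrier_data & G1C2BarrierPostNotNull) != 0;
  }
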
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StorePVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlr $src, $mem\t# ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlr($src$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ strw($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreNVolatile(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlrw $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlrw($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
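
In the g1StoreN patterns above, the post barrier needs the uncompressed address of the new value for the region-crossing test, hence the decode_heap_oop into $tmp1, or decode_heap_oop_not_null when the barrier data guarantees a non-null value. The decoding itself is roughly the following, where base and shift are placeholders for the CompressedOops mode:

  #include <cstdint>

  uintptr_t heap_base = 0;  // illustrative CompressedOops base
  unsigned  oop_shift = 3;  // illustrative CompressedOops shift

  // decode_heap_oop: preserves null.
  uintptr_t decode(uint32_t narrow) {
    return narrow == 0 ? 0 : heap_base + (uintptr_t{narrow} << oop_shift);
  }

  // decode_heap_oop_not_null: the caller guarantees narrow != 0,
  // so the null check can be skipped.
  uintptr_t decode_not_null(uint32_t narrow) {
    return heap_base + (uintptr_t{narrow} << oop_shift);
  }
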
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "strw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ strw($tmp1$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1EncodePAndStoreNVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "stlrw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ stlrw($tmp1$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. 
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
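
The comment in the two patterns above deserves spelling out: a successful CAS can only overwrite a value equal to $oldval, so the SATB pre-barrier may log $oldval directly instead of re-loading from $mem. Schematically, as a model rather than HotSpot code:

  #include <atomic>

  // On success the overwritten value is exactly 'expected', so logging
  // 'expected' as pre_val always covers it; on failure nothing is
  // overwritten and the logged value is merely redundant.
  void* cas_with_pre_barrier(std::atomic<void*>& slot, void* expected, void* desired,
                             void (*satb_log)(void*)) {
    satb_log(expected);  // pre-barrier on the only value that can be replaced
    void* witness = expected;
    slot.compare_exchange_strong(witness, desired);
    return witness;      // previous value, as CompareAndExchange returns it
  }
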
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + false /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + true /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgw $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
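
The Acq variants above, selected by needs_acquiring_load_exclusive(n), emit atomic_xchgal (acquire and release) instead of plain atomic_xchg. Very loosely, the distinction maps to C++ memory orders as follows; this is an analogy, not an exact translation of the generated instructions:

  #include <atomic>

  std::atomic<void*> slot;

  void* get_and_set(void* new_val) {      // ~ atomic_xchg (swp)
    return slot.exchange(new_val, std::memory_order_relaxed);
  }

  void* get_and_set_acq(void* new_val) {  // ~ atomic_xchgal (swpal)
    return slot.exchange(new_val, std::memory_order_acq_rel);
  }
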
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). The same holds for g1LoadN. + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ ldrw($dst$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ decode_heap_oop($tmp1$$Register, $dst$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + } + %} + ins_pipe(iload_reg_mem); +%} + +// END This section of the file is automatically generated. Do not edit -------------- diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 new file mode 100644 index 0000000000000..8fb1f7e8e428b --- /dev/null +++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 @@ -0,0 +1,384 @@ +dnl Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. 
+dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +// BEGIN This section of the file is automatically generated. Do not edit -------------- + +// This section is generated from g1_aarch64.m4 + +define(`STOREP_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreP$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST)); + format %{ "$2 $src, $mem\t# ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ $2($src$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem)); +%}')dnl +STOREP_INSN(,str) +STOREP_INSN(Volatile,stlr) +dnl +define(`STOREN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreN$1(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST)); + format %{ "$2 $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ $2($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem)); +%}')dnl +STOREN_INSN(,strw) +STOREN_INSN(Volatile,stlrw) +dnl +define(`ENCODESTOREN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreN$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Volatile,`needs_releasing_store(n)',`!needs_releasing_store(n)') && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreN mem (EncodeP src)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+  format %{ "encode_heap_oop $tmp1, $src\n\t"
+            "$2 $tmp1, $mem\t# compressed ptr" %}
+  ins_encode %{
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+      __ encode_heap_oop($tmp1$$Register, $src$$Register);
+    } else {
+      __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+    }
+    __ $2($tmp1$$Register, $mem$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $src$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+ENCODESTOREN_INSN(,strw)
+ENCODESTOREN_INSN(Volatile,stlrw)
+dnl
+define(`CAEP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeP$1(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+  effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# ptr" %}
+  ins_encode %{
+    assert_different_registers($oldval$$Register, $mem$$Register);
+    assert_different_registers($newval$$Register, $mem$$Register);
+    // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+    // $oldval is the only value that can be overwritten.
+    // The same holds for g1CompareAndSwapP and its Acq variant.
+    write_barrier_pre(masm, this,
+                      noreg /* obj */,
+                      $oldval$$Register /* pre_val */,
+                      $tmp1$$Register /* tmp1 */,
+                      $tmp2$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+                      RegSet::of($res$$Register) /* no_preserve */);
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $newval$$Register /* new_val */,
+                       $tmp1$$Register /* tmp1 */,
+                       $tmp2$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CAEP_INSN(,,false)
+CAEP_INSN(Acq,_acq,true)
+dnl
+define(`CAEN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeN$1(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+  effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# narrow oop" %}
+  ins_encode %{
+    assert_different_registers($oldval$$Register, $mem$$Register);
+    assert_different_registers($newval$$Register, $mem$$Register);
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+                      RegSet::of($res$$Register) /* no_preserve */);
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+               $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+    __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $tmp1$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CAEN_INSN(,,false)
+CAEN_INSN(Acq,_acq,true)
+dnl
+define(`CASP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapP$1(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (ptr)\n\t"
+            "cset $res, EQ" %}
+  ins_encode %{
+    assert_different_registers($oldval$$Register, $mem$$Register);
+    assert_different_registers($newval$$Register, $mem$$Register);
+    write_barrier_pre(masm, this,
+                      noreg /* obj */,
+                      $oldval$$Register /* pre_val */,
+                      $tmp1$$Register /* tmp1 */,
+                      $tmp2$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+                      RegSet::of($res$$Register) /* no_preserve */);
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               $3 /* acquire */, true /* release */, false /* weak */, noreg);
+    __ cset($res$$Register, Assembler::EQ);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $newval$$Register /* new_val */,
+                       $tmp1$$Register /* tmp1 */,
+                       $tmp2$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CASP_INSN(,,false)
+CASP_INSN(Acq,_acq,true)
+dnl
+define(`CASN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapN$1(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+  effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (narrow oop)\n\t"
+            "cset $res, EQ" %}
+  ins_encode %{
+    assert_different_registers($oldval$$Register, $mem$$Register);
+    assert_different_registers($newval$$Register, $mem$$Register);
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+                      RegSet::of($res$$Register) /* no_preserve */);
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+               $3 /* acquire */, true /* release */, false /* weak */, noreg);
+    __ cset($res$$Register, Assembler::EQ);
+    __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $tmp1$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CASN_INSN(,,false)
+CASN_INSN(Acq,_acq,true)
+dnl
+define(`XCHGP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetP$1(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set preval (GetAndSetP mem newval));
+  effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "atomic_xchg$2 $preval, $newval, [$mem]" %}
+  ins_encode %{
+    assert_different_registers($mem$$Register, $newval$$Register);
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $preval$$Register /* pre_val (as a temporary register) */,
+                      $tmp1$$Register /* tmp1 */,
+                      $tmp2$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+    __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $newval$$Register /* new_val */,
+                       $tmp1$$Register /* tmp1 */,
+                       $tmp2$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_serial);
+%}')dnl
+XCHGP_INSN(,,atomic_xchg)
+XCHGP_INSN(Acq,_acq,atomic_xchgal)
+dnl
+define(`XCHGN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetN$1(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+  predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+  match(Set preval (GetAndSetN mem newval));
+  effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+  format %{ "$2 $preval, $newval, [$mem]" %}
+  ins_encode %{
+    assert_different_registers($mem$$Register, $newval$$Register);
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+    __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+    __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $tmp1$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(pipe_serial);
+%}')dnl
+XCHGN_INSN(,atomic_xchgw,atomic_xchgw)
+XCHGN_INSN(Acq,atomic_xchgw_acq,atomic_xchgalw)
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+  // This instruction does not need an acquiring counterpart because it is only
+  // used for reference loading (Reference::get()). The same holds for g1LoadN.
+  predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+  match(Set dst (LoadP mem));
+  effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr $dst, $mem\t# ptr" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Register);
+    write_barrier_pre(masm, this,
+                      noreg /* obj */,
+                      $dst$$Register /* pre_val */,
+                      $tmp1$$Register /* tmp1 */,
+                      $tmp2$$Register /* tmp2 */);
+  %}
+  ins_pipe(iload_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+  predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+  match(Set dst (LoadN mem));
+  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw $dst, $mem\t# compressed ptr" %}
+  ins_encode %{
+    __ ldrw($dst$$Register, $mem$$Register);
+    if ((barrier_data() & G1C2BarrierPre) != 0) {
+      __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+      write_barrier_pre(masm, this,
+                        noreg /* obj */,
+                        $tmp1$$Register /* pre_val */,
+                        $tmp2$$Register /* tmp1 */,
+                        $tmp3$$Register /* tmp2 */);
+    }
+  %}
+  ins_pipe(iload_reg_mem);
+%}
+
+// END This section of the file is automatically generated. 
Do not edit -------------- diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 06f4382015603..84d06dbcc7bfd 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -67,9 +67,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec __ push(saved_regs, sp); if (UseCompressedOops) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), src, dst, count); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count); } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count); } __ pop(saved_regs, sp); __ bind(done); @@ -164,9 +164,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, if (expand_call) { assert(pre_val != c_rarg1, "smashed arg"); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread); } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread); } __ pop(saved, sp); @@ -698,7 +698,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ bind(runtime); __ push_call_clobbered_registers(); __ load_parameter(0, pre_val); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread); __ pop_call_clobbered_registers(); __ bind(done); diff --git a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad index 5e690a8e47b94..6e401724baa82 100644 --- a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad @@ -51,7 +51,7 @@ static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node, %} // Load Pointer -instruct xLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) +instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr) %{ match(Set dst (LoadP mem)); predicate(UseZGC && !ZGenerational && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() != 0)); diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad index 1510b42bfe97d..56d4538477920 100644 --- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad @@ -100,7 +100,7 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address %} // Load Pointer -instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) +instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr) %{ match(Set dst (LoadP mem)); predicate(UseZGC && ZGenerational && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index c5c02619d446e..16473b09fff42 100644 --- 
a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1838,7 +1838,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_f L_slow_path = &L_fallthrough; } // Fast path check: class is fully initialized - ldrb(scratch, Address(klass, InstanceKlass::init_state_offset())); + lea(scratch, Address(klass, InstanceKlass::init_state_offset())); + ldarb(scratch, scratch); subs(zr, scratch, InstanceKlass::fully_initialized); br(Assembler::EQ, *L_fast_path); diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp index 68800d04d69ba..aa6a9d14ff176 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -27,6 +27,7 @@ #include "asm/macroAssembler.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/vmClasses.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "memory/allocation.inline.hpp" @@ -36,7 +37,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/stubRoutines.hpp" -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 3117c75149854..52996f4c4a503 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -49,6 +49,7 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/signature.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/timerTrace.hpp" #include "runtime/vframeArray.hpp" #include "utilities/align.hpp" #include "utilities/formatBuffer.hpp" @@ -2233,7 +2234,7 @@ void SharedRuntime::generate_deopt_blob() { int reexecute_offset = __ pc() - start; #if INCLUDE_JVMCI && !defined(COMPILER1) - if (EnableJVMCI && UseJVMCICompiler) { + if (UseJVMCICompiler) { // JVMCI does not use this kind of deoptimization __ should_not_reach_here(); } diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index b3513a586de35..31116e006f025 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -3417,15 +3417,15 @@ class StubGenerator: public StubCodeGenerator { Register rscratch3 = r10; Register rscratch4 = r11; - __ andw(rscratch3, r2, r4); - __ bicw(rscratch4, r3, r4); reg_cache.extract_u32(rscratch1, k); __ movw(rscratch2, t); - __ orrw(rscratch3, rscratch3, rscratch4); __ addw(rscratch4, r1, rscratch2); __ addw(rscratch4, rscratch4, rscratch1); - __ addw(rscratch3, rscratch3, rscratch4); - __ rorw(rscratch2, rscratch3, 32 - s); + __ bicw(rscratch2, r3, r4); + __ andw(rscratch3, r2, r4); + __ addw(rscratch2, rscratch2, rscratch4); + __ addw(rscratch2, rscratch2, rscratch3); + __ rorw(rscratch2, rscratch2, 32 - s); __ addw(r1, rscratch2, r2); } @@ -7320,6 +7320,28 @@ class StubGenerator: public StubCodeGenerator { return start; } + // load Method* target of MethodHandle + // j_rarg0 = jobject 
receiver + // rmethod = result + address generate_upcall_stub_load_target() { + StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target"); + address start = __ pc(); + + __ resolve_global_jobject(j_rarg0, rscratch1, rscratch2); + // Load target method from receiver + __ load_heap_oop(rmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1, rscratch2); + __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1, rscratch2); + __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_MemberName::method_offset()), rscratch1, rscratch2); + __ access_load_at(T_ADDRESS, IN_HEAP, rmethod, + Address(rmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()), + noreg, noreg); + __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized + + __ ret(lr); + + return start; + } + #undef __ #define __ masm-> @@ -8241,6 +8263,7 @@ class StubGenerator: public StubCodeGenerator { #endif StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); + StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target(); StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated } diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 3210789bbbdfa..9894841e933d8 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" #include "classfile/javaClasses.hpp" +#include "compiler/disassembler.hpp" #include "compiler/compiler_globals.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" @@ -67,7 +68,7 @@ // Max size with JVMTI int TemplateInterpreter::InterpreterCodeSize = 200 * 1024; -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> address TemplateInterpreterGenerator::generate_slow_signature_handler() { address entry = __ pc(); @@ -1998,13 +1999,21 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& vep) { assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; - aep = __ pc(); __ push_ptr(); __ b(L); - fep = __ pc(); __ push_f(); __ b(L); - dep = __ pc(); __ push_d(); __ b(L); - lep = __ pc(); __ push_l(); __ b(L); - bep = cep = sep = - iep = __ pc(); __ push_i(); - vep = __ pc(); + aep = __ pc(); // atos entry point + __ push_ptr(); + __ b(L); + fep = __ pc(); // ftos entry point + __ push_f(); + __ b(L); + dep = __ pc(); // dtos entry point + __ push_d(); + __ b(L); + lep = __ pc(); // ltos entry point + __ push_l(); + __ b(L); + bep = cep = sep = iep = __ pc(); // [bcsi]tos entry point + __ push_i(); + vep = __ pc(); // vtos entry point __ bind(L); generate_and_dispatch(t); } diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index 25eb339bfce71..48ff356f9a558 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" #include "compiler/compilerDefinitions.inline.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/collectedHeap.hpp" @@ -49,7 +50,7 @@ 
#include "runtime/synchronizer.hpp" #include "utilities/powerOfTwo.hpp" -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> // Address computation: local variables diff --git a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp index 28ec07815be5c..517fccb2d1aa5 100644 --- a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" #include "prims/upcallLinker.hpp" @@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr static const int upcall_stub_code_base_size = 1024; static const int upcall_stub_size_per_arg = 16; -address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, +address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, @@ -222,7 +223,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, __ block_comment("{ on_entry"); __ lea(c_rarg0, Address(sp, frame_data_offset)); - __ movptr(c_rarg1, (intptr_t)receiver); __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry)); __ blr(rscratch1); __ mov(rthread, r0); @@ -238,12 +238,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0); __ block_comment("} argument shuffle"); - __ block_comment("{ receiver "); - __ get_vm_result(j_rarg0, rthread); - __ block_comment("} receiver "); - - __ mov_metadata(rmethod, entry); - __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized + __ block_comment("{ load target "); + __ movptr(j_rarg0, (intptr_t)receiver); + __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target()), rscratch1); // puts target Method* in rmethod + __ block_comment("} load target "); __ push_cont_fastpath(rthread); @@ -318,7 +316,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, #ifndef PRODUCT stringStream ss; - ss.print("upcall_stub_%s", entry->signature()->as_C_string()); + ss.print("upcall_stub_%s", signature->as_C_string()); const char* name = _masm->code_string(ss.as_string()); #else // PRODUCT const char* name = "upcall_stub"; diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 2c7de0a58a204..716f6d87230e1 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -3890,6 +3890,7 @@ instruct loadRange(iRegI dst, memoryI mem) %{ instruct loadP(iRegP dst, memoryP mem) %{ + predicate(!(UseG1GC && n->as_Load()->barrier_data() != 0)); match(Set dst (LoadP mem)); ins_cost(MEMORY_REF_COST); size(4); @@ -4356,6 +4357,7 @@ instruct movSP(store_ptr_RegP dst, SPRegP src) %{ instruct storeP(memoryP mem, store_ptr_RegP src) %{ + predicate(!(UseG1GC && n->as_Store()->barrier_data() != 0)); match(Set mem (StoreP mem src)); ins_cost(MEMORY_REF_COST); size(4); @@ -5390,6 +5392,7 @@ instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI re %} instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0)); match(Set res (CompareAndSwapP mem (Binary oldval 
newval)));
   effect( KILL ccr, TEMP tmp);
   size(28);
@@ -5659,6 +5662,7 @@ instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr)
 %}
 
 instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
+  predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0));
   match(Set res (GetAndSetP mem newval));
   effect(KILL ccr, TEMP tmp, TEMP res);
   size(16);
diff --git a/src/hotspot/cpu/arm/assembler_arm_32.hpp b/src/hotspot/cpu/arm/assembler_arm_32.hpp
index dd04ad1ab3a3c..e53eefac097ef 100644
--- a/src/hotspot/cpu/arm/assembler_arm_32.hpp
+++ b/src/hotspot/cpu/arm/assembler_arm_32.hpp
@@ -119,8 +119,9 @@ class RegisterSet {
   }
 
   friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) {
-    assert((set1._encoding & set2._encoding) == 0,
-           "encoding constraint");
+// Why is this constraint so strong? Overlapping sets are merged here.
+//    assert((set1._encoding & set2._encoding) == 0,
+//           "encoding constraint");
     return RegisterSet(set1._encoding | set2._encoding);
   }
 
@@ -142,6 +143,11 @@ class RegisterSet {
     }
     return count;
   }
+
+  static RegisterSet from(RegSet set) {
+    assert(set.size(), "RegSet must not be empty");
+    return RegisterSet(set.bits());
+  }
 };
 
 #if R9_IS_SCRATCHED
@@ -157,6 +163,10 @@ class FloatRegisterSet {
 
  public:
 
+  FloatRegisterSet() {
+    _encoding = 0;
+  }
+
   FloatRegisterSet(FloatRegister reg) {
     if (reg->hi_bit() == 0) {
       _encoding = reg->hi_bits() << 12 | reg->lo_bit() << 22 | 1;
@@ -185,6 +195,15 @@ class FloatRegisterSet {
     return (_encoding & 0xFFFFFF00) | ((_encoding & 0xFF) << 1);
   }
 
+  static FloatRegisterSet from(FloatRegSet set) {
+    assert(set.size(), "FloatRegSet must not be empty");
+    // The vector load/store instructions operate on a set of consecutive registers.
+    // For the sake of simplicity, write all registers between the first and last in the set.
+    size_t range = (*set.rbegin())->encoding() - (*set.begin())->encoding() + 1;
+    // push_float stores float registers in pairs
+    return FloatRegisterSet(*set.begin(), (range+1)/2);
+  }
+
 };
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index bb6a93e6f8da7..b14e6f0b4ca0c 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -948,6 +948,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
   if (op->init_check()) {
     Register tmp = op->tmp1()->as_register();
     __ ldrb(tmp, Address(op->klass()->as_register(), InstanceKlass::init_state_offset()));
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
     add_debug_info_for_null_check_here(op->stub()->info());
     __ cmp(tmp, InstanceKlass::fully_initialized);
     __ b(*op->stub()->entry(), ne);
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
index 3c5e29aa8710f..56ae7707fbf38 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
@@ -39,8 +39,10 @@
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
-
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
 
 #define __ masm->
 
 #ifdef PRODUCT
@@ -106,70 +108,87 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
 #endif // !R9_IS_SCRATCHED
 }
 
-// G1 pre-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-// If store_addr != noreg, then previous value is loaded from [store_addr];
-// in such case store_addr and new_val registers are preserved;
-// otherwise pre_val register is preserved.
-void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
-                                                 Register store_addr,
-                                                 Register new_val,
-                                                 Register pre_val,
-                                                 Register tmp1,
-                                                 Register tmp2) {
-  Label done;
-  Label runtime;
-
-  if (store_addr != noreg) {
-    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
-  } else {
-    assert (new_val == noreg, "should be");
-    assert_different_registers(pre_val, tmp1, tmp2, noreg);
-  }
-
-  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
-  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
-  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+                                              const Register thread, const Register value, const Register temp1, const Register temp2) {
+  assert_different_registers(value, temp1, temp2);
+  // Can we store original value in the thread's buffer?
+  // (The index field is typed as size_t.)
+  __ ldr(temp1, Address(thread, in_bytes(index_offset)));  // temp1 := *(index address)
+  __ cbz(temp1, runtime);                                  // jump to runtime if index == 0 (full buffer)
+  // The buffer is not full, store value into it.
+  __ sub(temp1, temp1, wordSize);                          // temp1 := next index
+  __ str(temp1, Address(thread, in_bytes(index_offset)));  // *(index address) := next index
+  __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address
+  // Record the previous value
+  __ str(value, Address(temp2, temp1));                    // *(buffer address + next index) := value
+}
 
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+                                           const Register thread,
+                                           const Register tmp1) {
+  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
   // Is marking active?
   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
   __ ldrb(tmp1, in_progress);
-  __ cbz(tmp1, done);
+}
 
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+                                           const Register obj,
+                                           const Register pre_val,
+                                           const Register thread,
+                                           const Register tmp1,
+                                           const Register tmp2,
+                                           Label& done,
+                                           Label& runtime) {
   // Do we need to load the previous value?
-  if (store_addr != noreg) {
-    __ load_heap_oop(pre_val, Address(store_addr, 0));
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0));
   }
 
   // Is the previous value null?
   __ cbz(pre_val, done);
 
-  // Can we store original value in the thread's buffer?
-  // Is index == 0?
-  // (The index field is typed as size_t.)
+  generate_queue_test_and_insertion(masm,
+                                    G1ThreadLocalData::satb_mark_queue_index_offset(),
+                                    G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+                                    runtime,
+                                    thread, pre_val, tmp1, tmp2);
+  __ b(done);
+}
 
-  __ ldr(tmp1, index);           // tmp1 := *index_adr
-  __ ldr(tmp2, buffer);
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3, LR).
+// If obj != noreg, then the previous value is loaded from [obj];
+// in such case the obj and pre_val registers are preserved;
+// otherwise the pre_val register is preserved.
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,
+                                                 Register pre_val,
+                                                 Register tmp1,
+                                                 Register tmp2) {
+  Label done;
+  Label runtime;
 
-  __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
-  __ b(runtime, lt);             // If negative, goto runtime
+  assert_different_registers(obj, pre_val, tmp1, tmp2, noreg);
 
-  __ str(tmp1, index);           // *index_adr := tmp1
+  generate_pre_barrier_fast_path(masm, Rthread, tmp1);
+  // If marking is not active (*(mark queue active address) == 0), jump to done
+  __ cbz(tmp1, done);
 
-  // Record the previous value
-  __ str(pre_val, Address(tmp2, tmp1));
-  __ b(done);
+  generate_pre_barrier_slow_path(masm, obj, pre_val, Rthread, tmp1, tmp2, done, runtime);
 
   __ bind(runtime);
 
   // save the live input values
-  if (store_addr != noreg) {
-    // avoid raw_push to support any ordering of store_addr and new_val
-    __ push(RegisterSet(store_addr) | RegisterSet(new_val));
-  } else {
-    __ push(pre_val);
+  RegisterSet set = RegisterSet(pre_val) | RegisterSet(R0, R3) | RegisterSet(R12);
+  if (obj != noreg) {
+    // avoid raw_push to support any ordering of obj and pre_val
+    set = set | RegisterSet(obj);
   }
+  __ push(set);
 
   if (pre_val != R0) {
     __ mov(R0, pre_val);
   }
@@ -177,33 +196,17 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
 
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1);
 
-  if (store_addr != noreg) {
-    __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
-  } else {
-    __ pop(pre_val);
-  }
-
+  __ pop(set);
   __ bind(done);
 }
 
-// G1 post-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
-                                                  Register store_addr,
-                                                  Register new_val,
-                                                  Register tmp1,
-                                                  Register tmp2,
-                                                  Register tmp3) {
-
-  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
-  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
-  BarrierSet* bs = BarrierSet::barrier_set();
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
-  CardTable* ct = ctbs->card_table();
-  Label done;
-  Label runtime;
-
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+                                            const Register store_addr,
+                                            const Register new_val,
+                                            const Register tmp1,
+                                            const Register tmp2,
+                                            Label& done,
+                                            bool new_val_may_be_null) {
   // Does store cross heap regions?
 
   __ eor(tmp1, store_addr, new_val);
@@ -211,22 +214,31 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
   __ b(done, eq);
 
   // crosses regions, storing null?
-
-  __ cbz(new_val, done);
-
+  if (new_val_may_be_null) {
+    __ cbz(new_val, done);
+  }
   // storing region crossing non-null, is card already dirty?
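Both the SATB pre-barrier and (below) the dirty-card post-barrier now enqueue through the shared generate_queue_test_and_insertion helper. As a reading aid, here is a minimal standalone C++ sketch of that fast path; PtrQueueModel and try_enqueue are hypothetical stand-ins for the real per-thread queues in G1ThreadLocalData, and the byte-counting, count-down index convention mirrors the assembler version:

```c++
#include <cstddef>

// Hypothetical model of a per-thread G1 pointer queue: 'index' is a byte
// offset that counts down from the buffer capacity and reaches 0 when full.
struct PtrQueueModel {
  size_t index;   // byte offset of the next free slot; 0 means "buffer full"
  void** buffer;  // queue storage, filled from the back towards the front
};

// Mirrors generate_queue_test_and_insertion: returns false exactly where the
// assembler version branches to the runtime call (index == 0, buffer full).
bool try_enqueue(PtrQueueModel& q, void* value) {
  if (q.index == 0) {
    return false;                             // full: take the runtime slow path
  }
  q.index -= sizeof(void*);                   // next index, one word lower
  q.buffer[q.index / sizeof(void*)] = value;  // *(buffer + index) := value
  return true;
}
```

When try_enqueue reports a full buffer, the generated code falls through to the runtime call (write_ref_field_pre_entry / write_ref_field_post_entry), which flushes the buffer.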
   const Register card_addr = tmp1;
 
-  __ mov_address(tmp2, (address)ct->byte_map_base());
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  __ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
   __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
   __ ldrb(tmp2, Address(card_addr));
   __ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
-  __ b(done, eq);
+}
 
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+                                            const Register thread,
+                                            const Register tmp1,
+                                            const Register tmp2,
+                                            const Register tmp3,
+                                            Label& done,
+                                            Label& runtime) {
   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
-  assert(CardTable::dirty_card_val() == 0, "adjust this code");
+  // card_addr is loaded by generate_post_barrier_fast_path
+  const Register card_addr = tmp1;
   __ ldrb(tmp2, Address(card_addr));
   __ cbz(tmp2, done);
@@ -234,29 +246,139 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
 
   // dirty card and log.
   __ strb(__ zero_register(tmp2), Address(card_addr));
+  generate_queue_test_and_insertion(masm,
+                                    G1ThreadLocalData::dirty_card_queue_index_offset(),
+                                    G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+                                    runtime,
+                                    thread, card_addr, tmp2, tmp3);
+  __ b(done);
+}
 
-  __ ldr(tmp2, queue_index);
-  __ ldr(tmp3, buffer);
-  __ subs(tmp2, tmp2, wordSize);
-  __ b(runtime, lt); // go to runtime if now negative
-
-  __ str(tmp2, queue_index);
+// G1 post-barrier.
+// Blows all volatile registers (R0-R3, LR).
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,
+                                                  Register new_val,
+                                                  Register tmp1,
+                                                  Register tmp2,
+                                                  Register tmp3) {
+  Label done;
+  Label runtime;
 
-  __ str(card_addr, Address(tmp3, tmp2));
-  __ b(done);
+  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+  // If card is young, jump to done
+  // card_addr and card are loaded by generate_post_barrier_fast_path
+  const Register card = tmp2;
+  const Register card_addr = tmp1;
+  __ b(done, eq);
 
+  generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
 
   __ bind(runtime);
 
+  RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
+  __ push(set);
+
   if (card_addr != R0) {
     __ mov(R0, card_addr);
   }
   __ mov(R1, Rthread);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
 
+  __ pop(set);
+
   __ bind(done);
 }
 
+#if defined(COMPILER2)
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path, Register tmp1) {
+  SaveLiveRegisters save_registers(masm, stub);
+  if (c_rarg0 != arg) {
+    __ mov(c_rarg0, arg);
+  }
+  __ mov(c_rarg1, Rthread);
+  __ call_VM_leaf(runtime_path, R0, R1);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+                                                    Register obj,
+                                                    Register pre_val,
+                                                    Register thread,
+                                                    Register tmp1,
+                                                    Register tmp2,
+                                                    G1PreBarrierStubC2* stub) {
+  assert(thread == Rthread, "must be");
+  assert_different_registers(obj, pre_val, tmp1, tmp2);
+  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
+
+  stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
+
+  generate_pre_barrier_fast_path(masm, thread, tmp1);
+  // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+  __ cbnz(tmp1, *stub->entry());
+
+  __ bind(*stub->continuation());
+}
+
+void 
G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), tmp1); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + Register tmp3, + G1PostBarrierStubC2* stub) { + assert(thread == Rthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg); + + stub->initialize_registers(thread, tmp1, tmp2, tmp3); + + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ b(*stub->entry(), ne); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + Register tmp3 = stub->tmp3(); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2); + __ b(*stub->continuation()); +} + +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) { bool on_oop = type == T_OBJECT || type == T_ARRAY; @@ -268,7 +390,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator if (on_oop && on_reference) { // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. 
- g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2); + g1_write_barrier_pre(masm, noreg, dst, tmp1, tmp2); } } @@ -295,7 +417,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco } if (needs_pre_barrier) { - g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3); + g1_write_barrier_pre(masm, store_addr, tmp3 /*pre_val*/, tmp1, tmp2); } if (is_null) { diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp index 52932faa3e4de..aefde19142e40 100644 --- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp @@ -33,6 +33,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -43,7 +45,6 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void g1_write_barrier_pre(MacroAssembler* masm, Register store_addr, - Register new_val, Register pre_val, Register tmp1, Register tmp2); @@ -70,6 +71,29 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + Register tmp3, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + }; #endif // CPU_ARM_GC_G1_G1BARRIERSETASSEMBLER_ARM_HPP diff --git a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad new file mode 100644 index 0000000000000..8a0a9e1aa531a --- /dev/null +++ b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad @@ -0,0 +1,201 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_arm.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+                              const MachNode* node,
+                              Register obj,
+                              Register pre_val,
+                              Register tmp1,
+                              Register tmp2,
+                              RegSet preserve = RegSet(),
+                              RegSet no_preserve = RegSet()) {
+  if (!G1PreBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+    stub->preserve(*reg);
+  }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+    stub->dont_preserve(*reg);
+  }
+  g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+                               const MachNode* node,
+                               Register store_addr,
+                               Register new_val,
+                               Register tmp1,
+                               Register tmp2,
+                               Register tmp3) {
+  if (!G1PostBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+  g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegP tmp1, iRegP tmp2, iRegP tmp3, flagsReg icc)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreP mem src));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL icc);
+  ins_cost(2 * (MEMORY_REF_COST + BRANCH_COST));
+  format %{ "sd  $src, $mem\t# ptr" %}
+  ins_encode %{
+    guarantee($mem$$disp == 0, "impossible encoding");
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ str($src$$Register, Address($mem$$Register));
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $src$$Register /* new_val */,
+                       $tmp1$$Register /* tmp1 */,
+                       $tmp2$$Register /* tmp2 */,
+                       $tmp3$$Register /* tmp3 */);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+instruct g1CompareAndSwapP(iRegI res, indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP oldval, flagsReg ccr )
+%{
+  predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  effect(KILL ccr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST));
+  format %{ "loop: \n\t"
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp1, $oldval\n\t" + "STREX.eq $tmp1, $newval, $mem\n\t" + "MOV.ne $tmp1, 0 \n\t" + "EORS.eq $tmp1,$tmp1, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp1" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ cmp($tmp1$$Register, $oldval$$Register); + __ strex($tmp1$$Register, $newval$$Register, $mem$$Address, eq); + __ mov($tmp1$$Register, 0, ne); + __ eors($tmp1$$Register, $tmp1$$Register, 1, eq); + __ b(loop, eq); + __ mov($res$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + $tmp3$$Register /* tmp3 */); + %} + ins_pipe(long_memory_op); +%} + + +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP preval, flagsReg ccr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(KILL ccr, TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST)); + format %{ "loop: \n\t" + "LDREX $preval, $mem\n\t" + "STREX $tmp1, $newval, $mem\n\t" + "CMP $tmp1, 0 \n\t" + "B.ne loop \n\t" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + Label loop; + __ bind(loop); + __ ldrex($preval$$Register,$mem$$Address); + __ strex($tmp1$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp1$$Register, 0); + __ b(loop, ne); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + $tmp3$$Register /* tmp3 */); + %} + ins_pipe(long_memory_op); +%} + +instruct g1LoadP(iRegP dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg icc) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL icc); + ins_cost(MEMORY_REF_COST + BRANCH_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + __ ldr($dst$$Register, Address($mem$$Register)); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_mem); +%} diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp index ea19730673cb6..c13a259a1b960 100644 --- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp @@ -31,6 +31,10 @@ #include "runtime/javaThread.hpp" #include "runtime/stubRoutines.hpp" 
+#ifdef COMPILER2
+#include "gc/shared/c2/barrierSetC2.hpp"
+#endif // COMPILER2
+
 #define __ masm->
 
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -206,7 +210,57 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
 #ifdef COMPILER2
 
 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
-  Unimplemented(); // This must be implemented to support late barrier expansion.
+  if (!OptoReg::is_reg(opto_reg)) {
+    return OptoReg::Bad;
+  }
+
+  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if (!vm_reg->is_valid()) {
+    // skip APSR and FPSCR
+    return OptoReg::Bad;
+  }
+
+  return opto_reg;
 }
 
+void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
+  // Record registers that need to be saved/restored
+  RegMaskIterator rmi(stub->preserve_set());
+  while (rmi.has_next()) {
+    const OptoReg::Name opto_reg = rmi.next();
+    if (OptoReg::is_reg(opto_reg)) {
+      const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+      if (vm_reg->is_Register()) {
+        gp_regs += RegSet::of(vm_reg->as_Register());
+      } else if (vm_reg->is_FloatRegister()) {
+        fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
+      } else {
+        fatal("Unknown register type");
+      }
+    }
+  }
+
+  // Remove C-ABI SOE registers that will be updated
+  gp_regs -= RegSet::range(R4, R11) + RegSet::of(R13, R15);
+
+  // Remove C-ABI SOE fp registers
+  fp_regs -= FloatRegSet::range(S16, S31);
+}
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
+  : masm(masm),
+    gp_regs(),
+    fp_regs() {
+  // Figure out what registers to save/restore
+  initialize(stub);
+
+  // Save registers
+  if (gp_regs.size() > 0) __ push(RegisterSet::from(gp_regs));
+  if (fp_regs.size() > 0) __ fpush(FloatRegisterSet::from(fp_regs));
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+  // Restore registers
+  if (fp_regs.size() > 0) __ fpop(FloatRegisterSet::from(fp_regs));
+  if (gp_regs.size() > 0) __ pop(RegisterSet::from(gp_regs));
+}
 #endif // COMPILER2
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
index 60021390ea26f..054d172f46340 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
@@ -31,7 +31,9 @@
 #ifdef COMPILER2
 #include "code/vmreg.hpp"
 #include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
 
+class BarrierStubC2;
 class Node;
 #endif // COMPILER2
 
@@ -69,4 +71,26 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
 #endif // COMPILER2
 };
 
+#ifdef COMPILER2
+// This class saves and restores the registers that need to be preserved across
+// the runtime call represented by a given C2 barrier stub. Use as follows:
+// {
+//   SaveLiveRegisters save(masm, stub);
+//   ..
+//   __ bl(...);
+//   ..
+// }
+class SaveLiveRegisters {
+private:
+  MacroAssembler* const masm;
+  RegSet gp_regs;
+  FloatRegSet fp_regs;
+
+public:
+  void initialize(BarrierStubC2* stub);
+  SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub);
+  ~SaveLiveRegisters();
+};
+
+#endif // COMPILER2
 #endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp
index 9f486d2a62586..d8961fd293578 100644
--- a/src/hotspot/cpu/arm/register_arm.hpp
+++ b/src/hotspot/cpu/arm/register_arm.hpp
@@ -303,6 +303,31 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
   static const int max_fpr;
 };
 
+typedef AbstractRegSet<Register> RegSet;
+typedef AbstractRegSet<FloatRegister> FloatRegSet;
+
+template <>
+inline Register AbstractRegSet<Register>::first() {
+  if (_bitset == 0) { return noreg; }
+  return as_Register(count_trailing_zeros(_bitset));
+}
+
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::first() {
+  uint32_t first = _bitset & -_bitset;
+  return first ? as_FloatRegister(exact_log2(first)) : fnoreg;
+}
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::last() {
+  if (_bitset == 0) { return fnoreg; }
+  int last = max_size() - 1 - count_leading_zeros(_bitset);
+  return as_FloatRegister(last);
+}
+
+
+
 class VFPSystemRegisterImpl;
 typedef VFPSystemRegisterImpl* VFPSystemRegister;
 class VFPSystemRegisterImpl : public AbstractRegisterImpl {
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 7648e5c5d9260..7c1f3aafe7d52 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -38,6 +38,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/safepointMechanism.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
 #include "runtime/vframeArray.hpp"
 #include "utilities/align.hpp"
 #include "utilities/powerOfTwo.hpp"
diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
index 679f07a028e2c..ec9d237e50da0 100644
--- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
@@ -175,6 +175,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
       break;
     case Interpreter::java_lang_math_fmaD:
     case Interpreter::java_lang_math_fmaF:
+    case Interpreter::java_lang_math_tanh:
       // TODO: Implement intrinsic
       break;
     default:
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index 80519fd89f426..0974ff1f9a9c3 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -3974,6 +3974,7 @@ void TemplateTable::_new() {
   // make sure klass is initialized
   // make sure klass is fully initialized
   __ ldrb(Rtemp, Address(Rklass, InstanceKlass::init_state_offset()));
+  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
   __ cmp(Rtemp, InstanceKlass::fully_initialized);
   __ b(slow_case, ne);
 
diff --git a/src/hotspot/cpu/arm/upcallLinker_arm.cpp b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
index c7645f4a03351..696b2001e6b7b 100644
--- a/src/hotspot/cpu/arm/upcallLinker_arm.cpp
+++ b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
@@ -25,7 +25,7 @@
 #include "prims/upcallLinker.hpp"
 #include "utilities/debug.hpp"
 
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, 
BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 42934dc7c3179..684c06614a97a 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -2274,6 +2274,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { } __ lbz(op->tmp1()->as_register(), in_bytes(InstanceKlass::init_state_offset()), op->klass()->as_register()); + // acquire barrier included in membar_storestore() which follows the allocation immediately. __ cmpwi(CCR0, op->tmp1()->as_register(), InstanceKlass::fully_initialized); __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *op->stub()->entry()); } diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp index 4c1ffeb0d768e..eb16af5e9db1b 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.cpp +++ b/src/hotspot/cpu/ppc/frame_ppc.cpp @@ -117,9 +117,9 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } - common_abi* sender_abi = (common_abi*) fp; + volatile common_abi* sender_abi = (common_abi*) fp; // May get updated concurrently by deoptimization! intptr_t* sender_sp = (intptr_t*) fp; - address sender_pc = (address) sender_abi->lr;; + address sender_pc = (address) sender_abi->lr; if (Continuation::is_return_barrier_entry(sender_pc)) { // If our sender_pc is the return barrier, then our "real" sender is the continuation entry @@ -134,9 +134,18 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } + intptr_t* unextended_sender_sp = is_interpreted_frame() ? interpreter_frame_sender_sp() : sender_sp; + + // If the sender is a deoptimized nmethod we need to check if the original pc is valid. + nmethod* sender_nm = sender_blob->as_nmethod_or_null(); + if (sender_nm != nullptr && sender_nm->is_deopt_pc(sender_pc)) { + address orig_pc = *(address*)((address)unextended_sender_sp + sender_nm->orig_pc_offset()); + if (!sender_nm->insts_contains_inclusive(orig_pc)) return false; + } + // It should be safe to construct the sender though it might not be valid. - frame sender(sender_sp, sender_pc, nullptr /* unextended_sp */, nullptr /* fp */, sender_blob); + frame sender(sender_sp, sender_pc, unextended_sender_sp, nullptr /* fp */, sender_blob); // Do we have a valid fp? 
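The frame_ppc.cpp hunk above adds a validity check for senders that are deoptimized nmethods: the original pc stashed in the sender frame at orig_pc_offset must still point into the nmethod's code. A simplified standalone sketch of that check, with a hypothetical NMethodModel standing in for the real nmethod:

```c++
#include <cstddef>
#include <cstdint>

// Illustrative stand-in for the few nmethod fields the check needs.
struct NMethodModel {
  const uint8_t* insts_begin;
  const uint8_t* insts_end;
  ptrdiff_t      orig_pc_offset;  // where the pre-deopt pc was stashed in the frame
  bool insts_contains_inclusive(const uint8_t* pc) const {
    return insts_begin <= pc && pc <= insts_end;
  }
};

// When the sender pc points at the deopt handler, the saved original pc must
// still fall inside the nmethod, or the frame is not safe to walk.
bool deopt_sender_pc_is_safe(const NMethodModel& nm, const uint8_t* unextended_sender_sp) {
  const uint8_t* orig_pc =
      *reinterpret_cast<const uint8_t* const*>(unextended_sender_sp + nm.orig_pc_offset);
  return nm.insts_contains_inclusive(orig_pc);
}
```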
 address sender_fp = (address) sender.fp();
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
index 7d230d301c22b..39693bdf925bf 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
@@ -41,10 +41,20 @@
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
 
 #define __ masm->
 
+static void generate_marking_inactive_test(MacroAssembler* masm) {
+  int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+  __ lbz(R0, active_offset, R16_thread); // R0 := *(mark queue active address)
+  __ cmpwi(CCR0, R0, 0);
+}
+
 void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register from, Register to, Register count,
                                                             Register preserve1, Register preserve2) {
@@ -58,13 +68,7 @@
   Label filtered;
 
   // Is marking active?
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
-  } else {
-    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-    __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
-  }
-  __ cmpdi(CCR0, R0, 0);
+  generate_marking_inactive_test(masm);
   __ beq(CCR0, filtered);
 
   __ save_LR(R0);
@@ -109,35 +113,48 @@
   __ restore_LR(R0);
 }
 
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+                                     const Register value, const Register temp) {
+  assert_different_registers(value, temp);
+  // Can we store a value in the given thread's buffer?
+  // (The index field is typed as size_t.)
+  __ ld(temp, in_bytes(index_offset), R16_thread);  // temp := *(index address)
+  __ cmpdi(CCR0, temp, 0);                          // jump to runtime if index == 0 (full buffer)
+  __ beq(CCR0, runtime);
+  // The buffer is not full, store value into it.
+ __ ld(R0, in_bytes(buffer_offset), R16_thread); // R0 := buffer address + __ addi(temp, temp, -wordSize); // temp := next index + __ std(temp, in_bytes(index_offset), R16_thread); // *(index address) := next index + __ stdx(value, temp, R0); // *(buffer address + next index) := value +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val, Register tmp1, Register tmp2, MacroAssembler::PreservationLevel preservation_level) { + assert_different_registers(pre_val, tmp1, tmp2); + bool not_null = (decorators & IS_NOT_NULL) != 0, preloaded = obj == noreg; Register nv_save = noreg; - if (preloaded) { + // Determine necessary runtime invocation preservation measures + const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR; + const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS; + const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS; + int nbytes_save = 0; + + if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) { // We are not loading the previous value so make // sure that we don't trash the value in pre_val // with the code below. - assert_different_registers(pre_val, tmp1, tmp2); - if (pre_val->is_volatile()) { - nv_save = !tmp1->is_volatile() ? tmp1 : tmp2; - assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register"); - } + nv_save = !tmp1->is_volatile() ? tmp1 : tmp2; + assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register"); } Label runtime, filtered; - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } - __ cmpdi(CCR0, tmp1, 0); + generate_marking_inactive_test(masm); __ beq(CCR0, filtered); // Do we need to load the previous value? @@ -175,28 +192,12 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator // Can we store original value in the thread's buffer? // Is index == 0? // (The index field is typed as size_t.) - const Register Rbuffer = tmp1, Rindex = tmp2; - - __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread); - __ cmpdi(CCR0, Rindex, 0); - __ beq(CCR0, runtime); // If index == 0, goto runtime. - __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread); - - __ addi(Rindex, Rindex, -wordSize); // Decrement index. - __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread); - - // Record the previous value. 
-  __ stdx(pre_val, Rbuffer, Rindex);
+  generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+                           runtime, pre_val, tmp1);
 
   __ b(filtered);
 
   __ bind(runtime);
 
-  // Determine necessary runtime invocation preservation measures
-  const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
-  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
-  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
-  int nbytes_save = 0;
-
   // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
   if (needs_frame) {
     if (preserve_gp_registers) {
@@ -210,11 +211,11 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
     __ push_frame_reg_args(nbytes_save, tmp2);
   }
 
-  if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+  if (nv_save != noreg) {
     __ mr(nv_save, pre_val); // Save pre_val across C call if it was preloaded.
   }
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
-  if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+  if (nv_save != noreg) {
     __ mr(pre_val, nv_save); // restore
   }
 
@@ -230,6 +231,26 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
   __ bind(filtered);
 }
 
+static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
+  __ xorr(R0, store_addr, new_val); // R0 := store address ^ new value
+  __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // R0 := ((store address ^ new value) >> LogOfHRGrainBytes)
+}
+
+static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  __ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
+  __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp2 := card address relative to card table base
+  __ lbzx(R0, tmp1, tmp2); // R0 := card value
+  __ cmpwi(CCR0, R0, (int)G1CardTable::g1_young_card_val());
+  return Address(tmp1, tmp2); // return card address
+}
+
+static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
+  __ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
+  __ lbzx(R0, card_addr.base(), card_addr.index()); // R0 := card value
+  __ cmpwi(CCR0, R0, (int)G1CardTable::dirty_card_val()); // card value == dirty_card_val?
+}
+
 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
                                                   Register store_addr, Register new_val,
                                                   Register tmp1, Register tmp2, Register tmp3,
@@ -241,9 +262,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
 
   CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
 
-  // Does store cross heap regions?
-  __ xorr(tmp1, store_addr, new_val);
-  __ srdi_(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
+  generate_region_crossing_test(masm, store_addr, new_val);
   __ beq(CCR0, filtered);
 
   // Crosses regions, storing null?
@@ -257,43 +276,22 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
     __ beq(CCR0, filtered);
   }
 
-  // Storing region crossing non-null, is card already dirty?
- const Register Rcard_addr = tmp1; - Register Rbase = tmp2; - __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3); - - __ srdi(Rcard_addr, store_addr, CardTable::card_shift()); - - // Get the address of the card. - __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); - __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val()); + Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2); __ beq(CCR0, filtered); - __ membar(Assembler::StoreLoad); - __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); // Reload after membar. - __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val()); + generate_card_dirty_test(masm, card_addr); __ beq(CCR0, filtered); - // Storing a region crossing, non-null oop, card is clean. - // Dirty card and log. - __ li(tmp3, (int)G1CardTable::dirty_card_val()); - //release(); // G1: oops are allowed to get visible after dirty marking. - __ stbx(tmp3, Rbase, Rcard_addr); - - __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued. - Rbase = noreg; // end of lifetime + __ li(R0, (int)G1CardTable::dirty_card_val()); + __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val - const Register Rqueue_index = tmp2, - Rqueue_buf = tmp3; - __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread); - __ cmpdi(CCR0, Rqueue_index, 0); - __ beq(CCR0, runtime); // index == 0 then jump to runtime - __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread); + Register Rcard_addr = tmp3; + __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued. - __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index - __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread); - - __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card + generate_queue_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, Rcard_addr, tmp1); __ b(filtered); __ bind(runtime); @@ -392,6 +390,142 @@ void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value __ bind(done); } +#ifdef COMPILER2 + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + __ call_VM_leaf(runtime_path, arg, R16_thread); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert_different_registers(obj, tmp1, tmp2, R0); + assert_different_registers(pre_val, tmp1, R0); + assert(!UseCompressedOops || tmp2 != noreg, "tmp2 needed with CompressedOops"); + + stub->initialize_registers(obj, pre_val, R16_thread, tmp1, tmp2); + + generate_marking_inactive_test(masm); + __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register tmp1 = stub->tmp1(); + + __ bind(*stub->entry()); + + if (obj != noreg) { + // Note: C2 currently doesn't use 
implicit null checks with barriers. + // Otherwise, obj could be null and the following instruction would raise a SIGSEGV. + if (UseCompressedOops) { + __ lwz(pre_val, 0, obj); + } else { + __ ld(pre_val, 0, obj); + } + } + __ cmpdi(CCR0, pre_val, 0); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation()); + + Register pre_val_decoded = pre_val; + if (UseCompressedOops) { + pre_val_decoded = __ decode_heap_oop_not_null(stub->tmp2(), pre_val); + } + + generate_queue_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, pre_val_decoded, tmp1); + __ b(*stub->continuation()); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val_decoded, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub, + bool decode_new_val) { + assert_different_registers(store_addr, new_val, tmp1, R0); + assert_different_registers(store_addr, tmp1, tmp2, R0); + + stub->initialize_registers(R16_thread, tmp1, tmp2); + + bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + Register new_val_decoded = new_val; + + if (decode_new_val) { + assert(UseCompressedOops, "or should not be here"); + if (null_check_required && CompressedOops::base() != nullptr) { + // We prefer doing the null check after the region crossing check. + // Only compressed oop modes with base != null require a null check here. + __ cmpwi(CCR0, new_val, 0); + __ beq(CCR0, *stub->continuation()); + null_check_required = false; + } + new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val); + } + + generate_region_crossing_test(masm, store_addr, new_val_decoded); + __ beq(CCR0, *stub->continuation()); + + // crosses regions, storing null? + if (null_check_required) { + __ cmpdi(CCR0, new_val_decoded, 0); + __ beq(CCR0, *stub->continuation()); + } + + Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2); + assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub"); + __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Address card_addr(stub->tmp1(), stub->tmp2()); // See above. + + __ bind(*stub->entry()); + + generate_card_dirty_test(masm, card_addr); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation()); + + __ li(R0, (int)G1CardTable::dirty_card_val()); + __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val + + Register Rcard_addr = stub->tmp1(); + __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued. 
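+  // (tmp1 held the card table base, which is dead at this point, so it can be
+  // reused for the full card address that gets enqueued.)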
+ + generate_queue_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, Rcard_addr, stub->tmp2()); + __ b(*stub->continuation()); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ b(*stub->continuation()); +} + +#endif // COMPILER2 + #ifdef COMPILER1 #undef __ @@ -470,13 +604,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* __ std(tmp2, -24, R1_SP); // Is marking still active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ lwz(tmp, satb_q_active_byte_offset, R16_thread); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbz(tmp, satb_q_active_byte_offset, R16_thread); - } - __ cmpdi(CCR0, tmp, 0); + generate_marking_inactive_test(sasm); __ beq(CCR0, marking_not_active); __ bind(restart); diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp index d9a252ff6eaee..1c9fe8a5d106f 100644 --- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp @@ -30,10 +30,16 @@ #include "gc/shared/modRefBarrierSetAssembler.hpp" #include "utilities/macros.hpp" +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif + class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -59,6 +65,25 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { MacroAssembler::PreservationLevel preservation_level); public: +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub, + bool decode_new_val); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif #ifdef COMPILER1 void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); diff --git a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad new file mode 100644 index 0000000000000..f4163242cad7b --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad @@ -0,0 +1,684 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024 SAP SE. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). 
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_ppc.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void pre_write_barrier(MacroAssembler* masm,
+                              const MachNode* node,
+                              Register obj,
+                              Register pre_val,
+                              Register tmp1,
+                              Register tmp2 = noreg, // only needed with CompressedOops when pre_val needs to be preserved
+                              RegSet preserve = RegSet(),
+                              RegSet no_preserve = RegSet()) {
+  if (!G1PreBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+    stub->preserve(*reg);
+  }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+    stub->dont_preserve(*reg);
+  }
+  g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, tmp1, (tmp2 != noreg) ? tmp2 : pre_val, stub);
+}
+
+static void post_write_barrier(MacroAssembler* masm,
+                               const MachNode* node,
+                               Register store_addr,
+                               Register new_val,
+                               Register tmp1,
+                               Register tmp2,
+                               bool decode_new_val = false) {
+  if (!G1PostBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+  g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreP mem src));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+  ins_cost(2 * MEMORY_REF_COST);
+  format %{ "std $mem, $src\t# ptr" %}
+  ins_encode %{
+    pre_write_barrier(masm, this,
+                      $mem$$Register,
+                      $tmp1$$Register,
+                      $tmp2$$Register,
+                      noreg,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ std($src$$Register, 0, $mem$$Register);
+    post_write_barrier(masm, this,
+                       $mem$$Register,
+                       $src$$Register /* new_val */,
+                       $tmp1$$Register,
+                       $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct g1StoreN(indirect mem, iRegNsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreN mem src));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+  ins_cost(2 * MEMORY_REF_COST);
+  format %{ "stw $mem, $src\t# ptr" %}
+  ins_encode %{
+    pre_write_barrier(masm, this,
+                      $mem$$Register,
+                      $tmp1$$Register,
+                      $tmp2$$Register,
+                      noreg,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ stw($src$$Register, 0, $mem$$Register);
+    post_write_barrier(masm, this,
+                       $mem$$Register,
+                       $src$$Register /* new_val */,
+                       $tmp1$$Register,
+                       $tmp2$$Register,
+                       true /* decode_new_val */);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct g1EncodePAndStoreN(indirect
mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "encode_heap_oop $src\n\t" + "stw $mem, $src\t# ptr" %} + ins_encode %{ + pre_write_barrier(masm, this, + $mem$$Register, + $tmp1$$Register, + $tmp2$$Register, + noreg, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + Register encoded_oop = noreg; + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + encoded_oop = __ encode_heap_oop($tmp2$$Register, $src$$Register); + } else { + encoded_oop = __ encode_heap_oop_not_null($tmp2$$Register, $src$$Register); + } + __ stw(encoded_oop, 0, $mem$$Register); + post_write_barrier(masm, this, + $mem$$Register, + $src$$Register /* new_val */, + $tmp1$$Register, + $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeP(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgd $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeP_acq(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgd acq $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
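+      // sync is a full barrier and also covers the seq_cst case.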
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeN(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgw $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeN_acq(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgw acq $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + __ bind(no_update); // weak version requires no memory barrier on failure + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + __ bind(no_update); // weak version requires no memory barrier on failure + %} + ins_pipe(pipe_class_default); +%} + +instruct g1GetAndSetP(iRegPdst res, indirect mem, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (GetAndSetP mem newval)); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "GetAndSetP $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ getandsetd($res$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg /* obj */, + $res$$Register /* res */, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1GetAndSetN(iRegNdst res, indirect mem, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (GetAndSetN mem newval)); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "GetAndSetN $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ getandsetw($res$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg /* obj */, + $res$$Register /* res */, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1LoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0); + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). + match(Set dst (LoadP mem)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register); + pre_write_barrier(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1LoadN(iRegNdst dst, memoryAlg4 mem, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0); + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). 
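+  // Such loads only need the SATB pre barrier, which keeps the loaded
+  // referent alive for concurrent marking; no store happens here, so no
+  // card marking is required.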
+ match(Set dst (LoadN mem)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "lwz $dst, $mem\t# ptr" %} + ins_encode %{ + __ lwz($dst$$Register, $mem$$disp, $mem$$base$$Register); + pre_write_barrier(masm, this, + noreg /* obj */, + $dst$$Register, + $tmp1$$Register, + $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp index 3cb5c5a628f39..5315080721249 100644 --- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp @@ -144,9 +144,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, Dec // Invoke runtime. address jrt_address = nullptr; if (UseCompressedOops) { - jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry); + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop); } else { - jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry); + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop); } assert(jrt_address != nullptr, "jrt routine cannot be found"); @@ -302,7 +302,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm } // Invoke runtime. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, R16_thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, R16_thread); // Restore to-be-preserved registers. if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) { @@ -906,7 +906,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ push_frame_reg_args(nbytes_save, R11_tmp1); // Invoke runtime. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), R0_pre_val, R16_thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), R0_pre_val, R16_thread); // Restore to-be-preserved registers. 
__ pop_frame(); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index a8635af9582d1..a194c030a6124 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2410,7 +2410,7 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); - Label L_fallthrough; + Label L_check_thread, L_fallthrough; if (L_fast_path == nullptr) { L_fast_path = &L_fallthrough; } else if (L_slow_path == nullptr) { @@ -2419,10 +2419,14 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa // Fast path check: class is fully initialized lbz(R0, in_bytes(InstanceKlass::init_state_offset()), klass); + // acquire by cmp-branch-isync if fully_initialized cmpwi(CCR0, R0, InstanceKlass::fully_initialized); - beq(CCR0, *L_fast_path); + bne(CCR0, L_check_thread); + isync(); + b(*L_fast_path); // Fast path check: current thread is initializer thread + bind(L_check_thread); ld(R0, in_bytes(InstanceKlass::init_thread_offset()), klass); cmpd(CCR0, thread, R0); if (L_slow_path == &L_fallthrough) { @@ -2715,13 +2719,34 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe b(success); bind(notRecursive); + + // Set owner to null. + // Release to satisfy the JMM + release(); + li(temp, 0); + std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header); orr(temp, temp, displaced_header); // Will be 0 if both are 0. cmpdi(flag, temp, 0); - bne(flag, failure); - release(); - std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + beq(flag, success); // If so we are done. + + // Check if there is a successor. + ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header); + cmpdi(flag, temp, 0); + bne(flag, success); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); + + crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path + b(failure); // flag == EQ indicates success, decrement held monitor count // flag == NE indicates failure @@ -3028,27 +3053,39 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f bind(not_recursive); - Label release_; + Label set_eq_unlocked; const Register t2 = tmp2; + // Set owner to null. + // Release to satisfy the JMM + release(); + li(t, 0); + std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor); orr(t, t, t2); cmpdi(CCR0, t, 0); - beq(CCR0, release_); + beq(CCR0, unlocked); // If so we are done. 
- // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor); + // Check if there is a successor. + ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor); + cmpdi(CCR0, t, 0); + bne(CCR0, set_eq_unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); + + crxor(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = NE => slow path b(slow_path); - bind(release_); - // Set owner to null. - release(); - // t contains 0 - std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + bind(set_eq_unlocked); + crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = EQ => fast path } bind(unlocked); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 612b7bf898c08..d15f9929671ba 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -1000,6 +1000,10 @@ int MachNode::compute_padding(int current_offset) const { // Should the matcher clone input 'm' of node 'n'? bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + if (is_encode_and_store_pattern(n, m)) { + mstack.push(m, Visit); + return true; + } return false; } @@ -5407,7 +5411,7 @@ instruct loadRange(iRegIdst dst, memory mem) %{ // Load Compressed Pointer instruct loadN(iRegNdst dst, memory mem) %{ match(Set dst (LoadN mem)); - predicate(n->as_Load()->is_unordered() || followed_by_acquire(n)); + predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// load compressed ptr" %} @@ -5419,6 +5423,7 @@ instruct loadN(iRegNdst dst, memory mem) %{ // Load Compressed Pointer acquire. instruct loadN_ac(iRegNdst dst, memory mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(3*MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t" @@ -5432,7 +5437,7 @@ instruct loadN_ac(iRegNdst dst, memory mem) %{ // Load Compressed Pointer and decode it if narrow_oop_shift == 0. 
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{ match(Set dst (DecodeN (LoadN mem))); - predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0); + predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0 && _kids[0]->_leaf->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %} @@ -6423,6 +6428,7 @@ instruct reinterpretX(vecX dst) %{ // Store Compressed Oop instruct storeN(memory dst, iRegN_P2N src) %{ match(Set dst (StoreN dst src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "STW $src, $dst \t// compressed oop" %} @@ -6598,7 +6604,7 @@ instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{ instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{ match(Set dst (EncodeP src)); predicate(CompressedOops::shift() != 0 && - CompressedOops::base() ==0); + CompressedOops::base() == nullptr); format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != nullptr" %} size(4); @@ -6695,7 +6701,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{ predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull && n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) && CompressedOops::shift() != 0 && - CompressedOops::base() != 0); + CompressedOops::base() != nullptr); ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex. effect(TEMP crx); @@ -6707,7 +6713,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{ instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{ match(Set dst (DecodeN src)); predicate(CompressedOops::shift() != 0 && - CompressedOops::base() == 0); + CompressedOops::base() == nullptr); format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %} size(4); @@ -6825,7 +6831,7 @@ instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{ predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull || n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) && CompressedOops::shift() != 0 && - CompressedOops::base() != 0); + CompressedOops::base() != nullptr); ins_cost(2 * DEFAULT_COST); format %{ "DecodeN $dst, $src \t// $src != nullptr, postalloc expanded" %} @@ -7477,6 +7483,7 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ @@ -7676,7 +7683,7 @@ instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ @@ -7690,7 +7697,7 @@ instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, 
iReg
 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
-  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
   ins_encode %{
@@ -7939,7 +7946,7 @@ instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr,
 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
-  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
   ins_encode %{
@@ -7953,7 +7960,7 @@ instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iReg
 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
-  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
   ins_encode %{
@@ -8262,6 +8269,7 @@ instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr
 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetN mem_ptr src));
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
   ins_encode %{
diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp
index 302d49884fae3..b7ba4f053b5d6 100644
--- a/src/hotspot/cpu/ppc/register_ppc.hpp
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp
@@ -27,6 +27,7 @@
 #define CPU_PPC_REGISTER_PPC_HPP
 
 #include "asm/register.hpp"
+#include "utilities/count_trailing_zeros.hpp"
 
 // forward declaration
 class VMRegImpl;
@@ -555,4 +556,12 @@ constexpr Register R29_TOC = R29;
 constexpr Register R11_scratch1 = R11;
 constexpr Register R12_scratch2 = R12;
 
+template <>
+inline Register AbstractRegSet<Register>::first() {
+  if (_bitset == 0) { return noreg; }
+  return as_Register(count_trailing_zeros(_bitset));
+}
+
+typedef AbstractRegSet<Register> RegSet;
+
 #endif // CPU_PPC_REGISTER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index ee3f1911e2082..206c161287fa2 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -4587,6 +4587,30 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
     return start;
   }
 
+  // load Method* target of MethodHandle
+  // 
R3_ARG1 = jobject receiver + // R19_method = result Method* + address generate_upcall_stub_load_target() { + + StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target"); + address start = __ pc(); + + __ resolve_global_jobject(R3_ARG1, R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS); + // Load target method from receiver + __ load_heap_oop(R19_method, java_lang_invoke_MethodHandle::form_offset(), R3_ARG1, + R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL); + __ load_heap_oop(R19_method, java_lang_invoke_LambdaForm::vmentry_offset(), R19_method, + R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL); + __ load_heap_oop(R19_method, java_lang_invoke_MemberName::method_offset(), R19_method, + R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL); + __ ld(R19_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset(), R19_method); + __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread); // just in case callee is deoptimized + + __ blr(); + + return start; + } + // Initialization void generate_initial_stubs() { // Generates all stubs and initializes the entry points @@ -4651,6 +4675,7 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { } StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); + StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target(); } void generate_compiler_stubs() { diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index 03dca2aeb9b7b..cf3dd4cbd34c0 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -1078,6 +1078,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M case Interpreter::java_lang_math_sin : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break; case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break; case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break; + case Interpreter::java_lang_math_tanh : /* run interpreted */ break; case Interpreter::java_lang_math_abs : /* run interpreted */ break; case Interpreter::java_lang_math_sqrt : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); break; case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break; diff --git a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp index b60fd4f16d163..635bab900d157 100644 --- a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp +++ b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/javaClasses.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" #include "prims/upcallLinker.hpp" @@ -118,7 +119,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr static const int upcall_stub_code_base_size = 1024; static const int upcall_stub_size_per_arg = 16; // arg save & restore + move -address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, +address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, 
@@ -221,7 +222,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, __ block_comment("{ on_entry"); __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry), R0); __ addi(R3_ARG1, R1_SP, frame_data_offset); - __ load_const_optimized(R4_ARG2, (intptr_t)receiver, R0); __ call_c(call_target_address); __ mr(R16_thread, R3_RET); __ block_comment("} on_entry"); @@ -236,12 +236,12 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, arg_shuffle.generate(_masm, as_VMStorage(callerSP), frame::native_abi_minframe_size, frame::jit_out_preserve_size); __ block_comment("} argument shuffle"); - __ block_comment("{ receiver "); - __ get_vm_result(R3_ARG1); - __ block_comment("} receiver "); - - __ load_const_optimized(R19_method, (intptr_t)entry); - __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread); + __ block_comment("{ load target "); + __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target(), R0); + __ load_const_optimized(R3_ARG1, (intptr_t)receiver, R0); + __ mtctr(call_target_address); + __ bctrl(); // loads target Method* into R19_method + __ block_comment("} load target "); __ push_cont_fastpath(); @@ -326,7 +326,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, #ifndef PRODUCT stringStream ss; - ss.print("upcall_stub_%s", entry->signature()->as_C_string()); + ss.print("upcall_stub_%s", signature->as_C_string()); const char* name = _masm->code_string(ss.as_string()); #else // PRODUCT const char* name = "upcall_stub"; diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 98ab86bf72eb6..ad3d18fa39268 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -46,8 +46,10 @@ class Argument { public: enum { - n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) - n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc + n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) + n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + n_vector_register_parameters_c = 16, // v8, v9, ... v23 n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) 
@@ -143,6 +145,10 @@ constexpr Register x19_sender_sp = x19; // Sender's SP while in interpreter constexpr Register t0 = x5; constexpr Register t1 = x6; constexpr Register t2 = x7; +constexpr Register t3 = x28; +constexpr Register t4 = x29; +constexpr Register t5 = x30; +constexpr Register t6 = x31; const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 @@ -705,6 +711,16 @@ class Assembler : public AbstractAssembler { emit(insn); } + void fencei() { + unsigned insn = 0; + patch((address)&insn, 6, 0, 0b0001111); // opcode + patch((address)&insn, 11, 7, 0b00000); // rd + patch((address)&insn, 14, 12, 0b001); // func + patch((address)&insn, 19, 15, 0b00000); // rs1 + patch((address)&insn, 31, 20, 0b000000000000); // fm + emit(insn); + } + #define INSN(NAME, op, funct3, funct7) \ void NAME() { \ unsigned insn = 0; \ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index 940706b0a7376..828f70e4decee 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -980,6 +980,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { __ lbu(t0, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); __ mv(t1, (u1)InstanceKlass::fully_initialized); add_debug_info_for_null_check_here(op->stub()->info()); __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true); diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index 7995750aba96b..db18525b89c76 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -71,32 +71,4 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ emit_int32(0); // nmethod guard value } -int C2HandleAnonOMOwnerStub::max_size() const { - // Max size of stub has been determined by testing with 0 without using RISC-V compressed - // instruction-set extension, in which case C2CodeStubList::emit() will throw an assertion - // and report the actual size that is needed. - return 20 DEBUG_ONLY(+8); -} - -void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { - __ bind(entry()); - Register mon = monitor(); - Register t = tmp(); - assert(t != noreg, "need tmp register"); - - // Fix owner to be the current thread. - __ sd(xthread, Address(mon, ObjectMonitor::owner_offset())); - - // Pop owner object from lock-stack. - __ lwu(t, Address(xthread, JavaThread::lock_stack_top_offset())); - __ subw(t, t, oopSize); -#ifdef ASSERT - __ add(t0, xthread, t); - __ sd(zr, Address(t0, 0)); -#endif - __ sw(t, Address(xthread, JavaThread::lock_stack_top_offset())); - - __ j(continuation()); -} - #undef __ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index e2c9b9dd609e0..75f87e35adf41 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmp1Reg; + Register owner_addr = tmp1Reg; Register tmp = tmp2Reg; Label object_has_monitor; // Finish fast lock successfully. 
MUST branch to with flag == 0 @@ -222,15 +223,33 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, j(unlocked); bind(notRecursive); - ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); - ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(t0, t0, disp_hdr); // Will be 0 if both are 0. - bnez(t0, slow_path); + // Compute owner address. + la(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); - // need a release store here - la(tmp, Address(tmp, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp)); // set unowned + sd(zr, Address(owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. + ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); + ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(t0, t0, tmp1Reg); + beqz(t0, unlocked); // If so we are done. + + // Check if there is a successor. + ld(t0, Address(tmp, ObjectMonitor::succ_offset())); + bnez(t0, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + + mv(flag, 1); + j(slow_path); bind(unlocked); mv(flag, zr); @@ -534,28 +553,35 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, bind(not_recursive); - Label release; const Register tmp2_owner_addr = tmp2; // Compute owner address. la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + sd(zr, Address(tmp2_owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); orr(t0, t0, tmp3_t); - beqz(t0, release); + beqz(t0, unlocked); // If so we are done. - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - sd(xthread, Address(tmp2_owner_addr)); - j(slow_path); + // Check if there is a successor. + ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset())); + bnez(tmp3_t, unlocked); // If so we are done. - bind(release); - // Set owner to null. - membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp2_owner_addr)); + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + + mv(flag, 1); + j(slow_path); } bind(unlocked); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index 062f80290626f..7036c44d99dc9 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -96,6 +99,55 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ pop_reg(saved_regs, sp); } +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register thread, const Register value, const Register tmp1, const Register tmp2) { + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address) + __ beqz(tmp1, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ sub(tmp1, tmp1, wordSize); // tmp1 := next index + __ sd(tmp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index + __ ld(tmp2, Address(thread, in_bytes(buffer_offset))); // tmp2 := buffer address + __ add(tmp2, tmp2, tmp1); + __ sd(value, Address(tmp2)); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp1, in_progress); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ beqz(pre_val, done, true); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp1, tmp2); + __ j(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -116,43 +168,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert_different_registers(obj, pre_val, tmp1, tmp2); assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? 
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width - __ lwu(tmp1, in_progress); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbu(tmp1, in_progress); - } + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is not active (*(mark queue active address) == 0), jump to done __ beqz(tmp1, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ beqz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ ld(tmp1, index); // tmp := *index_adr - __ beqz(tmp1, runtime); // tmp == 0? - // If yes, goto runtime - - __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize - __ sd(tmp1, index); // *index_adr := tmp - __ ld(tmp2, buffer); - __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr - - // Record the previous value - __ sd(pre_val, Address(tmp1, 0)); - __ j(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime); __ bind(runtime); @@ -171,6 +190,49 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, } +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp1, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { + // Does store cross heap regions? + __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value + __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) + __ beqz(tmp1, done); + // Crosses regions, storing null? + if (new_val_may_be_null) { + __ beqz(new_val, done); + } + // Storing region crossing non-null, is card young? + __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base + __ load_byte_map_base(tmp2); // tmp2 := card table base address + __ add(tmp1, tmp1, tmp2); // tmp1 := card address + __ lbu(tmp2, Address(tmp1)); // tmp2 := card +} + +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + __ membar(MacroAssembler::StoreLoad); // StoreLoad membar + __ lbu(tmp2, Address(tmp1)); // tmp2 := card + __ beqz(tmp2, done, true); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. 
+  STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+  __ sb(zr, Address(tmp1));       // *(card address) := dirty_card_val
+  generate_queue_test_and_insertion(masm,
+                                    G1ThreadLocalData::dirty_card_queue_index_offset(),
+                                    G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+                                    runtime,
+                                    thread, tmp1, tmp2, t0);
+  __ j(done);
+}
+
 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                   Register store_addr,
                                                   Register new_val,
@@ -179,73 +241,119 @@
                                                   Register tmp2) {
   assert(thread == xthread, "must be");
   assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
-  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg &&
-         tmp2 != noreg, "expecting a register");
-
-  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
-  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
-  BarrierSet* bs = BarrierSet::barrier_set();
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
+         "expecting a register");

   Label done;
   Label runtime;

-  // Does store cross heap regions?
+  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+  // If card is young, jump to done (tmp2 holds the card value)
+  __ mv(t0, (int)G1CardTable::g1_young_card_val());
+  __ beq(tmp2, t0, done); // card == young_card_val?
+  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);

-  __ xorr(tmp1, store_addr, new_val);
-  __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
-  __ beqz(tmp1, done);
+  __ bind(runtime);
+  // save the live input values
+  RegSet saved = RegSet::of(store_addr);
+  __ push_reg(saved, sp);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
+  __ pop_reg(saved, sp);

-  // crosses regions, storing null?
+  __ bind(done);
+}

-  __ beqz(new_val, done);
+#if defined(COMPILER2)

-  // storing region crossing non-null, is card already dirty?
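For reference, the two refactored helpers above keep the post-barrier semantics unchanged; only the packaging moved so the C2 code below can emit the fast path inline and push the rest into an out-of-line stub. A hedged pseudo-C++ restatement of the combined control flow (byte_map_base and try_enqueue_dirty_card are descriptive stand-ins, not VM API):

// Restates generate_post_barrier_fast_path + generate_post_barrier_slow_path.
extern volatile uint8_t* byte_map_base;                                    // stand-in
static bool try_enqueue_dirty_card(JavaThread* t, volatile uint8_t* card); // stand-in
static void g1_post_barrier(uintptr_t store_addr, uintptr_t new_val, JavaThread* thread) {
  if (((store_addr ^ new_val) >> G1HeapRegion::LogOfHRGrainBytes) == 0) return; // same region
  if (new_val == 0) return;                                                     // storing null
  volatile uint8_t* card = byte_map_base + (store_addr >> CardTable::card_shift());
  if (*card == G1CardTable::g1_young_card_val()) return;   // fast path ends here
  OrderAccess::fence();                                    // StoreLoad, in the slow path
  if (*card == CardTable::dirty_card_val()) return;        // already dirty (== 0)
  *card = CardTable::dirty_card_val();                     // dirty the card
  if (!try_enqueue_dirty_card(thread, card)) {             // queue full -> runtime
    G1BarrierSetRuntime::write_ref_field_post_entry((volatile void*)card, thread);
  }
}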
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mv(c_rarg0, arg); + } + __ mv(c_rarg1, xthread); + __ mv(t0, runtime_path); + __ jalr(t0); +} - const Register card_addr = tmp1; +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert(thread == xthread, "must be"); + assert_different_registers(obj, pre_val, tmp1, tmp2); + assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - __ srli(card_addr, store_addr, CardTable::card_shift()); + stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); - // get the address of the card - __ load_byte_map_base(tmp2); - __ add(card_addr, card_addr, tmp2); - __ lbu(tmp2, Address(card_addr)); - __ mv(t0, (int)G1CardTable::g1_young_card_val()); - __ beq(tmp2, t0, done); + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ bnez(tmp1, *stub->entry(), true); - assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + __ bind(*stub->continuation()); +} - __ membar(MacroAssembler::StoreLoad); +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); - __ lbu(tmp2, Address(card_addr)); - __ beqz(tmp2, done); + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. 
+ __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ j(*stub->continuation()); +} - __ sb(zr, Address(card_addr)); +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0); + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, + "expecting a register"); - __ ld(t0, queue_index); - __ beqz(t0, runtime); - __ sub(t0, t0, wordSize); - __ sd(t0, queue_index); + stub->initialize_registers(thread, tmp1, tmp2); - __ ld(tmp2, buffer); - __ add(t0, tmp2, t0); - __ sd(card_addr, Address(t0, 0)); - __ j(done); + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) (tmp2 holds the card value) + __ mv(t0, (int)G1CardTable::g1_young_card_val()); + __ bne(tmp2, t0, *stub->entry(), true); - __ bind(runtime); - // save the live input values - RegSet saved = RegSet::of(store_addr); - __ push_reg(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ pop_reg(saved, sp); + __ bind(*stub->continuation()); +} - __ bind(done); +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ j(*stub->continuation()); } +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2) { bool on_oop = is_reference_type(type); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp index 96568994079dd..c7bee2ef6f3a8 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -72,6 +74,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2); }; diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad new file mode 100644 index 0000000000000..1dc5834dbdc89 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad @@ -0,0 +1,564 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
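The new g1_riscv.ad below gives G1 its own variants of the pointer load/store and atomic instructs, each bracketed by the write_barrier_pre/write_barrier_post helpers defined in its source block. For reference, the SATB pre-barrier those helpers ultimately emit behaves like this hedged pseudo-C++ (satb_marking_active and try_enqueue_satb are descriptive stand-ins, not VM API):

// Restates generate_pre_barrier_fast_path + generate_pre_barrier_slow_path.
static bool satb_marking_active(JavaThread* thread);      // stand-in
static bool try_enqueue_satb(JavaThread* t, oop pre_val); // stand-in
static void g1_pre_barrier(oop* field, oop pre_val, JavaThread* thread) {
  if (!satb_marking_active(thread)) return;   // fast path: marking inactive
  if (field != nullptr) pre_val = *field;     // load previous value if not supplied
  if (pre_val == nullptr) return;             // nothing to remember
  if (!try_enqueue_satb(thread, pre_val)) {   // SATB queue full -> runtime
    G1BarrierSetRuntime::write_ref_field_pre_entry(pre_val, thread);
  }
}

One detail to keep in mind while reading the instructs: the compare-and-swap patterns feed $oldval to the pre-barrier instead of reloading from memory, since a successful CAS can only overwrite exactly that value; the comments inside g1CompareAndExchangeP below restate this.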
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_riscv.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+                              const MachNode* node,
+                              Register obj,
+                              Register pre_val,
+                              Register tmp1,
+                              Register tmp2,
+                              RegSet preserve = RegSet(),
+                              RegSet no_preserve = RegSet()) {
+  if (!G1PreBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+    stub->preserve(*reg);
+  }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+    stub->dont_preserve(*reg);
+  }
+  g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, xthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+                               const MachNode* node,
+                               Register store_addr,
+                               Register new_val,
+                               Register tmp1,
+                               Register tmp2) {
+  if (!G1PostBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+  g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreP mem src));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  ins_cost(STORE_COST);
+  format %{ "sd  $src, $mem\t# ptr" %}
+  ins_encode %{
+    guarantee($mem$$disp == 0, "impossible encoding");
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ sd($src$$Register, Address($mem$$Register));
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $src$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreN mem src));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  ins_cost(STORE_COST);
+  format %{ "sw  $src, $mem\t# compressed ptr" %}
+  ins_encode %{
+    guarantee($mem$$disp == 0, "impossible encoding");
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ sw($src$$Register, Address($mem$$Register));
+    if ((barrier_data() & G1C2BarrierPost) != 0) {
+      if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+        __ decode_heap_oop($tmp1$$Register, $src$$Register);
+      } else {
+        __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+      }
+    }
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $tmp1$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp
tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(STORE_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "sw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ sw($tmp1$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. 
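// Annotation on the no_preserve set used here: $res is listed there because
// the cmpxchg below defines it only after the pre-barrier stub may have run.
// There is no live value to keep, so SaveLiveRegisters in the stub is told
// not to spill and reload it; $oldval and $newval, by contrast, must survive
// the stub and are therefore in the preserve set.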
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem 
(Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, 
$newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register 
/* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgwu $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgwu($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgwu_acq $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgalwu($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); + ins_cost(LOAD_COST + BRANCH_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + __ ld($dst$$Register, Address($mem$$Register)); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(LOAD_COST + BRANCH_COST); + format %{ "lwu $dst, $mem\t# compressed ptr" %} + ins_encode %{ + guarantee($mem$$disp 
== 0, "impossible encoding");
+    __ lwu($dst$$Register, Address($mem$$Register));
+    if ((barrier_data() & G1C2BarrierPre) != 0) {
+      __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+      write_barrier_pre(masm, this,
+                        noreg /* obj */,
+                        $tmp1$$Register /* pre_val */,
+                        $tmp2$$Register /* tmp1 */,
+                        $tmp3$$Register /* tmp2 */);
+    }
+  %}
+  ins_pipe(iload_reg_mem);
+%}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index 9a79a92327723..cc73d14a756f2 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -70,10 +70,10 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
     __ push_reg(saved_regs, sp);
     if (UseCompressedOops) {
-      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
                       src, dst, count);
     } else {
-      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
     }
     __ pop_reg(saved_regs, sp);
     __ bind(done);
@@ -165,9 +165,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
   // expand_call should be passed true.
   if (expand_call) {
     assert(pre_val != c_rarg1, "smashed arg");
-    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
   } else {
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
   }

   __ pop_reg(saved, sp);
@@ -645,7 +645,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
   __ bind(runtime);
   __ push_call_clobbered_registers();
   __ load_parameter(0, pre_val);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
   __ pop_call_clobbered_registers();
   __ bind(done);
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
index 8fbeaa45371d1..cbb918ade00fe 100644
--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
@@ -636,8 +636,20 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
     ShouldNotReachHere();
   }

-  // A full fence is generated before icache_flush by default in invalidate_word
-  ICache::invalidate_range(addr, bytes);
+  // If we are using UseCtxFencei, no ICache invalidation is needed here.
+  // Instead, every hart will perform a fence.i, either by a Java thread
+  // (the patching epoch takes it to the slow path)
+  // or by the kernel when a Java thread is moved to a hart.
+  // The instruction stream changes must only happen before the disarm of
+  // the nmethod barrier, where the disarm has a leading full two-way fence.
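For readers unfamiliar with the flag: UseCtxFencei leans on the kernel's per-process icache-flush context on RISC-V, which a process opts into through prctl(2). A hedged sketch of that opt-in follows; the constant names track the Linux uapi (added around kernel 6.9), and the fallback defines are illustrative guards for older headers, not part of this patch:

#include <sys/prctl.h>

#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX
#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71  // assumed value, see linux/prctl.h
#define PR_RISCV_CTX_SW_FENCEI_ON      0
#define PR_RISCV_SCOPE_PER_PROCESS     0
#endif

// Ask the kernel to emit fence.i on our behalf whenever a thread of this
// process is migrated to another hart.
static bool enable_ctx_fencei() {
  return prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX,
               PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS) == 0;
}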
+  // If this is performed during a safepoint, all Java threads will emit a fence.i
+  // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
+  if (!UseCtxFencei) {
+    // ICache invalidation is a serialization point.
+    // The above patching of instructions happens before the invalidation.
+    // Hence it has a leading full two-way fence (wr, wr).
+    ICache::invalidate_range(addr, bytes);
+  }
 }

 #ifdef COMPILER2
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
index c2585f2d1618d..dd31de14704ab 100644
--- a/src/hotspot/cpu/riscv/globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -122,6 +122,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
   product(bool, UseRVVForBigIntegerShiftIntrinsics, true,                  \
           "Use RVV instructions for left/right shift of BigInteger")      \
   product(bool, UseTrampolines, false, EXPERIMENTAL,                      \
-          "Far calls uses jal to trampoline.")
+          "Far calls uses jal to trampoline.")                            \
+  product(bool, UseCtxFencei, false, EXPERIMENTAL,                        \
+          "Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")

 #endif // CPU_RISCV_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index dc25172da259e..46701b6ede387 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -493,6 +493,7 @@ void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_
   // Fast path check: class is fully initialized
   lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
+  membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
   sub(tmp, tmp, InstanceKlass::fully_initialized);
   beqz(tmp, *L_fast_path);

@@ -1455,6 +1456,7 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1,
 }

+#ifdef COMPILER2
 // This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
 // To make it, following steps are taken:
 //  1. in zcrc32.c, modify N to 16 and related code,
@@ -1550,6 +1552,7 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register le
     addi(buf, buf, N*4);
   }
 }
+#endif // COMPILER2

 /**
  * @param crc   register containing existing CRC (32-bit)
@@ -1562,7 +1565,10 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
     Register table0, Register table1, Register table2, Register table3,
     Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
   assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
-  Label L_by16_loop, L_vector_entry, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
+  Label L_vector_entry,
+        L_unroll_loop,
+        L_by4_loop_entry, L_by4_loop,
+        L_by1_loop, L_exit;

   const int64_t single_table_size = 256;
   const int64_t unroll = 16;
@@ -1576,26 +1582,24 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
   add(table2, table0, 2*single_table_size*sizeof(juint), tmp1);
   add(table3, table2, 1*single_table_size*sizeof(juint), tmp1);

+#ifdef COMPILER2
   if (UseRVV) {
     const int64_t tmp_limit = MaxVectorSize >= 32 ?
unroll_words*3 : unroll_words*5; mv(tmp1, tmp_limit); bge(len, tmp1, L_vector_entry); } - subw(len, len, unroll_words); - bge(len, zr, L_unroll_loop_entry); +#endif // COMPILER2 + + mv(tmp1, unroll_words); + blt(len, tmp1, L_by4_loop_entry); - addiw(len, len, unroll_words-4); - bge(len, zr, L_by4_loop); - addiw(len, len, 4); - bgt(len, zr, L_by1_loop); - j(L_exit); + const Register loop_buf_end = tmp3; align(CodeEntryAlignment); - bind(L_unroll_loop_entry); - const Register buf_end = tmp3; - add(buf_end, buf, len); // buf_end will be used as endpoint for loop below + // Entry for L_unroll_loop + add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below andi(len, len, unroll_words-1); // len = (len % unroll_words) - sub(len, len, unroll_words); // Length after all iterations + sub(loop_buf_end, loop_buf_end, len); bind(L_unroll_loop); for (int i = 0; i < unroll; i++) { ld(tmp1, Address(buf, i*wordSize)); @@ -1604,57 +1608,52 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, } addi(buf, buf, unroll_words); - ble(buf, buf_end, L_unroll_loop); - addiw(len, len, unroll_words-4); - bge(len, zr, L_by4_loop); - addiw(len, len, 4); - bgt(len, zr, L_by1_loop); - j(L_exit); - + blt(buf, loop_buf_end, L_unroll_loop); + + bind(L_by4_loop_entry); + mv(tmp1, 4); + blt(len, tmp1, L_by1_loop); + add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below + andi(len, len, 3); + sub(loop_buf_end, loop_buf_end, len); bind(L_by4_loop); lwu(tmp1, Address(buf)); update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false); - subw(len, len, 4); addi(buf, buf, 4); - bge(len, zr, L_by4_loop); - addiw(len, len, 4); - ble(len, zr, L_exit); + blt(buf, loop_buf_end, L_by4_loop); bind(L_by1_loop); + beqz(len, L_exit); + subw(len, len, 1); lwu(tmp1, Address(buf)); andi(tmp2, tmp1, right_8_bits); update_byte_crc32(crc, tmp2, table0); - ble(len, zr, L_exit); + beqz(len, L_exit); subw(len, len, 1); srli(tmp2, tmp1, 8); andi(tmp2, tmp2, right_8_bits); update_byte_crc32(crc, tmp2, table0); - ble(len, zr, L_exit); + beqz(len, L_exit); subw(len, len, 1); srli(tmp2, tmp1, 16); andi(tmp2, tmp2, right_8_bits); update_byte_crc32(crc, tmp2, table0); - ble(len, zr, L_exit); - - srli(tmp2, tmp1, 24); - andi(tmp2, tmp2, right_8_bits); - update_byte_crc32(crc, tmp2, table0); +#ifdef COMPILER2 // put vector code here, otherwise "offset is too large" error occurs. if (UseRVV) { - j(L_exit); // only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`. + // only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`. 
+    j(L_exit);

     bind(L_vector_entry);
     vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);

-    addiw(len, len, -4);
-    bge(len, zr, L_by4_loop);
-    addiw(len, len, 4);
-    bgt(len, zr, L_by1_loop);
+    bgtz(len, L_by4_loop_entry);
   }
+#endif // COMPILER2

   bind(L_exit);
   andn(crc, tmp5, crc);
@@ -2085,23 +2084,11 @@ void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register
 }

 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
-  if (is_simm12(-decrement)) {
-    addi(Rd, Rn, -decrement);
-  } else {
-    assert_different_registers(Rn, temp);
-    li(temp, decrement);
-    sub(Rd, Rn, temp);
-  }
+  add(Rd, Rn, -decrement, temp);
 }

 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
-  if (is_simm12(-decrement)) {
-    addiw(Rd, Rn, -decrement);
-  } else {
-    assert_different_registers(Rn, temp);
-    li(temp, decrement);
-    subw(Rd, Rn, temp);
-  }
+  addw(Rd, Rn, -decrement, temp);
 }

 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
@@ -2961,7 +2948,7 @@ int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
   return idivq_offset;
 }

-// Look up the method for a megamorpic invkkeinterface call.
+// Look up the method for a megamorphic invokeinterface call.
 // The target method is determined by <intf_klass, itable_index>.
 // The receiver klass is in recv_klass.
 // On success, the result will be in method_result, and execution falls through.
@@ -2976,9 +2963,9 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
   assert_different_registers(recv_klass, intf_klass, scan_tmp);
   assert_different_registers(method_result, intf_klass, scan_tmp);
   assert(recv_klass != method_result || !return_method,
-         "recv_klass can be destroyed when mehtid isn't needed");
+         "recv_klass can be destroyed when method isn't needed");
   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
-         "caller must be same register for non-constant itable index as for method");
+         "caller must use same register for non-constant itable index as for method");

   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
   int vtable_base = in_bytes(Klass::vtable_start_offset());
@@ -3171,6 +3158,13 @@ void MacroAssembler::membar(uint32_t order_constraint) {
   }
 }

+void MacroAssembler::cmodx_fence() {
+  BLOCK_COMMENT("cmodx fence");
+  if (VM_Version::supports_fencei_barrier()) {
+    Assembler::fencei();
+  }
+}
+
 // Form an address from base + offset in Rd. Rd may or may not
 // actually be used: you must use the Address that is returned. It
It // is up to you to ensure that the shift provided matches the size diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 43d9dc387ca20..fd174f241eb0b 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -431,6 +431,8 @@ class MacroAssembler: public Assembler { } } + void cmodx_fence(); + void pause() { Assembler::fence(w, 0); } @@ -1319,11 +1321,12 @@ class MacroAssembler: public Assembler { Register table0, Register table1, Register table2, Register table3, bool upper); void update_byte_crc32(Register crc, Register val, Register table); + +#ifdef COMPILER2 void vector_update_crc32(Register crc, Register buf, Register len, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register table0, Register table3); -#ifdef COMPILER2 void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp); void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m); @@ -1353,7 +1356,7 @@ class MacroAssembler: public Assembler { Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6, Register product_hi); -#endif +#endif // COMPILER2 void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp index deeb771d83bb8..f638db9f0bfe4 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -28,6 +28,7 @@ #include "asm/macroAssembler.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/vmClasses.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "memory/allocation.inline.hpp" @@ -37,7 +38,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/stubRoutines.hpp" -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -444,7 +445,6 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, __ far_jump(RuntimeAddress(SharedRuntime::throw_IncompatibleClassChangeError_entry())); } } - } #ifndef PRODUCT diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp index d0903c96e2271..18b4302c7e68e 100644 --- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -55,7 +55,21 @@ void Relocation::pd_set_data_value(address x, bool verify_only) { bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); break; } - ICache::invalidate_range(addr(), bytes); + + // If we are using UseCtxFencei, no ICache invalidation is needed here. + // Instead, every hart will perform a fence.i, either by a Java thread + // (the patching epoch will take it to the slow path), + // or by the kernel when a Java thread is moved to a hart.
+ // The instruction stream changes must only happen before the disarm of + // the nmethod barrier, where the disarm has a leading full two-way fence. + // If this is performed during a safepoint, all Java threads will emit a fence.i + // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier. + if (!UseCtxFencei) { + // ICache invalidation is a serialization point. + // The above patching of instructions happens before the invalidation. + // Hence it has a leading full two-way fence (wr, wr). + ICache::invalidate_range(addr(), bytes); + } } address Relocation::pd_call_destination(address orig_addr) { diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 05f55fd0da7af..a76d172267004 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1972,12 +1972,16 @@ const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) // Vector calling convention not yet implemented. bool Matcher::supports_vector_calling_convention(void) { - return false; + return EnableVectorSupport && UseVectorStubs; } OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - Unimplemented(); - return OptoRegPair(0, 0); + assert(EnableVectorSupport && UseVectorStubs, "sanity"); + assert(ideal_reg == Op_VecA, "sanity"); + // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc + int lo = V8_num; + int hi = V8_K_num; + return OptoRegPair(hi, lo); } // Is this branch offset short enough that a short branch can be used? @@ -2224,7 +2228,8 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { assert_cond(m != nullptr); if (is_vshift_con_pattern(n, m) || // ShiftV src (ShiftCntV con) is_vector_bitwise_not_pattern(n, m) || - is_vector_scalar_bitwise_pattern(n, m)) { + is_vector_scalar_bitwise_pattern(n, m) || + is_encode_and_store_pattern(n, m)) { mstack.push(m, Visit); return true; } @@ -4785,6 +4790,7 @@ instruct loadP(iRegPNoSp dst, memory mem) // Load Compressed Pointer instruct loadN(iRegNNoSp dst, memory mem) %{ + predicate(n->as_Load()->barrier_data() == 0); match(Set dst (LoadN mem)); ins_cost(LOAD_COST); @@ -5220,6 +5226,7 @@ instruct storeimmP0(immP0 zero, memory mem) // Store Compressed Pointer instruct storeN(iRegN src, memory mem) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(STORE_COST); @@ -5234,6 +5241,7 @@ instruct storeN(iRegN src, memory mem) instruct storeImmN0(immN0 zero, memory mem) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem zero)); ins_cost(STORE_COST); @@ -5424,6 +5432,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); @@ -5545,7 +5554,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); @@ -5653,6 +5662,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne instruct compareAndExchangeN(iRegNNoSp res, indirect
mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); @@ -5786,7 +5796,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); @@ -5914,6 +5924,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); @@ -6045,7 +6056,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); @@ -6117,6 +6128,8 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetN mem newv)); ins_cost(ALU_COST); @@ -6182,7 +6195,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); @@ -10066,6 +10079,23 @@ instruct CallLeafDirect(method meth, rFlagsReg cr) ins_pipe(pipe_class_call); %} +// Call Runtime Instruction without safepoint and with vector arguments + +instruct CallLeafDirectVector(method meth, rFlagsReg cr) +%{ + match(CallLeafVector); + + effect(USE meth, KILL cr); + + ins_cost(BRANCH_COST); + + format %{ "CALL, runtime leaf vector $meth" %} + + ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} + // Call Runtime Instruction instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index 54947f6bf9a19..510c0ff5d4646 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -4895,11 +4895,10 @@ instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{ effect(TEMP_DEF dst); format %{ "gather_loadS $dst, $mem, $idx" %} ins_encode %{ - __ vmv1r_v(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg)); BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), (int)sew); + __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg), (int)sew); __ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base), as_VectorRegister($dst$$reg)); %} @@ -4929,11 +4928,10 @@ instruct gather_loadS_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, v effect(TEMP_DEF dst, TEMP tmp); format 
%{ "gather_loadS_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %} ins_encode %{ - __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew); + __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew); __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg)); __ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base), @@ -4969,11 +4967,10 @@ instruct scatter_storeS(indirect mem, vReg src, vReg idx, vReg tmp) %{ effect(TEMP tmp); format %{ "scatter_storeS $mem, $idx, $src\t# KILL $tmp" %} ins_encode %{ - __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); - __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew); + __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew); __ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base), as_VectorRegister($tmp$$reg)); %} @@ -5003,11 +5000,10 @@ instruct scatter_storeS_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0, effect(TEMP tmp); format %{ "scatter_storeS_masked $mem, $idx, $src, $v0\t# KILL $tmp" %} ins_encode %{ - __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); - __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew); + __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew); __ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base), as_VectorRegister($tmp$$reg), Assembler::v0_t); %} diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 879fd92272279..2b629fcfcb293 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -666,7 +666,20 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm int SharedRuntime::vector_calling_convention(VMRegPair *regs, uint num_bits, uint total_args_passed) { - Unimplemented(); + assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported"); + assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported"); + + // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc + static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = { + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23 + }; + + const int next_reg_val = 3; + for (uint i = 0; i < total_args_passed; i++) { + VMReg vmreg = VEC_ArgReg[i]->as_VMReg(); + regs[i].set_pair(vmreg->next(next_reg_val), vmreg); + } return 0; } @@ -2110,7 +2123,7 @@ void SharedRuntime::generate_deopt_blob() { int reexecute_offset = __ pc() - start; #if INCLUDE_JVMCI && !defined(COMPILER1) - if (EnableJVMCI && UseJVMCICompiler) { + if (UseJVMCICompiler) { // JVMCI does not use this kind of deoptimization __ should_not_reach_here(); } diff --git 
a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index ee14d045407a0..bdb92e0b835f4 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -2428,6 +2428,14 @@ class StubGenerator: public StubCodeGenerator { __ la(t1, ExternalAddress(bs_asm->patching_epoch_addr())); __ lwu(t1, t1); __ sw(t1, thread_epoch_addr); + // There are two ways this can work: + // - The writer performed a system-wide icache shootdown after the instruction stream update. + // Hence do nothing. + // - The writer trusts us to make sure our icache is in sync before entering. + // Hence use cmodx fence (fence.i, may change). + if (UseCtxFencei) { + __ cmodx_fence(); + } __ membar(__ LoadLoad); } @@ -4474,7 +4482,7 @@ class StubGenerator: public StubCodeGenerator { RegSet reg_cache_saved_regs = RegSet::of(x24, x25, x26, x27); // s8, s9, s10, s11 RegSet reg_cache_regs; reg_cache_regs += reg_cache_saved_regs; - reg_cache_regs += RegSet::of(x28, x29, x30, x31); // t3, t4, t5, t6 + reg_cache_regs += RegSet::of(t3, t4, t5, t6); BufRegCache reg_cache(_masm, reg_cache_regs); RegSet saved_regs; @@ -5454,8 +5462,8 @@ class StubGenerator: public StubCodeGenerator { Register isMIME = c_rarg6; Register codec = c_rarg7; - Register dstBackup = x31; - Register length = x28; // t3, total length of src data in bytes + Register dstBackup = t6; + Register length = t3; // total length of src data in bytes Label ProcessData, Exit; Label ProcessScalar, ScalarLoop; @@ -5490,7 +5498,7 @@ class StubGenerator: public StubCodeGenerator { Register stepSrcM1 = send; Register stepSrcM2 = doff; Register stepDst = isURL; - Register size = x29; // t4 + Register size = t4; __ mv(size, MaxVectorSize * 2); __ mv(stepSrcM1, MaxVectorSize * 4); @@ -5542,7 +5550,7 @@ class StubGenerator: public StubCodeGenerator { // scalar version { Register byte0 = soff, byte1 = send, byte2 = doff, byte3 = isURL; - Register combined32Bits = x29; // t5 + Register combined32Bits = t4; // encoded: [byte0[5:0] : byte1[5:0] : byte2[5:0]] : byte3[5:0]] => // plain: [byte0[5:0]+byte1[5:4] : byte1[3:0]+byte2[5:2] : byte2[1:0]+byte3[5:0]] @@ -5700,10 +5708,10 @@ class StubGenerator: public StubCodeGenerator { Register nmax = c_rarg4; Register base = c_rarg5; Register count = c_rarg6; - Register temp0 = x28; // t3 - Register temp1 = x29; // t4 - Register temp2 = x30; // t5 - Register temp3 = x31; // t6 + Register temp0 = t3; + Register temp1 = t4; + Register temp2 = t5; + Register temp3 = t6; VectorRegister vzero = v31; VectorRegister vbytes = v8; // group: v8, v9, v10, v11 @@ -6063,6 +6071,58 @@ static const int64_t right_3_bits = right_n_bits(3); return start; } + void generate_vector_math_stubs() { + if (!UseRVV) { + log_info(library)("vector is not supported, skip loading vector math (sleef) library!"); + return; + } + + // Get native vector math stub routine addresses + void* libsleef = nullptr; + char ebuf[1024]; + char dll_name[JVM_MAXPATHLEN]; + if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) { + libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf); + } + if (libsleef == nullptr) { + log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf); + return; + } + + // Method naming convention + // All the methods are named as <op><type>_<ulp><suffix> + // + // Where: + // <op> is the operation name, e.g.
sin, cos + // is to indicate float/double + // "fx/dx" for vector float/double operation + // is the precision level + // "u10/u05" represents 1.0/0.5 ULP error bounds + // We use "u10" for all operations by default + // But for those functions do not have u10 support, we use "u05" instead + // rvv, indicates riscv vector extension + // + // e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions + // + log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef)); + + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { + int vop = VectorSupport::VECTOR_OP_MATH_START + op; + if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression + continue; + } + + // The native library does not support u10 level of "hypot". + const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; + + snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); + + snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); + } + } + #endif // COMPILER2 /** @@ -6084,26 +6144,17 @@ static const int64_t right_3_bits = right_n_bits(3); address start = __ pc(); + // input parameters const Register crc = c_rarg0; // crc const Register buf = c_rarg1; // source java byte array address const Register len = c_rarg2; // length - const Register table0 = c_rarg3; // crc_table address - const Register table1 = c_rarg4; - const Register table2 = c_rarg5; - const Register table3 = c_rarg6; - - const Register tmp1 = c_rarg7; - const Register tmp2 = t2; - const Register tmp3 = x28; // t3 - const Register tmp4 = x29; // t4 - const Register tmp5 = x30; // t5 - const Register tmp6 = x31; // t6 BLOCK_COMMENT("Entry:"); __ enter(); // required for proper stackwalking of RuntimeStub frame - __ kernel_crc32(crc, buf, len, table0, table1, table2, - table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + __ kernel_crc32(crc, buf, len, + c_rarg3, c_rarg4, c_rarg5, c_rarg6, // tmp's for tables + c_rarg7, t2, t3, t4, t5, t6); // misc tmps __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(); @@ -6125,6 +6176,29 @@ static const int64_t right_3_bits = right_n_bits(3); return start; } + // load Method* target of MethodHandle + // j_rarg0 = jobject receiver + // xmethod = Method* result + address generate_upcall_stub_load_target() { + + StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target"); + address start = __ pc(); + + __ resolve_global_jobject(j_rarg0, t0, t1); + // Load target method from receiver + __ load_heap_oop(xmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), t0, t1); + __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_LambdaForm::vmentry_offset()), t0, t1); + __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_MemberName::method_offset()), t0, t1); + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, + Address(xmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()), + noreg, noreg); + __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized + + __ ret(); + + return start; + } + #undef __ // Initialization @@ -6190,6 +6264,7 @@ static const int64_t right_3_bits = right_n_bits(3); #endif // COMPILER2 
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); + StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target(); StubRoutines::riscv::set_completed(); } @@ -6268,6 +6343,8 @@ static const int64_t right_3_bits = right_n_bits(3); generate_string_indexof_stubs(); + generate_vector_math_stubs(); + #endif // COMPILER2 } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index 1f32488777d57..7c811aa3a0c26 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -27,6 +27,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" #include "classfile/javaClasses.hpp" +#include "compiler/disassembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/bytecodeTracer.hpp" @@ -70,7 +71,7 @@ // Max size with JVMTI int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> //----------------------------------------------------------------------------- @@ -1748,13 +1749,21 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& vep) { assert(t != nullptr && t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; - aep = __ pc(); __ push_ptr(); __ j(L); - fep = __ pc(); __ push_f(); __ j(L); - dep = __ pc(); __ push_d(); __ j(L); - lep = __ pc(); __ push_l(); __ j(L); - bep = cep = sep = - iep = __ pc(); __ push_i(); - vep = __ pc(); + aep = __ pc(); // atos entry point + __ push_ptr(); + __ j(L); + fep = __ pc(); // ftos entry point + __ push_f(); + __ j(L); + dep = __ pc(); // dtos entry point + __ push_d(); + __ j(L); + lep = __ pc(); // ltos entry point + __ push_l(); + __ j(L); + bep = cep = sep = iep = __ pc(); // [bcsi]tos entry point + __ push_i(); + vep = __ pc(); // vtos entry point __ bind(L); generate_and_dispatch(t); } diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index 078f54adc3682..2fede262057ce 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/collectedHeap.hpp" #include "gc/shared/tlab_globals.hpp" @@ -49,7 +50,7 @@ #include "runtime/synchronizer.hpp" #include "utilities/powerOfTwo.hpp" -#define __ _masm-> +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> // Address computation: local variables @@ -178,7 +179,6 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, __ la(temp_reg, Address(temp_reg, in_bytes(ResolvedFieldEntry::put_code_offset()))); } // Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in() - __ membar(MacroAssembler::AnyAny); __ lbu(temp_reg, Address(temp_reg, 0)); __ membar(MacroAssembler::LoadLoad | 
MacroAssembler::LoadStore); __ mv(bc_reg, bc); @@ -320,7 +320,6 @@ void TemplateTable::ldc(LdcType type) { // get type __ addi(x13, x11, tags_offset); __ add(x13, x10, x13); - __ membar(MacroAssembler::AnyAny); __ lbu(x13, Address(x13, 0)); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); @@ -2189,7 +2188,6 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no, break; } // Load-acquire the bytecode to match store-release in InterpreterRuntime - __ membar(MacroAssembler::AnyAny); __ lbu(temp, Address(temp, 0)); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); @@ -2241,7 +2239,6 @@ void TemplateTable::resolve_cache_and_index_for_field(int byte_no, __ la(temp, Address(Rcache, in_bytes(ResolvedFieldEntry::put_code_offset()))); } // Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in() - __ membar(MacroAssembler::AnyAny); __ lbu(temp, Address(temp, 0)); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); __ mv(t0, (int) code); // have we resolved this bytecode? @@ -2403,7 +2400,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) { Label resolved; __ load_resolved_indy_entry(cache, index); - __ membar(MacroAssembler::AnyAny); __ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset()))); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); @@ -2418,7 +2414,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) { __ call_VM(noreg, entry, method); // Update registers with resolved info __ load_resolved_indy_entry(cache, index); - __ membar(MacroAssembler::AnyAny); __ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset()))); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); @@ -3533,7 +3528,6 @@ void TemplateTable::_new() { const int tags_offset = Array<u1>::base_offset_in_bytes(); __ add(t0, x10, x13); __ la(t0, Address(t0, tags_offset)); - __ membar(MacroAssembler::AnyAny); __ lbu(t0, t0); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); __ sub(t1, t0, (u1)JVM_CONSTANT_Class); @@ -3651,7 +3645,6 @@ void TemplateTable::checkcast() { // See if bytecode has already been quicked __ add(t0, x13, Array<u1>::base_offset_in_bytes()); __ add(x11, t0, x9); - __ membar(MacroAssembler::AnyAny); __ lbu(x11, x11); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); __ sub(t0, x11, (u1)JVM_CONSTANT_Class); @@ -3707,7 +3700,6 @@ void TemplateTable::instanceof() { // See if bytecode has already been quicked __ add(t0, x13, Array<u1>::base_offset_in_bytes()); __ add(x11, t0, x9); - __ membar(MacroAssembler::AnyAny); __ lbu(x11, x11); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); __ sub(t0, x11, (u1)JVM_CONSTANT_Class); diff --git a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp index 383f332f8fd94..55160be99d0d8 100644 --- a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp +++ b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" #include "prims/upcallLinker.hpp" @@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr static const int upcall_stub_code_base_size = 1024; static const int upcall_stub_size_per_arg = 16; -address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, +address UpcallLinker::make_upcall_stub(jobject receiver,
Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, @@ -223,7 +224,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, __ block_comment("{ on_entry"); __ la(c_rarg0, Address(sp, frame_data_offset)); - __ movptr(c_rarg1, (address) receiver); __ rt_call(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry)); __ mv(xthread, x10); __ reinit_heapbase(); @@ -260,12 +260,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0); __ block_comment("} argument shuffle"); - __ block_comment("{ receiver "); - __ get_vm_result(j_rarg0, xthread); - __ block_comment("} receiver "); - - __ mov_metadata(xmethod, entry); - __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized + __ block_comment("{ load target "); + __ movptr(j_rarg0, (address) receiver); + __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // loads Method* into xmethod + __ block_comment("} load target "); __ push_cont_fastpath(xthread); @@ -338,7 +336,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, #ifndef PRODUCT stringStream ss; - ss.print("upcall_stub_%s", entry->signature()->as_C_string()); + ss.print("upcall_stub_%s", signature->as_C_string()); const char *name = _masm->code_string(ss.as_string()); #else // PRODUCT const char* name = "upcall_stub"; diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index bd4bfe86d9bf7..8fdde0094f40d 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -285,6 +285,7 @@ class VM_Version : public Abstract_VM_Version { // RISCV64 supports fast class initialization checks static bool supports_fast_class_init_checks() { return true; } + static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); } }; #endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index d288f4a893d0a..8990cf1663dd5 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -2350,6 +2350,7 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr de void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { // Make sure klass is initialized & doesn't have finalizer. + // init_state needs acquire, but S390 is TSO, and so we are already good. const int state_offset = in_bytes(InstanceKlass::init_state_offset()); Register iklass = op->klass()->as_register(); add_debug_info_for_null_check_here(op->stub()->info()); diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp index 37631298920ca..544c82d34a769 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, 2023 SAP SE. All rights reserved. + * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,11 +42,47 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> -#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str) +#define BLOCK_COMMENT(str) __ block_comment(str) + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ load_and_test_int(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ load_and_test_byte(tmp1, in_progress); + } +} + +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register Z_thread, const Register value, const Register temp) { + BLOCK_COMMENT("generate_queue_test_and_insertion {"); + + assert_different_registers(temp, value); + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + + __ load_and_test_long(temp, Address(Z_thread, in_bytes(index_offset))); // temp := *(index address) + __ branch_optimized(Assembler::bcondEqual, runtime); // jump to runtime if index == 0 (full buffer) + + // The buffer is not full, store value into it. + __ add2reg(temp, -wordSize); // temp := next index + __ z_stg(temp, in_bytes(index_offset), Z_thread); // *(index address) := next index + + __ z_ag(temp, Address(Z_thread, in_bytes(buffer_offset))); // temp := buffer address + next index + __ z_stg(value, 0, temp); // *(buffer address + next index) := value + BLOCK_COMMENT("} generate_queue_test_and_insertion"); +} void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) { @@ -59,13 +95,8 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm assert_different_registers(addr, Z_R0_scratch); // would be destroyed by push_frame() assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame() Register Rtmp1 = Z_R0_scratch; - const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); - } + + generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1); __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); // Creates frame. 
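For orientation: generate_queue_test_and_insertion above encodes G1's per-thread queue claim protocol, where the index counts down in bytes and zero means the buffer is full. A rough stand-alone C++ model of that discipline (all names here are stand-ins, not HotSpot types):

```
#include <cstddef>
#include <cstdint>

struct G1QueueModel {
  char*  buffer;  // corresponds to the *_queue_buffer_offset slot
  size_t index;   // corresponds to the *_queue_index_offset slot, in bytes

  // Returns false when the runtime slow path would be taken.
  bool try_insert(uintptr_t value) {
    if (index == 0) {
      return false;               // branch_optimized(bcondEqual, runtime)
    }
    index -= sizeof(uintptr_t);   // add2reg(temp, -wordSize); z_stg(temp, ...)
    *reinterpret_cast<uintptr_t*>(buffer + index) = value; // z_ag; z_stg(value, 0, temp)
    return true;
  }
};
```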
@@ -100,6 +131,181 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas } } +#if defined(COMPILER2) + +#undef __ +#define __ masm-> + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register pre_val, const address runtime_path) { + BLOCK_COMMENT("generate_c2_barrier_runtime_call {"); + SaveLiveRegisters save_registers(masm, stub); + __ call_VM_leaf(runtime_path, pre_val, Z_thread); + BLOCK_COMMENT("} generate_c2_barrier_runtime_call"); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + G1PreBarrierStubC2* stub) { + + BLOCK_COMMENT("g1_write_barrier_pre_c2 {"); + + assert(thread == Z_thread, "must be"); + assert_different_registers(obj, pre_val, tmp1); + assert(pre_val != noreg && tmp1 != noreg, "expecting a register"); + + stub->initialize_registers(obj, pre_val, thread, tmp1, noreg); + + generate_pre_barrier_fast_path(masm, thread, tmp1); + __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); // If the activity indicator is not zero, marking is active: take the slow-path stub. + + __ bind(*stub->continuation()); + + BLOCK_COMMENT("} g1_write_barrier_pre_c2"); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + + BLOCK_COMMENT("generate_c2_pre_barrier_stub {"); + + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + + __ bind(*stub->entry()); + + BLOCK_COMMENT("generate_pre_val_not_null_test {"); + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj), noreg, noreg, AS_RAW); + } + __ z_ltgr(pre_val, pre_val); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_pre_val_not_null_test"); + + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + Z_thread, pre_val, tmp1); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + __ bind(runtime); + + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + BLOCK_COMMENT("} generate_c2_pre_barrier_stub"); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + BLOCK_COMMENT("g1_write_barrier_post_c2 {"); + + assert(thread == Z_thread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch); + + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); + + stub->initialize_registers(thread, tmp1, tmp2); + + BLOCK_COMMENT("generate_region_crossing_test {"); + if (VM_Version::has_DistinctOpnds()) { + __ z_xgrk(tmp1, store_addr, new_val); + } else { + __ z_lgr(tmp1, store_addr); + __ z_xgr(tmp1, new_val); + } + __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_region_crossing_test"); + + // crosses regions, storing null?
+ if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ z_ltgr(new_val, new_val); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + } + + BLOCK_COMMENT("generate_card_young_test {"); + CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set()); + // calculate address of card + __ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base. + __ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table. + __ z_algr(tmp1, tmp2); // Explicit calculation needed for cli. + + // Filter young. + __ z_cli(0, tmp1, G1CardTable::g1_young_card_val()); + + BLOCK_COMMENT("} generate_card_young_test"); + + // From here on, tmp1 holds the card address. + __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); + + __ bind(*stub->continuation()); + + BLOCK_COMMENT("} g1_write_barrier_post_c2"); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + + BLOCK_COMMENT("generate_c2_post_barrier_stub {"); + + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + Register Rcard_addr = tmp1; + + __ bind(*stub->entry()); + + BLOCK_COMMENT("generate_card_clean_test {"); + __ z_sync(); // Required to support concurrent cleaning. + __ z_cli(0, Rcard_addr, 0); // Reload after membar. + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_card_clean_test"); + + BLOCK_COMMENT("generate_dirty_card {"); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. + STATIC_ASSERT(CardTable::dirty_card_val() == 0); + __ z_mvi(0, Rcard_addr, CardTable::dirty_card_val()); + BLOCK_COMMENT("} generate_dirty_card"); + + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + Z_thread, tmp1, tmp2); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + __ bind(runtime); + + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + BLOCK_COMMENT("} generate_c2_post_barrier_stub"); +} + +#endif //COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null) { bool on_oop = is_reference_type(type); @@ -136,9 +342,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator const Register Robj = obj ? obj->base() : noreg, Roff = obj ? obj->index() : noreg; - const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp must be Z_R0!! assert_different_registers(Robj, Z_R0_scratch); // Used for addressing. Furthermore, push_frame destroys Z_R0!! assert_different_registers(Rval, Z_R0_scratch); // push_frame destroys Z_R0!!
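The post barrier above chains several cheap filters before dirtying a card or touching the queue. Summarized as stand-alone C++ (the region and card constants are assumed placeholders, and try_enqueue_card is a hypothetical hook; only dirty_card_val() == 0 is confirmed by the STATIC_ASSERT in the patch):

```
#include <cstdint>

constexpr unsigned kLogHRGrainBytes = 22; // G1HeapRegion::LogOfHRGrainBytes (assumes 4M regions)
constexpr unsigned kCardShift       = 9;  // CardTable::card_shift()
constexpr uint8_t  kYoungCard       = 2;  // stand-in for G1CardTable::g1_young_card_val()
constexpr uint8_t  kDirtyCard       = 0;  // CardTable::dirty_card_val()

void post_barrier(uintptr_t store_addr, uintptr_t new_val,
                  uint8_t* byte_map_base, bool (*try_enqueue_card)(uint8_t*)) {
  if (((store_addr ^ new_val) >> kLogHRGrainBytes) == 0) return; // same region
  if (new_val == 0) return;                                      // storing null
  uint8_t* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return;                               // young card, no refinement needed
  // Slow-path stub from here on: StoreLoad fence (z_sync), then re-check
  // whether another thread dirtied the card concurrently.
  if (*card == kDirtyCard) return;
  *card = kDirtyCard;                                            // dirty the card
  try_enqueue_card(card); // log it; a full buffer would fall back to the runtime
}
```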
@@ -147,14 +350,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator BLOCK_COMMENT("g1_write_barrier_pre {"); - // Is marking active? - // Note: value is loaded for test purposes only. No further use here. - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); - } + generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1); __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. assert(Rpre_val != noreg, "must have a real register"); @@ -194,24 +390,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator // We can store the original value in the thread's buffer // only if index > 0. Otherwise, we need runtime to handle. // (The index field is typed as size_t.) - Register Rbuffer = Rtmp1, Rindex = Rtmp2; - assert_different_registers(Rbuffer, Rindex, Rpre_val); - - __ z_lg(Rbuffer, buffer_offset, Z_thread); - __ load_and_test_long(Rindex, Address(Z_thread, index_offset)); - __ z_bre(callRuntime); // If index == 0, goto runtime. - - __ add2reg(Rindex, -wordSize); // Decrement index. - __ z_stg(Rindex, index_offset, Z_thread); - - // Record the previous value. - __ z_stg(Rpre_val, 0, Rbuffer, Rindex); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + callRuntime, + Z_thread, Rpre_val, Rtmp2); __ z_bru(filtered); // We are done. - Rbuffer = noreg; // end of life - Rindex = noreg; // end of life - __ bind(callRuntime); // Save some registers (inputs and result) over runtime call @@ -326,23 +512,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato Register Rcard_addr_x = Rcard_addr; Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1; - Register Rqueue_buf = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1; - const int qidx_off = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); - const int qbuf_off = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); - if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) { + if (Rcard_addr == Rqueue_index) { Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0. } __ lgr_if_needed(Rcard_addr_x, Rcard_addr); - __ load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off)); - __ z_bre(callRuntime); // Index == 0 then jump to runtime. - - __ z_lg(Rqueue_buf, qbuf_off, Z_thread); - - __ add2reg(Rqueue_index, -wordSize); // Decrement index. - __ z_stg(Rqueue_index, qidx_off, Z_thread); - - __ z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card. + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + callRuntime, + Z_thread, Rcard_addr_x, Rqueue_index); __ z_bru(filtered); __ bind(callRuntime); diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp index cc1d51d2fa13e..0f0bdd8b83cfd 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 SAP SE. All rights reserved. 
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -62,7 +64,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -#endif +#endif // COMPILER1 + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif // COMPILER2 virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null = nullptr); diff --git a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad new file mode 100644 index 0000000000000..31f60c4aeff0b --- /dev/null +++ b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad @@ -0,0 +1,457 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright 2024 IBM Corporation. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_s390.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Z_thread, tmp1, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub); +} + +%} // source + +// store pointer +instruct g1StoreP(indirect dst, memoryRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set dst (StoreP dst src)); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "STG $src,$dst\t # ptr" %} + ins_encode %{ + __ block_comment("g1StoreP {"); + write_barrier_pre(masm, this, + $dst$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($dst$$Register, $src$$Register) /* preserve */); + + __ z_stg($src$$Register, Address($dst$$Register)); + + write_barrier_post(masm, this, + $dst$$Register, /* store_addr */ + $src$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1StoreP"); + %} + ins_pipe(pipe_class_dummy); +%} + +// Store Compressed Pointer +instruct g1StoreN(indirect mem, iRegN_P2N src, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "STY $src,$mem\t # (cOop)" %} + ins_encode %{ + __ block_comment("g1StoreN {"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + + __ z_sty($src$$Register, Address($mem$$Register)); + + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ oop_decoder($tmp1$$Register, $src$$Register, true /* maybe_null */); + } else { + __ oop_decoder($tmp1$$Register, $src$$Register, false /* maybe_null */); + } + } + + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + __ block_comment("} g1StoreN"); + %} + + ins_pipe(pipe_class_dummy); +%} + 
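g1StoreN above must hand write_barrier_post a full oop address even though only a narrow oop was stored, which is why $src is decoded into $tmp1 first (and why the decode is skipped when no post barrier is attached). A sketch of the decode being relied on, assuming zero-based compressed oops and a 3-bit shift (both assumptions; the real mode depends on heap size and base):

```
#include <cstdint>

constexpr unsigned kNarrowOopShift = 3; // assumed CompressedOops shift

inline uintptr_t decode_narrow_oop(uint32_t narrow, uintptr_t heap_base) {
  if (narrow == 0) {
    return 0; // the maybe_null variant of oop_decoder preserves null
  }
  return heap_base + (static_cast<uintptr_t>(narrow) << kNarrowOopShift);
}
```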
+instruct g1CompareAndSwapN(indirect mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + __ block_comment("g1compareAndSwapN {"); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + Register Rres = reg_to_register_object($res$$reg); + + write_barrier_pre(masm, this, + Raddr /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of(Raddr, Rcomp, Rnew) /* preserve */, + RegSet::of(Rres) /* no_preserve */); + + __ z_cs(Rcomp, Rnew, 0, Raddr); + + assert_different_registers(Rres, Raddr); + if (VM_Version::has_LoadStoreConditional()) { + __ load_const_optimized(Z_R0_scratch, 0L); // false (failed) + __ load_const_optimized(Rres, 1L); // true (succeed) + __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual); + } else { + Label done; + __ load_const_optimized(Rres, 0L); // false (failed) + __ z_brne(done); // Assume true to be the common case. + __ load_const_optimized(Rres, 1L); // true (succeed) + __ bind(done); + } + + __ oop_decoder($tmp3$$Register, Rnew, true /* maybe_null */); + + write_barrier_post(masm, this, + Raddr /* store_addr */, + $tmp3$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1compareAndSwapN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndExchangeN(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegN res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndExchangeN $oldval,$newval,$mem_ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + __ block_comment("g1CompareAndExchangeN {"); + write_barrier_pre(masm, this, + $mem_ptr$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + + Register Rres = reg_to_register_object($res$$reg); + assert_different_registers(Rres, Raddr); + + __ z_lgr(Rres, Rcomp); // previous contents + __ z_csy(Rres, Rnew, 0, Raddr); // Try to store new value. 
+ + __ oop_decoder($tmp1$$Register, Rnew, true /* maybe_null */); + + write_barrier_post(masm, this, + Raddr /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + __ block_comment("} g1CompareAndExchangeN"); + %} + ins_pipe(pipe_class_dummy); +%} + +// Load narrow oop +instruct g1LoadN(iRegN dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "LoadN $dst,$mem\t # (cOop)" %} + ins_encode %{ + __ block_comment("g1LoadN {"); + __ z_llgf($dst$$Register, Address($mem$$Register)); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ oop_decoder($tmp1$$Register, $dst$$Register, true); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register ); + } + __ block_comment("} g1LoadN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1GetAndSetN(indirect mem, iRegN dst, iRegI tmp, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set dst (GetAndSetN mem dst)); + effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); // USE_DEF dst by match rule. + format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %} + ins_encode %{ + __ block_comment("g1GetAndSetN {"); + assert_different_registers($mem$$Register, $dst$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $dst$$Register) /* preserve */); + + Register Rdst = reg_to_register_object($dst$$reg); + Register Rtmp = reg_to_register_object($tmp$$reg); + guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); + Label retry; + + // Iterate until swap succeeds. + __ z_llgf(Rtmp, Address($mem$$Register)); // current contents + __ bind(retry); + // Attempt the exchange; on failure Rtmp is refreshed with the current contents. + __ z_csy(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + + __ oop_decoder($tmp1$$Register, $dst$$Register, true /* maybe_null */); + + __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+ + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + + __ block_comment("} g1GetAndSetN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndSwapP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %} + ins_encode %{ + __ block_comment("g1CompareAndSwapP {"); + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + Register Rres = reg_to_register_object($res$$reg); + + write_barrier_pre(masm, this, + noreg /* obj */, + Rcomp /* pre_val */, + $tmp1$$Register /* tmp1 */, + RegSet::of(Raddr, Rcomp, Rnew) /* preserve */, + RegSet::of(Rres) /* no_preserve */); + + __ z_csg(Rcomp, Rnew, 0, Raddr); + + if (VM_Version::has_LoadStoreConditional()) { + __ load_const_optimized(Z_R0_scratch, 0L); // false (failed) + __ load_const_optimized(Rres, 1L); // true (succeed) + __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual); + } else { + Label done; + __ load_const_optimized(Rres, 0L); // false (failed) + __ z_brne(done); // Assume true to be the common case. + __ load_const_optimized(Rres, 1L); // true (succeed) + __ bind(done); + } + + write_barrier_post(masm, this, + Raddr /* store_addr */, + Rnew /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1CompareAndSwapP"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndExchangeP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegP res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem_ptr (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndExchangeP $oldval,$newval,$mem_ptr" %} + ins_encode %{ + __ block_comment("g1CompareAndExchangeP {"); + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP. 
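+    // (If the CS fails, nothing is overwritten; recording $oldval anyway is
+    // merely conservative, which is always safe for the SATB pre-barrier.)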
+    write_barrier_pre(masm, this,
+                      noreg             /* obj */,
+                      $oldval$$Register /* pre_val */,
+                      $tmp2$$Register   /* tmp1 */,
+                      RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+                      RegSet::of($res$$Register) /* no_preserve */);
+
+    __ z_lgr($res$$Register, $oldval$$Register); // previous contents
+
+    __ z_csg($oldval$$Register, $newval$$Register, 0, $mem_ptr$$Register);
+
+    write_barrier_post(masm, this,
+                       $mem_ptr$$Register /* store_addr */,
+                       $newval$$Register  /* new_val */,
+                       $tmp1$$Register    /* tmp1 */,
+                       $tmp2$$Register    /* tmp2 */);
+    __ block_comment("} g1CompareAndExchangeP");
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Pointer
+instruct g1LoadP(iRegP dst, memory mem, iRegL tmp1, flagsReg cr) %{
+  predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+  match(Set dst (LoadP mem));
+  effect(TEMP dst, TEMP tmp1, KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  format %{ "LG $dst,$mem\t # ptr" %}
+  ins_encode %{
+    __ block_comment("g1LoadP {");
+    __ z_lg($dst$$Register, $mem$$Address);
+    write_barrier_pre(masm, this,
+                      noreg           /* obj */,
+                      $dst$$Register  /* pre_val */,
+                      $tmp1$$Register /* tmp1 */);
+    __ block_comment("} g1LoadP");
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1GetAndSetP(indirect mem, iRegP dst, iRegL tmp, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+  predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+  match(Set dst (GetAndSetP mem dst));
+  effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2); // USE_DEF dst by match rule.
+  format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
+  ins_encode %{
+    __ block_comment("g1GetAndSetP {");
+
+    write_barrier_pre(masm, this,
+                      $mem$$Register  /* obj */,
+                      $tmp$$Register  /* pre_val (as a temporary register) */,
+                      $tmp1$$Register /* tmp1 */,
+                      RegSet::of($mem$$Register, $dst$$Register) /* preserve */);
+
+    __ z_lgr($tmp1$$Register, $dst$$Register);
+    Register Rdst = reg_to_register_object($dst$$reg);
+    Register Rtmp = reg_to_register_object($tmp$$reg);
+    guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+    Label retry;
+
+    // Iterate until swap succeeds.
+    __ z_lg(Rtmp, Address($mem$$Register)); // current contents
+    __ bind(retry);
+    __ z_csg(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value.
+    __ z_brne(retry); // Yikes, concurrent update, need to retry.
+    __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
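+    // Note: $tmp1 still holds the oop that was stored (saved above, before
+    // $dst was overwritten with the old contents) and serves as new_val for
+    // the post barrier below.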
+ + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp$$Register /* tmp2 */); + __ block_comment("} g1GetAndSetP"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + // ins_cost(INSN_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "st $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + __ block_comment("g1EncodePAndStoreN {"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ oop_encoder($tmp1$$Register, $src$$Register, true /* maybe_null */); + } else { + __ oop_encoder($tmp1$$Register, $src$$Register, false /* maybe_null */); + } + __ z_st($tmp1$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1EncodePAndStoreN"); + %} + ins_pipe(pipe_class_dummy); +%} diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp index 28892da6ca4c1..d826b4a06f336 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp @@ -33,6 +33,9 @@ #include "runtime/jniHandles.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/macros.hpp" +#ifdef COMPILER2 +#include "gc/shared/c2/barrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -194,8 +197,93 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { #ifdef COMPILER2 -OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { - Unimplemented(); // This must be implemented to support late barrier expansion. 
+OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) const {
+  if (!OptoReg::is_reg(opto_reg)) {
+    return OptoReg::Bad;
+  }
+
+  VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if ((vm_reg->is_Register() || vm_reg->is_FloatRegister()) && (opto_reg & 1) != 0) {
+    return OptoReg::Bad;
+  }
+
+  return opto_reg;
+}
+
+#undef __
+#define __ _masm->
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub)
+  : _masm(masm), _reg_mask(stub->preserve_set()) {
+
+  const int register_save_size = iterate_over_register_mask(ACTION_COUNT_ONLY) * BytesPerWord;
+
+  _frame_size = align_up(register_save_size, frame::alignment_in_bytes) + frame::z_abi_160_size; // FIXME: this could be restricted to the argument registers only
+
+  __ save_return_pc();
+  __ push_frame(_frame_size, Z_R14); // FIXME: check whether Z_R1_scratch could do the job here
+
+  __ z_lg(Z_R14, _z_common_abi(return_pc) + _frame_size, Z_SP);
+
+  iterate_over_register_mask(ACTION_SAVE, _frame_size);
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+  iterate_over_register_mask(ACTION_RESTORE, _frame_size);
+
+  __ pop_frame();
+
+  __ restore_return_pc();
+}
+
+int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int offset) {
+  int reg_save_index = 0;
+  RegMaskIterator live_regs_iterator(_reg_mask);
+
+  while (live_regs_iterator.has_next()) {
+    const OptoReg::Name opto_reg = live_regs_iterator.next();
+
+    // Filter out stack slots (spilled registers, i.e., stack-allocated registers).
+    if (!OptoReg::is_reg(opto_reg)) {
+      continue;
+    }
+
+    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+    if (vm_reg->is_Register()) {
+      Register std_reg = vm_reg->as_Register();
+
+      if (std_reg->encoding() >= Z_R2->encoding() && std_reg->encoding() <= Z_R15->encoding()) {
+        reg_save_index++;
+
+        if (action == ACTION_SAVE) {
+          __ z_stg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+        } else if (action == ACTION_RESTORE) {
+          __ z_lg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+        } else {
+          assert(action == ACTION_COUNT_ONLY, "Sanity");
+        }
+      }
+    } else if (vm_reg->is_FloatRegister()) {
+      FloatRegister fp_reg = vm_reg->as_FloatRegister();
+      if (fp_reg->encoding() >= Z_F0->encoding() && fp_reg->encoding() <= Z_F15->encoding()
+          && fp_reg->encoding() != Z_F1->encoding()) {
+        reg_save_index++;
+
+        if (action == ACTION_SAVE) {
+          __ z_std(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+        } else if (action == ACTION_RESTORE) {
+          __ z_ld(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+        } else {
+          assert(action == ACTION_COUNT_ONLY, "Sanity");
+        }
+      }
+    } else if (false /* vm_reg->is_VectorRegister() */) {
+      fatal("Vector register support is not there yet!");
+    } else {
+      fatal("Register type is not known");
+    }
+  }
+  return reg_save_index;
 }
 
 #endif // COMPILER2
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
index de1de8a51a7f1..fb61adc55b500 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
@@ -32,7 +32,9 @@
 #ifdef COMPILER2
 #include "code/vmreg.hpp"
 #include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
 
+class BarrierStubC2;
 class Node;
 #endif // COMPILER2
 
@@ -62,8 +64,42 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
 
 #ifdef COMPILER2
   OptoReg::Name refine_register(const Node* node,
-                                OptoReg::Name opto_reg);
+                                OptoReg::Name opto_reg) const;
 #endif // COMPILER2
 };
 
+#ifdef COMPILER2
+
+// This class
saves and restores the registers that need to be preserved across +// the runtime call represented by a given C2 barrier stub. Use as follows: +// { +// SaveLiveRegisters save(masm, stub); +// .. +// __ call_VM_leaf(...); +// .. +// } + +class SaveLiveRegisters { + MacroAssembler* _masm; + RegMask _reg_mask; + Register _result_reg; + int _frame_size; + + public: + SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub); + + ~SaveLiveRegisters(); + + private: + enum IterationAction : int { + ACTION_SAVE, + ACTION_RESTORE, + ACTION_COUNT_ONLY + }; + + int iterate_over_register_mask(IterationAction action, int offset = 0); +}; + +#endif // COMPILER2 + #endif // CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 6c26e17d5ce3b..6bfe5125959ad 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -2127,8 +2127,9 @@ unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { // Pop current C frame. void MacroAssembler::pop_frame() { - BLOCK_COMMENT("pop_frame:"); + BLOCK_COMMENT("pop_frame {"); Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); + BLOCK_COMMENT("} pop_frame"); } // Pop current C frame and restore return PC register (Z_R14). @@ -3458,7 +3459,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa L_slow_path = &L_fallthrough; } - // Fast path check: class is fully initialized + // Fast path check: class is fully initialized. + // init_state needs acquire, but S390 is TSO, and so we are already good. z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); z_bre(*L_fast_path); @@ -3655,12 +3657,38 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg bind(not_recursive); + NearLabel check_succ, set_eq_unlocked; + + // Set owner to null. + // Release to satisfy the JMM + z_release(); + z_lghi(temp, 0); + z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + // We need a full fence after clearing owner to avoid stranding. + z_fence(); + + // Check if the entry lists are empty. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - z_brne(done); + z_brne(check_succ); load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - z_brne(done); - z_release(); - z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + z_bre(done); // If so we are done. + + bind(check_succ); + + // Check if there is a successor. + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ))); + z_brne(set_eq_unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_xilf(currentHeader, markWord::monitor_value); + z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); + + z_ltgr(oop, oop); // Set flag = NE + z_bru(done); + + bind(set_eq_unlocked); + z_cr(temp, temp); // Set flag = EQ bind(done); @@ -6454,6 +6482,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 
0 : checked_cast<int>(markWord::monitor_value));
   const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
   const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+  const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
   const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
   const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
 
@@ -6471,25 +6500,40 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
 
     bind(not_recursive);
 
-    NearLabel not_ok;
+    NearLabel check_succ, set_eq_unlocked;
+
+    // Set owner to null.
+    // Release to satisfy the JMM
+    z_release();
+    z_lghi(tmp2, 0);
+    z_stg(tmp2 /*=0*/, owner_address);
+    // We need a full fence after clearing owner to avoid stranding.
+    z_fence();
 
+    // Check if the entry lists are empty.
     load_and_test_long(tmp2, EntryList_address);
-    z_brne(not_ok);
+    z_brne(check_succ);
     load_and_test_long(tmp2, cxq_address);
-    z_brne(not_ok);
+    z_bre(unlocked); // If so we are done.
 
-    z_release();
-    z_stg(tmp2 /*=0*/, owner_address);
+    bind(check_succ);
 
-    z_bru(unlocked); // CC = EQ here
+    // Check if there is a successor.
+    load_and_test_long(tmp2, succ_address);
+    z_brne(set_eq_unlocked); // If so we are done.
 
-    bind(not_ok);
+    // Save the monitor pointer in the current thread, so we can try to
+    // reacquire the lock in SharedRuntime::monitor_exit_helper().
+    if (!UseObjectMonitorTable) {
+      z_xilf(monitor, markWord::monitor_value);
+    }
+    z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+    z_ltgr(obj, obj); // Set flag = NE
+    z_bru(slow_path);
 
-    // The owner may be anonymous, and we removed the last obj entry in
-    // the lock-stack. This loses the information about the owner.
-    // Write the thread to the owner field so the runtime knows the owner.
-    z_stg(Z_thread, owner_address);
-    z_bru(slow_path); // CC = NE here
+    bind(set_eq_unlocked);
+    z_cr(tmp2, tmp2); // Set flag = EQ
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp
index 6c6cae3c58fc3..d8b1ae68f6f50 100644
--- a/src/hotspot/cpu/s390/matcher_s390.hpp
+++ b/src/hotspot/cpu/s390/matcher_s390.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, 2022 SAP SE. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -63,11 +63,16 @@
     return true;
   }
 
-  // Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
-  static int long_cmove_cost() { return ConditionalMoveLimit; }
+  // Use conditional move (CMOVL)
+  static int long_cmove_cost() {
+    // z196/z11 or later hardware supports conditional moves
+    return VM_Version::has_LoadStoreConditional() ? 0 : ConditionalMoveLimit;
+  }
 
-  // Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
-  static int float_cmove_cost() { return ConditionalMoveLimit; }
+  static int float_cmove_cost() {
+    // z196/z11 or later hardware supports conditional moves
+    return VM_Version::has_LoadStoreConditional() ? 0 : ConditionalMoveLimit;
+  }
 
   // Set this as clone_shift_expressions.
  static bool narrow_oop_use_complex_address() {
diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp
index 931e899257e92..18af232e56970 100644
--- a/src/hotspot/cpu/s390/register_s390.hpp
+++ b/src/hotspot/cpu/s390/register_s390.hpp
@@ -448,4 +448,12 @@ constexpr Register Z_R0_scratch = Z_R0;
 constexpr Register Z_R1_scratch = Z_R1;
 
 constexpr FloatRegister Z_fscratch_1 = Z_F1;
 
+typedef AbstractRegSet<Register> RegSet;
+
+template <>
+inline Register AbstractRegSet<Register>::first() {
+  if (_bitset == 0) { return noreg; }
+  return as_Register(count_trailing_zeros(_bitset));
+}
+
 #endif // CPU_S390_REGISTER_S390_HPP
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 4de1a4e7b7f35..8181e96ecfc55 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1644,6 +1644,10 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 
 // Should the matcher clone input 'm' of node 'n'?
 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
+  if (is_encode_and_store_pattern(n, m)) {
+    mstack.push(m, Visit);
+    return true;
+  }
   return false;
 }
 
@@ -3913,6 +3917,7 @@ instruct loadL_unaligned(iRegL dst, memory mem) %{
 // Load Pointer
 instruct loadP(iRegP dst, memory mem) %{
   match(Set dst (LoadP mem));
+  predicate(n->as_Load()->barrier_data() == 0);
   ins_cost(MEMORY_REF_COST);
   size(Z_DISP3_SIZE);
   format %{ "LG $dst,$mem\t # ptr" %}
@@ -3924,6 +3929,7 @@ instruct loadP(iRegP dst, memory mem) %{
 // LoadP + CastP2L
 instruct castP2X_loadP(iRegL dst, memory mem) %{
   match(Set dst (CastP2X (LoadP mem)));
+  predicate(n->as_Load()->barrier_data() == 0);
   ins_cost(MEMORY_REF_COST);
   size(Z_DISP3_SIZE);
   format %{ "LG $dst,$mem\t # ptr + p2x" %}
@@ -4286,6 +4292,7 @@ instruct storeL(memory mem, iRegL src) %{
 // Store Pointer
 instruct storeP(memory dst, memoryRegP src) %{
   match(Set dst (StoreP dst src));
+  predicate(n->as_Store()->barrier_data() == 0);
   ins_cost(MEMORY_REF_COST);
   size(Z_DISP3_SIZE);
   format %{ "STG $src,$dst\t # ptr" %}
@@ -4388,6 +4395,7 @@ instruct memInitL(memoryRS mem, immL16 src) %{
 // Move Immediate to 8-byte memory.
instruct memInitP(memoryRS mem, immP16 src) %{ match(Set mem (StoreP mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(6); format %{ "MVGHI $mem,$src\t # direct mem init 8" %} @@ -4417,6 +4425,7 @@ instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{ // Load narrow oop instruct loadN(iRegN dst, memory mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "LoadN $dst,$mem\t # (cOop)" %} @@ -4480,7 +4489,7 @@ instruct loadConNKlass(iRegN dst, immNKlass src) %{ instruct decodeLoadN(iRegP dst, memory mem) %{ match(Set dst (DecodeN (LoadN mem))); - predicate(false && (CompressedOops::base()==nullptr)&&(CompressedOops::shift()==0)); + predicate(false && (CompressedOops::base()==nullptr) && (CompressedOops::shift()==0)); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %} @@ -4628,7 +4637,7 @@ instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{ match(Set dst (EncodeP src)); effect(KILL cr); predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) && - (CompressedOops::base() == 0 || + (CompressedOops::base() == nullptr || CompressedOops::base_disjoint() || !ExpandLoadingBaseEncode)); ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); @@ -4651,7 +4660,7 @@ instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{ match(Set dst (EncodeP src)); effect(KILL cr); predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) && - (CompressedOops::base() == 0 || + (CompressedOops::base() == nullptr || CompressedOops::base_disjoint() || !ExpandLoadingBaseEncode_NN)); ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); @@ -4735,6 +4744,7 @@ instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{ // Store Compressed Pointer instruct storeN(memory mem, iRegN_P2N src) %{ match(Set mem (StoreN mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP_SIZE); format %{ "ST $src,$mem\t # (cOop)" %} @@ -5146,6 +5156,7 @@ instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRe instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(USE mem_ptr, USE_KILL oldval, KILL cr); size(18); format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %} @@ -5156,6 +5167,7 @@ instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(USE mem_ptr, USE_KILL oldval, KILL cr); size(16); format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %} @@ -5443,6 +5455,7 @@ instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{ %} instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set dst (GetAndSetN mem dst)); effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. 
format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %} @@ -5452,6 +5465,7 @@ instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{ match(Set dst (GetAndSetP mem dst)); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %} ins_encode(z_enc_SwapL(mem, dst, tmp)); @@ -5926,7 +5940,7 @@ instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{ match(Set mem (StoreP mem (AddP (LoadP mem) src))); effect(KILL cr); - predicate(VM_Version::has_MemWithImmALUOps()); + predicate(VM_Version::has_MemWithImmALUOps() && n->as_LoadStore()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(6); format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %} diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index 65c94db09dcc8..468610b588e91 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -43,6 +43,7 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/signature.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/timerTrace.hpp" #include "runtime/vframeArray.hpp" #include "utilities/align.hpp" #include "utilities/macros.hpp" diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index d878731cca51f..dd9ed4c95462b 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -3053,6 +3053,29 @@ class StubGenerator: public StubCodeGenerator { return start; } + // load Method* target of MethodHandle + // Z_ARG1 = jobject receiver + // Z_method = Method* result + address generate_upcall_stub_load_target() { + StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target"); + address start = __ pc(); + + __ resolve_global_jobject(Z_ARG1, Z_tmp_1, Z_tmp_2); + // Load target method from receiver + __ load_heap_oop(Z_method, Address(Z_ARG1, java_lang_invoke_MethodHandle::form_offset()), + noreg, noreg, IS_NOT_NULL); + __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_LambdaForm::vmentry_offset()), + noreg, noreg, IS_NOT_NULL); + __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_MemberName::method_offset()), + noreg, noreg, IS_NOT_NULL); + __ z_lg(Z_method, Address(Z_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + __ z_stg(Z_method, Address(Z_thread, JavaThread::callee_target_offset())); // just in case callee is deoptimized + + __ z_br(Z_R14); + + return start; + } + void generate_initial_stubs() { // Generates all stubs and initializes the entry points. 
@@ -3110,6 +3133,7 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+    StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
   }
 
   void generate_compiler_stubs() {
diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
index c16e444904563..2c2e8ed9e3b3a 100644
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
@@ -1224,6 +1224,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
     case Interpreter::java_lang_math_sin  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break;
     case Interpreter::java_lang_math_cos  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
     case Interpreter::java_lang_math_tan  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
+    case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
    case Interpreter::java_lang_math_abs  : /* run interpreted */ break;
    case Interpreter::java_lang_math_sqrt : /* runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); not available */ break;
    case Interpreter::java_lang_math_log  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
diff --git a/src/hotspot/cpu/s390/upcallLinker_s390.cpp b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
index 734b4e89c7cb2..8baad40a519a4 100644
--- a/src/hotspot/cpu/s390/upcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
@@ -23,6 +23,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.hpp"
 #include "logging/logStream.hpp"
 #include "memory/resourceArea.hpp"
 #include "prims/upcallLinker.hpp"
@@ -116,7 +117,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
 static const int upcall_stub_code_base_size = 1024;
 static const int upcall_stub_size_per_arg = 16; // arg save & restore + move
 
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
                                        BasicType* out_sig_bt, int total_out_args,
                                        BasicType ret_type,
                                        jobject jabi, jobject jconv,
@@ -206,7 +207,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
   __ block_comment("on_entry {");
   __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry));
   __ z_aghik(Z_ARG1, Z_SP, frame_data_offset);
-  __ load_const_optimized(Z_ARG2, (intptr_t)receiver);
   __ call(call_target_address);
   __ z_lgr(Z_thread, Z_RET);
   __ block_comment("} on_entry");
@@ -216,12 +216,11 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
   arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, frame::z_jit_out_preserve_size);
   __ block_comment("} argument_shuffle");
 
-  __ block_comment("receiver {");
-  __ get_vm_result(Z_ARG1);
-  __ block_comment("} receiver");
-
-  __ load_const_optimized(Z_method, (intptr_t)entry);
-  __ z_stg(Z_method, Address(Z_thread, in_bytes(JavaThread::callee_target_offset())));
+  __ block_comment("load_target {");
+  __ load_const_optimized(Z_ARG1, (intptr_t)receiver);
+  __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target());
+  __ call(call_target_address); // load target Method* into Z_method
+  __ block_comment("} load_target");
 
   __ z_lg(call_target_address, Address(Z_method,
in_bytes(Method::from_compiled_offset()))); __ call(call_target_address); @@ -274,7 +273,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, #ifndef PRODUCT stringStream ss; - ss.print("upcall_stub_%s", entry->signature()->as_C_string()); + ss.print("upcall_stub_%s", signature->as_C_string()); const char* name = _masm->code_string(ss.as_string()); #else // PRODUCT const char* name = "upcall_stub"; diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 352cfc0018848..c1679cd111f5a 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1919,6 +1919,11 @@ void Assembler::cmpb(Address dst, int imm8) { emit_int8(imm8); } +void Assembler::cmpb(Register dst, int imm8) { + prefix(dst); + emit_arith_b(0x80, 0xF8, dst, imm8); +} + void Assembler::cmpl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); @@ -8048,6 +8053,14 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) { emit_int16(0x54, (0xC0 | encode)); } +void Assembler::andnpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x55, (0xC0 | encode)); +} + void Assembler::andps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); @@ -9659,6 +9672,15 @@ void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01); } +void Assembler::evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len) { + assert(VM_Version::supports_avx512dq(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x38, (0xC0 | encode), imm8 & 0x03); +} + // vinsertf forms @@ -11723,6 +11745,21 @@ void Assembler::vbroadcastf128(XMMRegister dst, Address src, int vector_len) { emit_operand(dst, src, 0); } +void Assembler::evbroadcastf64x2(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_avx512dq(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + assert(dst != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit); + attributes.set_is_evex_instruction(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x1A); + emit_operand(dst, src, 0); +} + + // gpr source broadcast forms // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp 
b/src/hotspot/cpu/x86/assembler_x86.hpp index 7f4790e05665e..eace7bb9cc169 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1239,6 +1239,7 @@ class Assembler : public AbstractAssembler { void cmpb(Address dst, int imm8); void cmpb(Address dst, Register reg); void cmpb(Register reg, Address dst); + void cmpb(Register reg, int imm8); void cmpl(Address dst, int32_t imm32); void cmpl(Register dst, int32_t imm32); @@ -2631,6 +2632,7 @@ class Assembler : public AbstractAssembler { // Bitwise Logical AND of Packed Floating-Point Values void andpd(XMMRegister dst, XMMRegister src); + void andnpd(XMMRegister dst, XMMRegister src); void andps(XMMRegister dst, XMMRegister src); void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -2985,6 +2987,7 @@ class Assembler : public AbstractAssembler { void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len); // vinsertf forms void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); @@ -3034,6 +3037,7 @@ class Assembler : public AbstractAssembler { void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len); void vbroadcastsd(XMMRegister dst, Address src, int vector_len); void vbroadcastf128(XMMRegister dst, Address src, int vector_len); + void evbroadcastf64x2(XMMRegister dst, Address src, int vector_len); // gpr sourced byte/word/dword/qword replicate void evpbroadcastb(XMMRegister dst, Register src, int vector_len); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index c3444d5a5abce..6d9812c11ae6e 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1578,6 +1578,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { add_debug_info_for_null_check_here(op->stub()->info()); + // init_state needs acquire, but x86 is TSO, and so we are already good. 
    __ cmpb(Address(op->klass()->as_register(),
                    InstanceKlass::init_state_offset()),
            InstanceKlass::fully_initialized);
diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
index ff237d16d2216..36e2021138f2e 100644
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -807,7 +807,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
       x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
       x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
-      x->id() == vmIntrinsics::_dlog10) {
+      x->id() == vmIntrinsics::_dlog10
+#ifdef _LP64
+      || x->id() == vmIntrinsics::_dtanh
+#endif
+      ) {
     do_LibmIntrinsic(x);
     return;
   }
@@ -989,11 +993,17 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
       break;
     case vmIntrinsics::_dtan:
       if (StubRoutines::dtan() != nullptr) {
-       __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
+        __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
      } else {
        __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
      }
      break;
+    case vmIntrinsics::_dtanh:
+      assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found");
+      if (StubRoutines::dtanh() != nullptr) {
+        __ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args());
+      }
+      break;
     default: ShouldNotReachHere();
   }
 #endif // _LP64
diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
index 1990488d8a0df..44f897529e7ce 100644
--- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
@@ -80,8 +80,6 @@ int C2FastUnlockLightweightStub::max_size() const {
 void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
   assert(_t == rax, "must be");
 
-  Label restore_held_monitor_count_and_slow_path;
-
   { // Restore lock-stack and handle the unlock in runtime.
 
     __ bind(_push_and_slow_path);
@@ -91,61 +89,9 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
     __ movptr(Address(_thread, _t), _obj);
 #endif
     __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize);
-  }
-
-  { // Restore held monitor count and slow path.
-
-    __ bind(restore_held_monitor_count_and_slow_path);
-    __ bind(_slow_path);
-    // Restore held monitor count.
-    __ increment(Address(_thread, JavaThread::held_monitor_count_offset()));
-    // increment will always result in ZF = 0 (no overflows).
+    // addl will always result in ZF = 0 (no overflows).
     __ jmp(slow_path_continuation());
   }
-
-  { // Handle monitor medium path.
-
-    __ bind(_check_successor);
-
-    Label fix_zf_and_unlocked;
-    const Register monitor = _mark;
-
-#ifndef _LP64
-    __ jmpb(restore_held_monitor_count_and_slow_path);
-#else // _LP64
-    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
-    const Address succ_address(monitor, ObjectMonitor::succ_offset() - monitor_tag);
-    const Address owner_address(monitor, ObjectMonitor::owner_offset() - monitor_tag);
-
-    // successor null check.
-    __ cmpptr(succ_address, NULL_WORD);
-    __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path);
-
-    // Release lock.
-    __ movptr(owner_address, NULL_WORD);
-
-    // Fence.
-    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
-    __ lock(); __ addl(Address(rsp, 0), 0);
-
-    // Recheck successor.
- __ cmpptr(succ_address, NULL_WORD); - // Observed a successor after the release -> fence we have handed off the monitor - __ jccb(Assembler::notEqual, fix_zf_and_unlocked); - - // Try to relock, if it fails the monitor has been handed over - // TODO: Caveat, this may fail due to deflation, which does - // not handle the monitor handoff. Currently only works - // due to the responsible thread. - __ xorptr(rax, rax); - __ lock(); __ cmpxchgptr(_thread, owner_address); - __ jccb (Assembler::equal, restore_held_monitor_count_and_slow_path); -#endif - - __ bind(fix_zf_and_unlocked); - __ xorl(rax, rax); - __ jmp(unlocked_continuation()); - } } #undef __ diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index c2801a791cb5a..aba5344b7e434 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -459,87 +459,43 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // IA32's memory-model is SPO, so STs are ordered with respect to // each other and there's no need for an explicit barrier (fence). // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. -#ifndef _LP64 - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - xorptr(boxReg, boxReg); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); -#else // _LP64 - // It's inflated - Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath; + Label LSuccess, LNotRecursive; cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); jccb(Assembler::equal, LNotRecursive); // Recursive inflated unlock - decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jmpb(LSuccess); bind(LNotRecursive); - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - jccb (Assembler::notZero, CheckSucc); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. + + // Set owner to null. + // Release to satisfy the JMM movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); - // Try to avoid passing control into the slow_path ... - bind (CheckSucc); + // Check if the entry lists are empty. + movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + jccb(Assembler::zero, LSuccess); // If so we are done. - // The following optional optimization can be elided if necessary - // Effectively: if (succ == null) goto slow path - // The code reduces the window for a race, however, - // and thus benefits performance. + // Check if there is a successor. 
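+    // (If a successor exists it will acquire the monitor on its own; only
+    // when the entry lists are non-empty and no successor is present do we
+    // have to fall back to the slow path to avoid stranding waiters.)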
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); - - xorptr(boxReg, boxReg); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); + jccb(Assembler::notZero, LSuccess); // If so we are done. - // Memory barrier/fence - // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ - // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. - // This is faster on Nehalem and AMD Shanghai/Barcelona. - // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences - // We might also restructure (ST Owner=0;barrier;LD _Succ) to - // (mov box,0; xchgq box, &m->Owner; LD _succ) . - lock(); addl(Address(rsp, 0), 0); + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + andptr(tmpReg, ~(int32_t)markWord::monitor_value); +#ifndef _LP64 + get_thread(boxReg); + movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#else // _LP64 + movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#endif - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::notZero, LSuccess); - - // Rare inopportune interleaving - race. - // The successor vanished in the small window above. - // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. - // We need to ensure progress and succession. - // Try to reacquire the lock. - // If that fails then the new owner is responsible for succession and this - // thread needs to take no further action and can exit via the fast path (success). - // If the re-acquire succeeds then pass control into the slow path. - // As implemented, this latter mode is horrible because we generated more - // coherence traffic on the lock *and* artificially extended the critical section - // length while by virtue of passing control into the slow path. - - // box is really RAX -- the following CMPXCHG depends on that binding - // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) - lock(); - cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - // There's no successor so we tried to regrab the lock. - // If that didn't work, then another thread grabbed the - // lock so we're done (and exit was a success). - jccb (Assembler::notEqual, LSuccess); - // Intentional fall-through into slow path - - bind (LGoSlowPath); orl (boxReg, 1); // set ICC.ZF=0 to indicate failure jmpb (DONE_LABEL); @@ -547,7 +503,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); -#endif if (LockingMode == LM_LEGACY) { bind (Stacked); movptr(tmpReg, Address (boxReg, 0)); // re-fetch @@ -744,10 +699,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Handle inflated monitor. Label inflated, inflated_check_lock_stack; // Finish fast unlock successfully. MUST jump with ZF == 1 - Label unlocked; - - // Assume success. - decrement(Address(thread, JavaThread::held_monitor_count_offset())); + Label unlocked, slow_path; const Register mark = t; const Register monitor = t; @@ -763,8 +715,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } Label& push_and_slow_path = stub == nullptr ? 
                                             dummy : stub->push_and_slow_path();
-  Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
-  Label& slow_path = stub == nullptr ? dummy : stub->slow_path();
 
   { // Lightweight Unlock
 
@@ -839,6 +789,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
     const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
     const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
     const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+    const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
     const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
     const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
 
@@ -846,27 +797,42 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
 
     // Check if recursive.
     cmpptr(recursions_address, 0);
-    jccb(Assembler::notEqual, recursive);
+    jccb(Assembler::notZero, recursive);
+
+    // Set owner to null.
+    // Release to satisfy the JMM
+    movptr(owner_address, NULL_WORD);
+    // We need a full fence after clearing owner to avoid stranding.
+    // StoreLoad achieves this.
+    membar(StoreLoad);
 
     // Check if the entry lists are empty.
     movptr(reg_rax, cxq_address);
     orptr(reg_rax, EntryList_address);
-    jcc(Assembler::notZero, check_successor);
+    jccb(Assembler::zero, unlocked); // If so we are done.
 
-    // Release lock.
-    movptr(owner_address, NULL_WORD);
-    jmpb(unlocked);
+    // Check if there is a successor.
+    cmpptr(succ_address, NULL_WORD);
+    jccb(Assembler::notZero, unlocked); // If so we are done.
+
+    // Save the monitor pointer in the current thread, so we can try to
+    // reacquire the lock in SharedRuntime::monitor_exit_helper().
+    if (!UseObjectMonitorTable) {
+      andptr(monitor, ~(int32_t)markWord::monitor_value);
+    }
+    movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), monitor);
+
+    orl(t, 1); // Fast Unlock ZF = 0
+    jmpb(slow_path);
 
     // Recursive unlock.
     bind(recursive);
     decrement(recursions_address);
-    xorl(t, t);
   }
 
   bind(unlocked);
-  if (stub != nullptr) {
-    bind(stub->unlocked_continuation());
-  }
+  decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+  xorl(t, t); // Fast Unlock ZF = 1
 
 #ifdef ASSERT
   // Check that unlocked label is reached with ZF set.
@@ -875,6 +841,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
   stop("Fast Unlock ZF != 1");
 #endif
 
+  bind(slow_path);
   if (stub != nullptr) {
     bind(stub->slow_path_continuation());
   }
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index b52be627776b8..b6be4012519a0 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -38,7 +38,10 @@
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
 
 #define __ masm->
 
@@ -160,6 +163,56 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
   }
 }
 
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+                                     const Register thread, const Register value, const Register temp) {
+  // This code assumes that buffer index is pointer sized.
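+  // (The index counts down in bytes from the buffer capacity; an index of
+  // zero means the buffer is full and must be processed in the runtime.)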
+ STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address) + __ testptr(temp, temp); // index == 0? + __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ subptr(temp, wordSize); // temp := next index + __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index + __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index + __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpl(in_progress, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(in_progress, 0); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ cmpptr(pre_val, NULL_WORD); + __ jcc(Assembler::equal, done); + generate_queue_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp); + __ jmp(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -185,43 +238,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert(pre_val != rax, "check this code"); } - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ cmpl(in_progress, 0); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ cmpb(in_progress, 0); - } - __ jcc(Assembler::equal, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ cmpptr(pre_val, NULL_WORD); + generate_pre_barrier_fast_path(masm, thread); + // If marking is not active (*(mark queue active address) == 0), jump to done __ jcc(Assembler::equal, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ movptr(tmp, index); // tmp := *index_adr - __ cmpptr(tmp, 0); // tmp == 0? 
-  __ jcc(Assembler::equal, runtime); // If yes, goto runtime
-
-  __ subptr(tmp, wordSize);          // tmp := tmp - wordSize
-  __ movptr(index, tmp);             // *index_adr := tmp
-  __ addptr(tmp, buffer);            // tmp := tmp + *buffer_adr
-
-  // Record the previous value
-  __ movptr(Address(tmp, 0), pre_val);
-  __ jmp(done);
+  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);
 
   __ bind(runtime);
 
@@ -263,6 +283,54 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
   __ bind(done);
 }
 
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+                                            const Register store_addr,
+                                            const Register new_val,
+                                            const Register tmp,
+                                            const Register tmp2,
+                                            Label& done,
+                                            bool new_val_may_be_null) {
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  // Does store cross heap regions?
+  __ movptr(tmp, store_addr);                                    // tmp := store address
+  __ xorptr(tmp, new_val);                                       // tmp := store address ^ new value
+  __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes);               // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
+  __ jcc(Assembler::equal, done);
+  // Crosses regions, storing null?
+  if (new_val_may_be_null) {
+    __ cmpptr(new_val, NULL_WORD);                               // new value == null?
+    __ jcc(Assembler::equal, done);
+  }
+  // Storing region crossing non-null, is card young?
+  __ movptr(tmp, store_addr);                                    // tmp := store address
+  __ shrptr(tmp, CardTable::card_shift());                       // tmp := card address relative to card table base
+  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+  // a valid address and therefore is not properly handled by the relocation code.
+  __ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base());  // tmp2 := card table base address
+  __ addptr(tmp, tmp2);                                          // tmp := card address
+  __ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val());    // *(card address) == young_card_val?
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+                                            const Register thread,
+                                            const Register tmp,
+                                            const Register tmp2,
+                                            Label& done,
+                                            Label& runtime) {
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));  // StoreLoad membar
+  __ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val());       // *(card address) == dirty_card_val?
+  __ jcc(Assembler::equal, done);
+  // Storing a region crossing, non-null oop, card is clean.
+  // Dirty card and log.
+  __ movb(Address(tmp, 0), G1CardTable::dirty_card_val());       // *(card address) := dirty_card_val
+  generate_queue_insertion(masm,
+                           G1ThreadLocalData::dirty_card_queue_index_offset(),
+                           G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+                           runtime,
+                           thread, tmp, tmp2);
+  __ jmp(done);
+}
+
 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                   Register store_addr,
                                                   Register new_val,
@@ -273,74 +341,125 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
   assert(thread == r15_thread, "must be");
 #endif // _LP64
 
-  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
-  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
-  CardTableBarrierSet* ct =
-    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
-
   Label done;
   Label runtime;
 
-  // Does store cross heap regions?
- - __ movptr(tmp, store_addr); - __ xorptr(tmp, new_val); - __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done __ jcc(Assembler::equal, done); + generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime); - // crosses regions, storing null? + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread)); + __ push_set(saved); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread); + __ pop_set(saved); - __ cmpptr(new_val, NULL_WORD); - __ jcc(Assembler::equal, done); + __ bind(done); +} - // storing region crossing non-null, is card already dirty? +#if defined(COMPILER2) - const Register card_addr = tmp; - const Register cardtable = tmp2; +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { +#ifdef _LP64 + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mov(c_rarg0, arg); + } + __ mov(c_rarg1, r15_thread); + // rax is a caller-saved, non-argument-passing register, so it does not + // interfere with c_rarg0 or c_rarg1. If it contained any live value before + // entering this stub, it is saved at this point, and restored after the + // call. If it did not contain any live value, it is free to be used. In + // either case, it is safe to use it here as a call scratch register. + __ call(RuntimeAddress(runtime_path), rax); +#else + Unimplemented(); +#endif // _LP64 +} - __ movptr(card_addr, store_addr); - __ shrptr(card_addr, CardTable::card_shift()); - // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT - // a valid address and therefore is not properly handled by the relocation code. - __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base()); - __ addptr(card_addr, cardtable); +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + G1PreBarrierStubC2* stub) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + assert(pre_val != noreg, "check this code"); + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + } - __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val()); - __ jcc(Assembler::equal, done); + stub->initialize_registers(obj, pre_val, thread, tmp); - __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); - __ cmpb(Address(card_addr, 0), G1CardTable::dirty_card_val()); - __ jcc(Assembler::equal, done); + generate_pre_barrier_fast_path(masm, thread); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ jcc(Assembler::notEqual, *stub->entry()); + __ bind(*stub->continuation()); +} - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. 
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp = stub->tmp1(); + assert(stub->tmp2() == noreg, "not needed in this platform"); - __ movb(Address(card_addr, 0), G1CardTable::dirty_card_val()); + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime); - // The code below assumes that buffer index is pointer sized. - STATIC_ASSERT(in_bytes(G1DirtyCardQueue::byte_width_of_index()) == sizeof(intptr_t)); + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ jmp(*stub->continuation()); +} - __ movptr(tmp2, queue_index); - __ testptr(tmp2, tmp2); - __ jcc(Assembler::zero, runtime); - __ subptr(tmp2, wordSize); - __ movptr(queue_index, tmp2); - __ addptr(tmp2, buffer); - __ movptr(Address(tmp2, 0), card_addr); - __ jmp(done); +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2, + G1PostBarrierStubC2* stub) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 - __ bind(runtime); - // save the live input values - RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread)); - __ push_set(saved); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ pop_set(saved); + stub->initialize_registers(thread, tmp, tmp2); - __ bind(done); + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ jcc(Assembler::notEqual, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp = stub->tmp1(); // tmp holds the card address. 
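// Shape of the code that the stub machinery above produces, sketched with
// illustrative labels (not actual C2 output):
//
//   inline (hot path):
//     ...fast-path checks...
//     jcc   <condition>, stub_entry     ; taken only on the slow path
//   continuation:
//     ...rest of the compiled method...
//
//   stub_entry (out of line, cold):
//     <generate_*_barrier_slow_path>
//     jmp   continuation                ; common slow-path exit
//   runtime:
//     <SaveLiveRegisters>, call the G1BarrierSetRuntime entry, restore
//     jmp   continuation
//
// Keeping the cold half out of line keeps the inline sequence short, which
// is the point of the late (C2) barrier expansion.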
+ Register tmp2 = stub->tmp2(); + assert(stub->tmp3() == noreg, "not needed in this platform"); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ jmp(*stub->continuation()); } +#endif // COMPILER2 + void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp index a5695f5657a4a..4dbb1efd885ea 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp @@ -32,6 +32,9 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1BarrierStubC2; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -65,6 +68,26 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp_thread); + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif // COMPILER2 }; #endif // CPU_X86_GC_G1_G1BARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad new file mode 100644 index 0000000000000..8c1559f90f46d --- /dev/null +++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad @@ -0,0 +1,371 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_x86.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+                              const MachNode* node,
+                              Register obj,
+                              Register pre_val,
+                              Register tmp,
+                              RegSet preserve = RegSet(),
+                              RegSet no_preserve = RegSet()) {
+  if (!G1PreBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+    stub->preserve(*reg);
+  }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+    stub->dont_preserve(*reg);
+  }
+  g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, r15_thread, tmp, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+                               const MachNode* node,
+                               Register store_addr,
+                               Register new_val,
+                               Register tmp1,
+                               Register tmp2) {
+  if (!G1PostBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+  g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, r15_thread, tmp1, tmp2, stub);
+}
+
+%}
+
+instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+  predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreP mem src));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, $src\t# ptr" %}
+  ins_encode %{
+    // Materialize the store address internally (as opposed to defining 'mem' as
+    // an indirect memory operand) to reduce the overhead of LCM when processing
+    // large basic blocks with many stores. Such basic blocks arise, for
+    // instance, from static initializations of large String arrays.
+    // The same holds for g1StoreN and g1EncodePAndStoreN.
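    // For contrast, a hypothetical encoding that kept 'mem' as an indirect
    // operand would repeat the full addressing mode at every use, e.g.:
    //   __ movq($mem$$Address, $src$$Register);
    // and again inside each barrier that needs the address. The lea below
    // computes the address once into tmp1, so every later access is a plain
    // [tmp1 + 0] operand.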
+ __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movq(Address($tmp1$$Register, 0), $src$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(125); // XXX + format %{ "movl $mem, $src\t# ptr" %} + ins_encode %{ + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movl(Address($tmp1$$Register, 0), $src$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + __ movl($tmp2$$Register, $src$$Register); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp2$$Register); + } else { + __ decode_heap_oop_not_null($tmp2$$Register); + } + } + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(125); // XXX + format %{ "encode_heap_oop $src\n\t" + "movl $mem, $src\t# ptr" %} + ins_encode %{ + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movq($tmp2$$Register, $src$$Register); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp2$$Register); + } else { + __ encode_heap_oop_not_null($tmp2$$Register); + } + __ movl(Address($tmp1$$Register, 0), $tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP. 
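    // Sketch of why $oldval suffices as pre_val (illustrative, not VM code):
    //   if (*addr == oldval) { *addr = newval; }   // cmpxchg semantics
    // The only previous value that the exchange can overwrite is oldval
    // itself, so recording oldval in the SATB buffer unconditionally is
    // conservative (it may log a value that was never overwritten) but safe.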
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set oldval (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0)); + __ decode_heap_oop($tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0)); + __ setb(Assembler::equal, $res$$Register); + __ movzbl($res$$Register, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* 
preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0)); + __ setb(Assembler::equal, $res$$Register); + __ movzbl($res$$Register, $res$$Register); + __ decode_heap_oop($tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set newval (GetAndSetP mem newval)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ xchgq($newval$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set newval (GetAndSetN mem newval)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ decode_heap_oop($tmp1$$Register); + __ xchgl($newval$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1LoadP(rRegP dst, memory mem, rRegP tmp, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp, KILL cr); + ins_cost(125); // XXX + format %{ "movq $dst, $mem\t# ptr" %} + ins_encode %{ + __ movq($dst$$Register, $mem$$Address); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp$$Register /* tmp */); + %} + ins_pipe(ialu_reg_mem); // XXX +%} + +instruct g1LoadN(rRegN dst, memory mem, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "movl $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + __ movl($tmp1$$Register, $dst$$Register); + __ decode_heap_oop($tmp1$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + %} + ins_pipe(ialu_reg_mem); // XXX +%} diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index 
47078dff90738..a7682fe0c3879 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -163,12 +163,12 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec assert(dst == rsi, "expected"); assert(count == rdx, "expected"); if (UseCompressedOops) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count); } else #endif { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count); } @@ -296,9 +296,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, __ push(thread); __ push(pre_val); #endif - __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); + __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), 2); } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); } NOT_LP64( __ pop(thread); ) @@ -925,7 +925,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss // load the pre-value __ load_parameter(0, rcx); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), rcx, thread); __ restore_live_registers(true); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 893ae4e844ba4..018258a012e57 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -5084,7 +5084,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa L_slow_path = &L_fallthrough; } - // Fast path check: class is fully initialized + // Fast path check: class is fully initialized. + // init_state needs acquire, but x86 is TSO, and so we are already good. 
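  // In portable terms the check below is an acquire load; a sketch with a
  // hypothetical std::atomic mirror of init_state:
  //   if (init_state.load(std::memory_order_acquire) == fully_initialized) { ... }
  // Under x86-TSO every ordinary load already carries acquire semantics, so
  // the plain cmpb is enough; a weakly-ordered port would need an explicit
  // barrier or acquiring load here.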
cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); jcc(Assembler::equal, *L_fast_path); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp index 439c17b10d37a..09d379a4296d4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp @@ -81,8 +81,8 @@ void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address notl(rsi); \ andl(rdi, r2); \ andl(rsi, r3); \ - orl(rsi, rdi); \ addl(r1, rsi); \ + addl(r1, rdi); \ roll(r1, s); \ addl(r1, r2); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 4bd91f640fca7..174e2e0277903 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -2674,7 +2674,7 @@ void SharedRuntime::generate_deopt_blob() { int reexecute_offset = __ pc() - start; #if INCLUDE_JVMCI && !defined(COMPILER1) - if (EnableJVMCI && UseJVMCICompiler) { + if (UseJVMCICompiler) { // JVMCI does not use this kind of deoptimization __ should_not_reach_here(); } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 2bc4a0a9cba94..ee6311c25f6fe 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.hpp" #include "classfile/vmIntrinsics.hpp" #include "compiler/oopMap.hpp" #include "gc/shared/barrierSet.hpp" @@ -3573,6 +3574,9 @@ void StubGenerator::generate_libm_stubs() { if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { StubRoutines::_dtan = generate_libmTan(); // from stubGenerator_x86_64_tan.cpp } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) { + StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp + } if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp } @@ -3793,6 +3797,28 @@ address StubGenerator::generate_upcall_stub_exception_handler() { return start; } +// load Method* target of MethodHandle +// j_rarg0 = jobject receiver +// rbx = result +address StubGenerator::generate_upcall_stub_load_target() { + StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target"); + address start = __ pc(); + + __ resolve_global_jobject(j_rarg0, r15_thread, rscratch1); + // Load target method from receiver + __ load_heap_oop(rbx, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1); + __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1); + __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_MemberName::method_offset()), rscratch1); + __ access_load_at(T_ADDRESS, IN_HEAP, rbx, + Address(rbx, java_lang_invoke_ResolvedMethodName::vmtarget_offset()), + noreg, noreg); + __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized + + __ ret(0); + + return start; +} + address StubGenerator::generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); @@ -3952,6 +3978,7 @@ void StubGenerator::generate_final_stubs() { } StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); + StubRoutines::_upcall_stub_load_target = 
generate_upcall_stub_load_target(); } void StubGenerator::generate_compiler_stubs() { @@ -4157,41 +4184,41 @@ void StubGenerator::generate_compiler_stubs() { log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml)); if (UseAVX > 2) { - for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) { - int vop = VectorSupport::VECTOR_OP_SVML_START + op; + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { + int vop = VectorSupport::VECTOR_OP_MATH_START + op; if ((!VM_Version::supports_avx512dq()) && (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) { continue; } - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]); StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]); StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf); } } const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex"); - for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) { - int vop = VectorSupport::VECTOR_OP_SVML_START + op; + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { + int vop = VectorSupport::VECTOR_OP_MATH_START + op; if (vop == VectorSupport::VECTOR_OP_POW) { continue; } - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf); - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str); + snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str); StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf); } } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index d65c681585d6d..7280e9fbe957e 100644 --- 
a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -376,11 +376,22 @@ class StubGenerator: public StubCodeGenerator { void roundDec(XMMRegister key, int rnum); void lastroundDec(XMMRegister key, int rnum); void gfmul_avx512(XMMRegister ghash, XMMRegister hkey); - void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch); - void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, - XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction, - XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos, - bool final_reduction, int index, XMMRegister counter_inc_mask); + void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl, + Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH, + XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, + bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor, + bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset); + void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl); + void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl, + Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4, + XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset); + void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl, + int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction); + void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor, + Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset, + int in_disp, int displacement, int hashkey_offset); + void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key, + Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter); // AVX2 AES-GCM related functions void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len, Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos); @@ -546,6 +557,7 @@ class StubGenerator: public StubCodeGenerator { address generate_libmSin(); address generate_libmCos(); address generate_libmTan(); + address generate_libmTanh(); address generate_libmExp(); address generate_libmPow(); address generate_libmLog(); @@ -608,6 +620,7 @@ class StubGenerator: public StubCodeGenerator { // shared exception handler for FFM upcall stubs address generate_upcall_stub_exception_handler(); + address generate_upcall_stub_load_target(); // Specialized stub implementations for UseSecondarySupersTable. address generate_lookup_secondary_supers_table_stub(u1 super_klass_index); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp index 9744169498c8b..f14d368c376e1 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2019, 2023, Intel Corporation. All rights reserved. +* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved. 
* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -172,6 +172,38 @@ static address ghash_polynomial_two_one_addr() { return (address)GHASH_POLYNOMIAL_TWO_ONE; } +// This mask is used for incrementing counter value +ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_4444[] = { + 0x0000000000000000ULL, 0x0400000000000000ULL, + 0x0000000000000000ULL, 0x0400000000000000ULL, + 0x0000000000000000ULL, 0x0400000000000000ULL, + 0x0000000000000000ULL, 0x0400000000000000ULL, +}; +static address counter_mask_addbe_4444_addr() { + return (address)COUNTER_MASK_ADDBE_4444; +} + +// This mask is used for incrementing counter value +ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_1234[] = { + 0x0000000000000000ULL, 0x0100000000000000ULL, + 0x0000000000000000ULL, 0x0200000000000000ULL, + 0x0000000000000000ULL, 0x0300000000000000ULL, + 0x0000000000000000ULL, 0x0400000000000000ULL, +}; +static address counter_mask_addbe_1234_addr() { + return (address)COUNTER_MASK_ADDBE_1234; +} + +// This mask is used for incrementing counter value +ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADD_1234[] = { + 0x0000000000000001ULL, 0x0000000000000000ULL, + 0x0000000000000002ULL, 0x0000000000000000ULL, + 0x0000000000000003ULL, 0x0000000000000000ULL, + 0x0000000000000004ULL, 0x0000000000000000ULL, +}; +static address counter_mask_add_1234_addr() { + return (address)COUNTER_MASK_ADD_1234; +} // AES intrinsic stubs @@ -209,10 +241,10 @@ void StubGenerator::generate_aes_stubs() { // len = rdx (c_rarg1) | rdi (c_rarg1) // ct = r8 (c_rarg2) | rdx (c_rarg2) // out = r9 (c_rarg3) | rcx (c_rarg3) -// key = r10 | r8 (c_rarg4) -// state = r13 | r9 (c_rarg5) -// subkeyHtbl = r14 | r11 -// counter = rsi | r12 +// key = rsi | r8 (c_rarg4) +// state = rdi | r9 (c_rarg5) +// subkeyHtbl = r10 | r10 +// counter = r11 | r11 // // Output: // rax - number of processed bytes @@ -230,31 +262,31 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() { const Register key = c_rarg4; const Register state = c_rarg5; const Address subkeyH_mem(rbp, 2 * wordSize); - const Register subkeyHtbl = r11; - const Register avx512_subkeyHtbl = r13; + const Register subkeyHtbl = r10; + const Register avx512_subkeyHtbl = r12; const Address counter_mem(rbp, 3 * wordSize); - const Register counter = r12; + const Register counter = r11; #else const Address key_mem(rbp, 6 * wordSize); - const Register key = r10; + const Register key = rsi; const Address state_mem(rbp, 7 * wordSize); - const Register state = r13; + const Register state = rdi; const Address subkeyH_mem(rbp, 8 * wordSize); - const Register subkeyHtbl = r14; + const Register subkeyHtbl = r10; const Register avx512_subkeyHtbl = r12; const Address counter_mem(rbp, 9 * wordSize); - const Register counter = rsi; + const Register counter = r11; #endif __ enter(); // Save state before entering routine - __ push(r12); - __ push(r13); - __ push(r14); - __ push(r15); - __ push(rbx); + __ push(r12);//holds pointer to avx512_subkeyHtbl + __ push(r14);//holds CTR_CHECK value to check for overflow + __ push(r15);//holds number of rounds + __ push(rbx);//scratch register #ifdef _WIN64 // on win64, fill len_reg from stack position __ push(rsi); + __ push(rdi); __ movptr(key, key_mem); __ movptr(state, state_mem); #endif @@ -262,24 +294,24 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() { __ movptr(counter, counter_mem); // Align stack __ andq(rsp, -64); - __ subptr(rsp, 96 * longSize); // Create space on the stack for htbl entries + __ 
subptr(rsp, 200 * longSize); // Create space on the stack for 64 htbl entries and 8 zmm AES entries __ movptr(avx512_subkeyHtbl, rsp); - aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter); + aesgcm_avx512(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter); __ vzeroupper(); // Restore state before leaving routine #ifdef _WIN64 __ lea(rsp, Address(rbp, -6 * wordSize)); + __ pop(rdi); __ pop(rsi); #else - __ lea(rsp, Address(rbp, -5 * wordSize)); + __ lea(rsp, Address(rbp, -4 * wordSize)); #endif __ pop(rbx); __ pop(r15); __ pop(r14); - __ pop(r13); __ pop(r12); __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -2708,87 +2740,100 @@ void StubGenerator::gfmul_avx512(XMMRegister GH, XMMRegister HK) { __ vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit); } -void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl, Register rscratch) { +// Holds 64 Htbl entries, 32 HKey and 32 HkKey (derived from HKey) +void StubGenerator::generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl) { const XMMRegister HK = xmm6; - const XMMRegister ZT5 = xmm4; - const XMMRegister ZT7 = xmm7; - const XMMRegister ZT8 = xmm8; - - Label GFMUL_AVX512; + const XMMRegister ZT1 = xmm0, ZT2 = xmm1, ZT3 = xmm2, ZT4 = xmm3; + const XMMRegister ZT5 = xmm4, ZT6 = xmm5, ZT7 = xmm7, ZT8 = xmm8; + const XMMRegister ZT10 = xmm10, ZT11 = xmm11, ZT12 = xmm12; __ movdqu(HK, Address(htbl, 0)); - __ movdqu(xmm10, ExternalAddress(ghash_long_swap_mask_addr()), rscratch); - __ vpshufb(HK, HK, xmm10, Assembler::AVX_128bit); - - __ movdqu(xmm11, ExternalAddress(ghash_polynomial_addr()), rscratch); - __ movdqu(xmm12, ExternalAddress(ghash_polynomial_two_one_addr()), rscratch); + __ movdqu(ZT10, ExternalAddress(ghash_long_swap_mask_addr()), r15); + __ vpshufb(HK, HK, ZT10, Assembler::AVX_128bit); + __ movdqu(ZT11, ExternalAddress(ghash_polynomial_addr()), r15); + __ movdqu(ZT12, ExternalAddress(ghash_polynomial_two_one_addr()), r15); // Compute H ^ 2 from the input subkeyH - __ movdqu(xmm2, xmm6); - __ vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit); - __ vpsrlq(xmm2, xmm2, 63, Assembler::AVX_128bit); - __ movdqu(xmm1, xmm2); - __ vpslldq(xmm2, xmm2, 8, Assembler::AVX_128bit); - __ vpsrldq(xmm1, xmm1, 8, Assembler::AVX_128bit); - __ vpor(xmm6, xmm6, xmm2, Assembler::AVX_128bit); + __ movdqu(ZT3, HK); + __ vpsllq(HK, HK, 1, Assembler::AVX_128bit); + __ vpsrlq(ZT3, ZT3, 63, Assembler::AVX_128bit); + __ movdqu(ZT2, ZT3); + __ vpslldq(ZT3, ZT3, 8, Assembler::AVX_128bit); + __ vpsrldq(ZT2, ZT2, 8, Assembler::AVX_128bit); + __ vpor(HK, HK, ZT3, Assembler::AVX_128bit); + __ vpshufd(ZT3, ZT2, 0x24, Assembler::AVX_128bit); + __ vpcmpeqd(ZT3, ZT3, ZT12, Assembler::AVX_128bit); + __ vpand(ZT3, ZT3, ZT11, Assembler::AVX_128bit); + __ vpxor(HK, HK, ZT3, Assembler::AVX_128bit); + __ movdqu(Address(avx512_htbl, 16 * 31), HK); // H ^ 2 - __ vpshufd(xmm2, xmm1, 0x24, Assembler::AVX_128bit); - __ vpcmpeqd(xmm2, xmm2, xmm12, Assembler::AVX_128bit); - __ vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit); - __ vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit); - __ movdqu(Address(avx512_htbl, 16 * 47), xmm6); // H ^ 2 - // Compute the remaining three powers of H using XMM registers and all following powers using ZMM __ movdqu(ZT5, HK); - __ vinserti32x4(ZT7, ZT7, HK, 3); + __ evinserti64x2(ZT7, ZT7, HK, 3, Assembler::AVX_512bit); + //calculate HashKey ^ 2 << 1 mod poly gfmul_avx512(ZT5, HK); - __ movdqu(Address(avx512_htbl, 16 * 46), ZT5); // H ^ 2 * 2 - __ 
vinserti32x4(ZT7, ZT7, ZT5, 2);
+  __ movdqu(Address(avx512_htbl, 16 * 30), ZT5);
+  __ evinserti64x2(ZT7, ZT7, ZT5, 2, Assembler::AVX_512bit);
+  //calculate HashKey ^ 3 << 1 mod poly
   gfmul_avx512(ZT5, HK);
-  __ movdqu(Address(avx512_htbl, 16 * 45), ZT5); // H ^ 2 * 3
-  __ vinserti32x4(ZT7, ZT7, ZT5, 1);
+  __ movdqu(Address(avx512_htbl, 16 * 29), ZT5);
+  __ evinserti64x2(ZT7, ZT7, ZT5, 1, Assembler::AVX_512bit);
+  //calculate HashKey ^ 4 << 1 mod poly
   gfmul_avx512(ZT5, HK);
-  __ movdqu(Address(avx512_htbl, 16 * 44), ZT5); // H ^ 2 * 4
-  __ vinserti32x4(ZT7, ZT7, ZT5, 0);
-
-  __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit);
-  __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);
-  gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 40), ZT7, Assembler::AVX_512bit);
-  __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);
-  gfmul_avx512(ZT8, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 36), ZT8, Assembler::AVX_512bit);
+  __ movdqu(Address(avx512_htbl, 16 * 28), ZT5);
+  __ evinserti64x2(ZT7, ZT7, ZT5, 0, Assembler::AVX_512bit);
+  // ZT5 and ZT7 to be cleared(hash key)
+  //calculate HashKeyK = HashKey x POLY
+  __ evmovdquq(xmm11, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, r15);
+  __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+  __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+  __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+  __ evmovdquq(Address(avx512_htbl, 16 * 60), ZT1, Assembler::AVX_512bit);
+  //**ZT1 and ZT2 to be cleared(hash key)
+
+  //switch to 4x128-bit computations now
+  __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit); // broadcast HashKey ^ 4 across all of ZT5
+  __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit); // save HashKey ^ 4 to HashKey ^ 1 in ZT8
+  //**ZT8 to be cleared(hash key)
+
+  //calculate HashKey ^ 5 << 1 mod poly, HashKey ^ 6 << 1 mod poly, ... HashKey ^ 8 << 1 mod poly
   gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 32), ZT7, Assembler::AVX_512bit);
-  gfmul_avx512(ZT8, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 28), ZT8, Assembler::AVX_512bit);
-  gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);
-  gfmul_avx512(ZT8, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 20), ZT8, Assembler::AVX_512bit);
-  gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 16), ZT7, Assembler::AVX_512bit);
-  gfmul_avx512(ZT8, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 12), ZT8, Assembler::AVX_512bit);
-  gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 8), ZT7, Assembler::AVX_512bit);
-  gfmul_avx512(ZT8, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 4), ZT8, Assembler::AVX_512bit);
-  gfmul_avx512(ZT7, ZT5);
-  __ evmovdquq(Address(avx512_htbl, 16 * 0), ZT7, Assembler::AVX_512bit);
-  __ ret(0);
-}
-
-#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \
-__ evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \
-__ vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \
-__ evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \
-__ evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \
-__ vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \
-__ evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \
-__ vpslldq(out, out, 4, Assembler::AVX_512bit); \
-__ vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \
+  __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit); // HashKey ^ 8 to HashKey ^ 5 in ZT7 now
+
+  //calculate HashKeyK = HashKey x POLY
+  __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+  __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+  __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+  __ evmovdquq(Address(avx512_htbl, 16 * 56), ZT1, Assembler::AVX_512bit);
+
+  __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit); // broadcast HashKey ^ 8 across all of ZT5
+
+  for (int i = 20, j = 52; i > 0;) {
+    gfmul_avx512(ZT8, ZT5);
+    __ evmovdquq(Address(avx512_htbl, 16 * i), ZT8, Assembler::AVX_512bit);
+    //calculate HashKeyK = HashKey x POLY
+    __ evpclmulqdq(ZT1, ZT8, xmm11, 0x10, Assembler::AVX_512bit);
+    __ vpshufd(ZT2, ZT8, 78, Assembler::AVX_512bit);
+    __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+    __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+    i -= 4;
+    j -= 4;
+    //compute HashKey ^ (8 + n), HashKey ^ (7 + n), ...
HashKey ^ (5 + n)
+    gfmul_avx512(ZT7, ZT5);
+    __ evmovdquq(Address(avx512_htbl, 16 * i), ZT7, Assembler::AVX_512bit);
+
+    //calculate HashKeyK = HashKey x POLY
+    __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+    __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+    __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+    __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+    i -= 4;
+    j -= 4;
+  }
 }
 
 #define vhpxori4x128(reg, tmp) \
 __ vextracti64x4(tmp, reg, 1); \
@@ -2820,21 +2865,17 @@
 __ evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::AVX_512bit); \
 __ evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \
 __ evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \
 
-#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \
-__ evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \
-
-#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \
-__ vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \
+#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey2, hkey1) \
+__ evpclmulqdq(dst00, ghdata, hkey2, 0x00, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst01, ghdata, hkey2, 0x10, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst10, ghdata, hkey1, 0x01, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst11, ghdata, hkey1, 0x11, Assembler::AVX_512bit); \
+
+#define shuffle(dst0, dst1, dst2, dst3, src0, src1, src2, src3, shufmask) \
+__ vpshufb(dst0, src0, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst1, src1, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst2, src2, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst3, src3, shufmask, Assembler::AVX_512bit); \
 
 #define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
 __ evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \
@@ -2848,211 +2889,462 @@
 __ vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \
 __ vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \
 __ vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \
 
-void StubGenerator::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx,
-  Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds,
-  Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) {
-  Label AES_192, AES_256, LAST_AES_RND;
+//schoolbook multiply of 16 blocks (8 x 16 bytes)
+//it is assumed that the data read in is already shuffled
+void StubGenerator::ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
+                                   Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
+                                   int in_disp, int displacement, int hashkey_offset) {
   const XMMRegister ZTMP0 = xmm0;
   const XMMRegister ZTMP1 =
xmm3;
   const XMMRegister ZTMP2 = xmm4;
   const XMMRegister ZTMP3 = xmm5;
+  const XMMRegister ZTMP4 = xmm6;
   const XMMRegister ZTMP5 = xmm7;
   const XMMRegister ZTMP6 = xmm10;
   const XMMRegister ZTMP7 = xmm11;
   const XMMRegister ZTMP8 = xmm12;
   const XMMRegister ZTMP9 = xmm13;
-  const XMMRegister ZTMP10 = xmm15;
-  const XMMRegister ZTMP11 = xmm16;
-  const XMMRegister ZTMP12 = xmm17;
+  const XMMRegister ZTMPA = xmm26;
+  const XMMRegister ZTMPB = xmm23;
+  const XMMRegister GH = xmm24;
+  const XMMRegister GL = xmm25;
+  const int hkey_gap = 16 * 32;
+
+  if (uload_shuffle) {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+    __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+  } else {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+  }
 
-  const XMMRegister ZTMP13 = xmm19;
-  const XMMRegister ZTMP14 = xmm20;
-  const XMMRegister ZTMP15 = xmm21;
-  const XMMRegister ZTMP16 = xmm30;
-  const XMMRegister ZTMP17 = xmm31;
-  const XMMRegister ZTMP18 = xmm1;
-  const XMMRegister ZTMP19 = xmm2;
-  const XMMRegister ZTMP20 = xmm8;
-  const XMMRegister ZTMP21 = xmm22;
-  const XMMRegister ZTMP22 = xmm23;
+  if (start_ghash) {
+    __ evpxorq(ZTMP9, ZTMP9, HASH, Assembler::AVX_512bit);
+  }
+  if (hk_broadcast) {
+    __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+    __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+  } else {
+    __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+    __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+  }
+
+  carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8);
+
+  //ghash blocks 4 - 7
+  if (uload_shuffle) {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+    __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+  } else {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+  }
+
+  if (hk_broadcast) {
+    __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);
+    __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+  } else {
+    __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);
+    __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+  }
+
+  carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8);
+
+  //update sums
+  if (start_ghash) {
+    __ evpxorq(GL, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+    __ evpxorq(GH, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+  } else { //mid, end, end_reduce
+    __ vpternlogq(GL, 0x96, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+    __ vpternlogq(GH, 0x96, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+  }
+  //ghash blocks 8 - 11
+  if (uload_shuffle) {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+    __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+  } else {
+    __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+  }
+  if (hk_broadcast) {
+    __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64),
Assembler::AVX_512bit); + __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit); + } else { + __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64), Assembler::AVX_512bit); + __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit); + } + + carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8); + + //update sums + __ vpternlogq(GL, 0x96, ZTMP6, ZTMP4, Assembler::AVX_512bit);//T2 = THL + TLL + __ vpternlogq(GH, 0x96, ZTMP7, ZTMP5, Assembler::AVX_512bit);//T1 = THH + TLH + //ghash blocks 12 - 15 + if (uload_shuffle) { + __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit); + __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit); + } else { + __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit); + } + + if (hk_broadcast) { + __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit); + __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit); + } else { + __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit); + __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit); + } + carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8); + + //update sums + xorGHASH(GL, GH, GL, GH, ZTMP0, ZTMP2, ZTMP1, ZTMP3, ZTMP6, ZTMP4, ZTMP7, ZTMP5); + + if (do_reduction) { + //new reduction + __ evmovdquq(ZTMPB, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); + __ evpclmulqdq(HASH, GL, ZTMPB, 0x10, Assembler::AVX_512bit); + __ vpshufd(ZTMP0, GL, 78, Assembler::AVX_512bit); + __ vpternlogq(HASH, 0x96, GH, ZTMP0, Assembler::AVX_512bit); + if (do_hxor) { + vhpxori4x128(HASH, ZTMP0); + } + } +} - // Pre increment counters - __ vpaddd(ZTMP0, ctr_blockx, counter_inc_mask, Assembler::AVX_512bit); - __ vpaddd(ZTMP1, ZTMP0, counter_inc_mask, Assembler::AVX_512bit); - __ vpaddd(ZTMP2, ZTMP1, counter_inc_mask, Assembler::AVX_512bit); - __ vpaddd(ZTMP3, ZTMP2, counter_inc_mask, Assembler::AVX_512bit); - // Save counter value - __ evmovdquq(ctr_blockx, ZTMP3, Assembler::AVX_512bit); - - // Reuse ZTMP17 / ZTMP18 for loading AES Keys - // Pre-load AES round keys - ev_load_key(ZTMP17, key, 0, xmm29); - ev_load_key(ZTMP18, key, 1 * 16, xmm29); - - // ZTMP19 & ZTMP20 used for loading hash key - // Pre-load hash key - __ evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64), Assembler::AVX_512bit); - __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit); - // Load data for computing ghash - __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); - __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit); - - // Xor cipher block 0 with input ghash, if available - if (ghash_input) { - __ evpxorq(ZTMP21, ZTMP21, aad_hashx, Assembler::AVX_512bit); +//Stitched GHASH of 16 blocks(with reduction) with encryption of 0 blocks +void StubGenerator::gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl, + int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction) { + //there is 0 blocks to cipher so there are only 16 blocks for ghash and reduction + ghash16_avx512(start_ghash, do_reduction, 
false, false, true, in, pos, subkeyHtbl, HASH, SHUFM, ghashin_offset, 0, 0, hashkey_offset); +} + +//Main GCM macro stitching cipher with GHASH +//encrypts 16 blocks at a time +//ghash the 16 previously encrypted ciphertext blocks +void StubGenerator::ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl, + Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR_BE, XMMRegister GHASH_IN, + XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHFMSK, + bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor, + bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset) { + const XMMRegister B00_03 = xmm0; + const XMMRegister B04_07 = xmm3; + const XMMRegister B08_11 = xmm4; + const XMMRegister B12_15 = xmm5; + const XMMRegister THH1 = xmm6; + const XMMRegister THL1 = xmm7; + const XMMRegister TLH1 = xmm10; + const XMMRegister TLL1 = xmm11, THH2 = xmm12, THL2 = xmm13, TLH2 = xmm15; + const XMMRegister TLL2 = xmm16, THH3 = xmm17, THL3 = xmm19, TLH3 = xmm20; + const XMMRegister TLL3 = xmm21, DATA1 = xmm17, DATA2 = xmm19, DATA3 = xmm20, DATA4 = xmm21; + const XMMRegister AESKEY1 = xmm30, AESKEY2 = xmm31; + const XMMRegister GHKEY1 = xmm1, GHKEY2 = xmm18, GHDAT1 = xmm8, GHDAT2 = xmm22; + const XMMRegister ZT = xmm23, TO_REDUCE_L = xmm25, TO_REDUCE_H = xmm24; + const int hkey_gap = 16 * 32; + + Label blocks_overflow, blocks_ok, skip_shuffle, cont, aes_256, aes_192, last_aes_rnd; + + __ cmpb(CTR_CHECK, (256 - 16)); + __ jcc(Assembler::aboveEqual, blocks_overflow); + __ vpaddd(B00_03, CTR_BE, ADDBE_1234, Assembler::AVX_512bit); + __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit); + __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit); + __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit); + __ jmp(blocks_ok); + __ bind(blocks_overflow); + __ vpshufb(CTR_BE, CTR_BE, SHFMSK, Assembler::AVX_512bit); + __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); + __ vpaddd(B00_03, CTR_BE, ADD_1234, Assembler::AVX_512bit); + __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit); + __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit); + __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit); + shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK); + + __ bind(blocks_ok); + + //pre - load constants + ev_load_key(AESKEY1, key, 0, rbx); + if (!no_ghash_in) { + __ evpxorq(GHDAT1, GHASH_IN, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit); + } else { + __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit); + } + + if (hk_broadcast) { + __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit); + __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit); + } else { + __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit); + __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit); + } + + //save counter for the next round + //increment counter overflow check register + __ evshufi64x2(CTR_BE, B12_15, B12_15, 255, Assembler::AVX_512bit); + __ addb(CTR_CHECK, 16); + + //pre - load constants + ev_load_key(AESKEY2, key, 1 * 16, rbx); + __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset +4)), 
+
+ //stitch AES rounds with GHASH
+ //AES round 0
+ __ evpxorq(B00_03, B00_03, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, AESKEY1, Assembler::AVX_512bit);
+ ev_load_key(AESKEY1, key, 2 * 16, rbx);
+
+ //GHASH 4 blocks (15 to 12)
+ carrylessMultiply(TLL1, TLH1, THL1, THH1, GHDAT1, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ }
+
+ __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 8)), Assembler::AVX_512bit);
+
+ //AES round 1
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ ev_load_key(AESKEY2, key, 3 * 16, rbx);
+
+ //GHASH 4 blocks (11 to 8)
+ carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
 }
- // Load data for computing ghash
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // stitch AES rounds with GHASH
- // AES round 0, xmm24 has shuffle mask
- shuffleExorRnd1Key(ZTMP0, ZTMP1, ZTMP2, ZTMP3, xmm24, ZTMP17);
- // Reuse ZTMP17 / ZTMP18 for loading remaining AES Keys
- ev_load_key(ZTMP17, key, 2 * 16, xmm29);
- // GHASH 4 blocks
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19);
- // Load the next hkey and Ghash data
- __ evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
-
- // AES round 1
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 3 * 16, xmm29);
-
- // GHASH 4 blocks(11 to 8)
- carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
- // Load the next hkey and GDATA
- __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // AES round 2
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 4 * 16, xmm29);
-
- // GHASH 4 blocks(7 to 4)
- carrylessMultiply(ZTMP14, ZTMP16, ZTMP15, ZTMP13, ZTMP21, ZTMP19);
- // AES rounds 3
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 5 * 16, xmm29);
-
- // Gather(XOR) GHASH for 12 blocks
- xorGHASH(ZTMP5, ZTMP6, ZTMP8, ZTMP7, ZTMP9, ZTMP13, ZTMP10, ZTMP14, ZTMP12, ZTMP16, ZTMP11, ZTMP15);
-
- // AES rounds 4
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- 
ev_load_key(ZTMP17, key, 6 * 16, xmm29); - - // load plain / cipher text(recycle registers) - loadData(in, pos, ZTMP13, ZTMP14, ZTMP15, ZTMP16); - - // AES rounds 5 - roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); - ev_load_key(ZTMP18, key, 7 * 16, xmm29); - // GHASH 4 blocks(3 to 0) - carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20); - - // AES round 6 - roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); - ev_load_key(ZTMP17, key, 8 * 16, xmm29); - - // gather GHASH in ZTMP6(low) and ZTMP5(high) - if (first_time_reduction) { - __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit); - __ evpxorq(xmm25, ZTMP7, ZTMP11, Assembler::AVX_512bit); - __ evpxorq(xmm27, ZTMP5, ZTMP9, Assembler::AVX_512bit); - __ evpxorq(xmm26, ZTMP6, ZTMP10, Assembler::AVX_512bit); - } else if (!first_time_reduction && !final_reduction) { - xorGHASH(ZTMP7, xmm25, xmm27, xmm26, ZTMP8, ZTMP12, ZTMP7, ZTMP11, ZTMP5, ZTMP9, ZTMP6, ZTMP10); + __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 12)), Assembler::AVX_512bit); + + //AES round 2 + roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15); + ev_load_key(AESKEY1, key, 4 * 16, rbx); + + //GHASH 4 blocks(7 to 4) + carrylessMultiply(TLL3, TLH3, THL3, THH3, GHDAT1, GHKEY2, GHKEY1); + + if (hk_broadcast) { + __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit); + __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit); + } else { + __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit); + __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit); } - if (final_reduction) { - // Phase one: Add mid products together - // Also load polynomial constant for reduction - __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit); - __ vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit); - __ vpsrldq(ZTMP11, ZTMP7, 8, Assembler::AVX_512bit); - __ vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit); - __ evmovdquq(ZTMP12, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); + //AES rounds 3 + roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15); + ev_load_key(AESKEY2, key, 5 * 16, rbx); + + //Gather(XOR) GHASH for 12 blocks + xorGHASH(TLL1, TLH1, THL1, THH1, TLL2, TLL3, TLH2, TLH3, THL2, THL3, THH2, THH3); + + //AES rounds 4 + roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15); + ev_load_key(AESKEY1, key, 6 * 16, rbx); + + //load plain / cipher text(recycle GH3xx registers) + loadData(in, pos, DATA1, DATA2, DATA3, DATA4); + + //AES rounds 5 + roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15); + ev_load_key(AESKEY2, key, 7 * 16, rbx); + + //GHASH 4 blocks(3 to 0) + carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1); + + //AES round 6 + roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15); + ev_load_key(AESKEY1, key, 8 * 16, rbx); + + //gather GHASH in TO_REDUCE_H / L + if (is_hash_start) { + __ evpxorq(TO_REDUCE_L, TLL2, THL2, Assembler::AVX_512bit); + __ evpxorq(TO_REDUCE_H, THH2, TLH2, Assembler::AVX_512bit); + __ vpternlogq(TO_REDUCE_L, 0x96, TLL1, THL1, Assembler::AVX_512bit); + __ vpternlogq(TO_REDUCE_H, 0x96, THH1, TLH1, Assembler::AVX_512bit); + } else { + //not the first round so sums need to be updated + xorGHASH(TO_REDUCE_L, TO_REDUCE_H, TO_REDUCE_L, TO_REDUCE_H, TLL2, THL2, THH2, TLH2, TLL1, THL1, THH1, TLH1); } - // AES round 7 - roundEncode(ZTMP18, 
ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 9 * 16, xmm29);
- if (final_reduction) {
- __ vpternlogq(ZTMP5, 0x96, ZTMP9, ZTMP11, Assembler::AVX_512bit);
- __ evpxorq(ZTMP5, ZTMP5, xmm27, Assembler::AVX_512bit);
- __ vpternlogq(ZTMP6, 0x96, ZTMP10, ZTMP7, Assembler::AVX_512bit);
- __ evpxorq(ZTMP6, ZTMP6, xmm26, Assembler::AVX_512bit);
+
+ //AES round 7
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 9 * 16, rbx);
+
+ //new reduction
+ if (do_hash_reduction) {
+ __ evmovdquq(ZT, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evpclmulqdq(THH1, TO_REDUCE_L, ZT, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(TO_REDUCE_L, TO_REDUCE_L, 78, Assembler::AVX_512bit);
+ __ vpternlogq(THH1, 0x96, TO_REDUCE_H, TO_REDUCE_L, Assembler::AVX_512bit);
 }
- // AES round 8
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 10 * 16, xmm29);
-
- // Horizontal xor of low and high 4*128
- if (final_reduction) {
- vhpxori4x128(ZTMP5, ZTMP9);
- vhpxori4x128(ZTMP6, ZTMP10);
+
+ //AES round 8
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 10 * 16, rbx);
+
+ //horizontal xor of 4 reduced hashes
+ if (do_hash_hxor) {
+ vhpxori4x128(THH1, TLL1);
 }
- // AES round 9
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // First phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP10, ZTMP12, ZTMP6, 0x01, Assembler::AVX_128bit);
- __ vpslldq(ZTMP10, ZTMP10, 8, Assembler::AVX_128bit);
- __ evpxorq(ZTMP10, ZTMP6, ZTMP10, Assembler::AVX_128bit);
+
+ //AES round 9
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 11 * 16, rbx);
+ //AES rounds up to 11 (AES192) or 13 (AES256)
+ //AES128 is done
+ __ cmpl(NROUNDS, 52);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 12 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ __ cmpl(NROUNDS, 60);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_256);
+ ev_load_key(AESKEY2, key, 13 * 16, rbx);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 14 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ __ bind(last_aes_rnd);
+ //the last AES round
+ lastroundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ //AESKEY1 and AESKEY2 contain AES round keys
+
+ //XOR against plain / cipher text
+ xorBeforeStore(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4);
+
+ //store cipher / plain text
+ storeData(out, pos, B00_03, B04_07, B08_11, B12_15);
+ //**B00_03, B04_07, B08_11, B12_15 may contain sensitive data
+
+ //shuffle cipher text blocks for GHASH computation
+ __ cmpptr(ct, out);
+ __ jcc(Assembler::notEqual, skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK);
+ __ jmp(cont);
+ __ bind(skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4, SHFMSK);
+
+ //**B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text
+ __ bind(cont);
+ //store shuffled cipher text for ghashing
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * aesout_offset), B00_03, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 4)), B04_07, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 8)), B08_11, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 12)), B12_15, Assembler::AVX_512bit);
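+ //The shuffled ciphertext stashed at aesout_offset is what a later pass reads
+ //back through ghashin_offset: each 16-block iteration encrypts the current
+ //chunk while ghashing the chunk stored by the previous one.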
+}
+
+
+//Encrypt / decrypt the initial 16 blocks
+void StubGenerator::initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
+ XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset) {
+ const XMMRegister B00_03 = xmm7;
+ const XMMRegister B04_07 = xmm10;
+ const XMMRegister B08_11 = xmm11;
+ const XMMRegister B12_15 = xmm12;
+ const XMMRegister T0 = xmm0;
+ const XMMRegister T1 = xmm3;
+ const XMMRegister T2 = xmm4;
+ const XMMRegister T3 = xmm5;
+ const XMMRegister T4 = xmm6;
+ const XMMRegister T5 = xmm30;
+
+ Label next_16_overflow, next_16_ok, cont, skip_shuffle, aes_256, aes_192, last_aes_rnd;
+ //prepare counter blocks
+ __ cmpb(CTR_CHECK, (256 - 16));
+ __ jcc(Assembler::aboveEqual, next_16_overflow);
+ __ vpaddd(B00_03, CTR, ADDBE_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit);
+ __ jmp(next_16_ok);
+ __ bind(next_16_overflow);
+ __ vpshufb(CTR, CTR, SHUF_MASK, Assembler::AVX_512bit);
+ __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx);
+ __ vpaddd(B00_03, CTR, ADD_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK);
+ __ bind(next_16_ok);
+ __ evshufi64x2(CTR, B12_15, B12_15, 255, Assembler::AVX_512bit);
+ __ addb(CTR_CHECK, 16);
+
+ //load 16 blocks of data
+ loadData(in, pos, T0, T1, T2, T3);
+
+ //move to AES encryption rounds
+ __ movdqu(T5, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
+ ev_load_key(T4, key, 0, T5);
+ __ evpxorq(B00_03, B00_03, T4, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, T4, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, T4, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, T4, Assembler::AVX_512bit);
+
+ for (int i = 1; i < 10; i++) {
+ ev_load_key(T4, key, i * 16, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
 }
+
+ ev_load_key(T4, key, 10 * 16, T5);
 __ cmpl(rounds, 52);
- __ jcc(Assembler::greaterEqual, AES_192);
- __ jmp(LAST_AES_RND);
- // AES rounds up to 11 (AES192) or 13 (AES256)
- __ bind(AES_192);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 11 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 12 * 16, xmm29);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 11, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 12, T5);
 __ cmpl(rounds, 60);
- __ jcc(Assembler::aboveEqual, AES_256);
- __ jmp(LAST_AES_RND);
-
- __ bind(AES_256);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 13 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 14 * 16, xmm29);
-
- __ bind(LAST_AES_RND);
- // Second phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP9, ZTMP12, ZTMP10, 0x00, Assembler::AVX_128bit);
- __ vpsrldq(ZTMP9, ZTMP9, 4, 
Assembler::AVX_128bit); // Shift-R 1-DW to obtain 2-DWs shift-R - __ evpclmulqdq(ZTMP11, ZTMP12, ZTMP10, 0x10, Assembler::AVX_128bit); - __ vpslldq(ZTMP11, ZTMP11, 4, Assembler::AVX_128bit); // Shift-L 1-DW for result - // ZTMP5 = ZTMP5 X ZTMP11 X ZTMP9 - __ vpternlogq(ZTMP5, 0x96, ZTMP11, ZTMP9, Assembler::AVX_128bit); - } - // Last AES round - lastroundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); - // XOR against plain / cipher text - xorBeforeStore(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP13, ZTMP14, ZTMP15, ZTMP16); - // store cipher / plain text - storeData(out, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + __ jcc(Assembler::less, last_aes_rnd); + __ bind(aes_256); + roundEncode(T4, B00_03, B04_07, B08_11, B12_15); + ev_load_key(T4, key, 16 * 13, T5); + roundEncode(T4, B00_03, B04_07, B08_11, B12_15); + ev_load_key(T4, key, 16 * 14, T5); + + __ bind(last_aes_rnd); + lastroundEncode(T4, B00_03, B04_07, B08_11, B12_15); + + //xor against text + xorBeforeStore(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3); + + //store + storeData(out, pos, B00_03, B04_07, B08_11, B12_15); + + __ cmpptr(ct, out); + __ jcc(Assembler::equal, skip_shuffle); + //decryption - cipher text needs to go to GHASH phase + shuffle(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3, SHUF_MASK); + __ jmp(cont); + __ bind(skip_shuffle); + shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK); + + //B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text + __ bind(cont); + __ evmovdquq(Address(avx512_subkeyHtbl, 16 * stack_offset), B00_03, Assembler::AVX_512bit); + __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 4)), B04_07, Assembler::AVX_512bit); + __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 8)), B08_11, Assembler::AVX_512bit); + __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 12)), B12_15, Assembler::AVX_512bit); } -void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key, - Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) { - Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32, - AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16; - const XMMRegister CTR_BLOCKx = xmm9; +void StubGenerator::aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key, Register state, + Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) { + Label ENC_DEC_DONE, MESG_BELOW_32_BLKS, NO_BIG_BLKS, ENCRYPT_BIG_BLKS_NO_HXOR, + ENCRYPT_BIG_NBLKS, ENCRYPT_16_BLKS, ENCRYPT_N_GHASH_32_N_BLKS, GHASH_DONE; + const XMMRegister CTR_BLOCKx = xmm2; const XMMRegister AAD_HASHx = xmm14; - const Register pos = rax; - const Register rounds = r15; - const Register ghash_pos = NOT_WIN64( r14) WIN64_ONLY( r11 ); const XMMRegister ZTMP0 = xmm0; - const XMMRegister ZTMP1 = xmm3; - const XMMRegister ZTMP2 = xmm4; - const XMMRegister ZTMP3 = xmm5; + const XMMRegister ZTMP1 = xmm3; //**sensitive + const XMMRegister ZTMP2 = xmm4; //**sensitive(small data) + const XMMRegister ZTMP3 = xmm5; //**sensitive(small data) const XMMRegister ZTMP4 = xmm6; const XMMRegister ZTMP5 = xmm7; const XMMRegister ZTMP6 = xmm10; @@ -3066,235 +3358,170 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis const XMMRegister ZTMP14 = xmm20; const XMMRegister ZTMP15 = xmm21; const XMMRegister ZTMP16 = xmm30; - const XMMRegister COUNTER_INC_MASK = xmm18; - - __ movl(pos, 0); // Total length processed - // Min data size processed = 768 
bytes
- __ cmpl(len, 768);
- __ jcc(Assembler::less, ENC_DEC_DONE);
+ const XMMRegister ZTMP17 = xmm31;
+ const XMMRegister ZTMP18 = xmm1;
+ const XMMRegister ZTMP19 = xmm18;
+ const XMMRegister ZTMP20 = xmm8;
+ const XMMRegister ZTMP21 = xmm22;
+ const XMMRegister ZTMP22 = xmm23;
+ const XMMRegister ZTMP23 = xmm26;
+ const XMMRegister GH = xmm24;
+ const XMMRegister GL = xmm25;
+ const XMMRegister SHUF_MASK = xmm29;
+ const XMMRegister ADDBE_4x4 = xmm27;
+ const XMMRegister ADDBE_1234 = xmm28;
+ const XMMRegister ADD_1234 = xmm9;
+ const KRegister MASKREG = k1;
+ const Register pos = rax;
+ const Register rounds = r15;
+ const Register CTR_CHECK = r14;
- // Generate 48 constants for htbl
- __ call(GENERATE_HTBL_48_BLKS, relocInfo::none);
- int index = 0; // Index for choosing subkeyHtbl entry
- __ movl(ghash_pos, 0); // Pointer for ghash read and store operations
+ const int stack_offset = 64;
+ const int ghashin_offset = 64;
+ const int aesout_offset = 64;
+ const int hashkey_offset = 0;
+ const int hashkey_gap = 16 * 32;
+ const int HashKey_32 = 0;
+ const int HashKey_16 = 16 * 16;
- // Move initial counter value and STATE value into variables
+ __ movl(pos, 0);
+ __ cmpl(len, 256);
+ __ jcc(Assembler::lessEqual, ENC_DEC_DONE);
+
+ /* Structure of the Htbl is as follows:
+ * Entries 0 - 31 hold the 32 Hashkeys and entries 32 - 63 hold the 32 HashKeyK values (derived from HashKey)
+ * The remaining 8 zmm-sized entries are for storing CTR values post AES rounds
+ * ----------------------------------------------------------------------------------------
+      Hashkey32 -> 16 * 0
+      Hashkey31 -> 16 * 1
+      Hashkey30 -> 16 * 2
+      ........
+      Hashkey1 -> 16 * 31
+      ---------------------
+      HashkeyK32 -> 16 * 32
+      HashkeyK31 -> 16 * 33
+      .........
+      HashkeyK1 -> 16 * 63
+      ---------------------
+      1st set of AES Entries
+      B00_03 -> 16 * 64
+      B04_07 -> 16 * 68
+      B08_11 -> 16 * 72
+      B12_15 -> 16 * 76
+      ---------------------
+      2nd set of AES Entries
+      B00_03 -> 16 * 80
+      B04_07 -> 16 * 84
+      B08_11 -> 16 * 88
+      B12_15 -> 16 * 92
+      ---------------------*/
+ generateHtbl_32_blocks_avx512(subkeyHtbl, avx512_subkeyHtbl);
+
+ //Move initial counter value and STATE value into variables
 __ movdqu(CTR_BLOCKx, Address(counter, 0));
 __ movdqu(AAD_HASHx, Address(state, 0));
- // Load lswap mask for ghash
+
+ //Load lswap mask for ghash
 __ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
- // Shuffle input state using lswap mask
+ //Shuffle input state using lswap mask
 __ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
 // Compute #rounds for AES based on the length of the key array
 __ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- // Broadcast counter value to 512 bit register
+ __ evmovdquq(ADDBE_4x4, ExternalAddress(counter_mask_addbe_4444_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADDBE_1234, ExternalAddress(counter_mask_addbe_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(SHUF_MASK, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADD_1234, ExternalAddress(counter_mask_add_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+
+ //Shuffle counter, subtract 1 from the pre-incremented counter value and broadcast counter value to 512 bit register
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
+ __ vpsubd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit);
 __ evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, 
Assembler::AVX_512bit); - // Load counter shuffle mask - __ evmovdquq(xmm24, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); - // Shuffle counter - __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit); - - // Load mask for incrementing counter - __ evmovdquq(COUNTER_INC_MASK, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); - // Pre-increment counter - __ vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); - __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit); - - // Begin 32 blocks of AES processing - __ bind(AES_32_BLOCKS); - // Save incremented counter before overwriting it with AES data - __ evmovdquq(CTR_BLOCKx, ZTMP8, Assembler::AVX_512bit); - - // Move 256 bytes of data - loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3); - // Load key shuffle mask - __ movdqu(xmm29, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/); - // Load 0th AES round key - ev_load_key(ZTMP4, key, 0, xmm29); - // AES-ROUND0, xmm24 has the shuffle mask - shuffleExorRnd1Key(ZTMP5, ZTMP6, ZTMP7, ZTMP8, xmm24, ZTMP4); - - for (int j = 1; j < 10; j++) { - ev_load_key(ZTMP4, key, j * 16, xmm29); - roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - } - ev_load_key(ZTMP4, key, 10 * 16, xmm29); - // AES rounds up to 11 (AES192) or 13 (AES256) - __ cmpl(rounds, 52); - __ jcc(Assembler::greaterEqual, AES_192); - lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - __ jmp(STORE_CT); - - __ bind(AES_192); - roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - ev_load_key(ZTMP4, key, 11 * 16, xmm29); - roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - __ cmpl(rounds, 60); - __ jcc(Assembler::aboveEqual, AES_256); - ev_load_key(ZTMP4, key, 12 * 16, xmm29); - lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - __ jmp(STORE_CT); - - __ bind(AES_256); - ev_load_key(ZTMP4, key, 12 * 16, xmm29); - roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - ev_load_key(ZTMP4, key, 13 * 16, xmm29); - roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - ev_load_key(ZTMP4, key, 14 * 16, xmm29); - // Last AES round - lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - - __ bind(STORE_CT); - // Xor the encrypted key with PT to obtain CT - xorBeforeStore(ZTMP5, ZTMP6, ZTMP7, ZTMP8, ZTMP0, ZTMP1, ZTMP2, ZTMP3); - storeData(out, pos, ZTMP5, ZTMP6, ZTMP7, ZTMP8); - // 16 blocks encryption completed - __ addl(pos, 256); - __ cmpl(pos, 512); - __ jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL); - __ vpaddd(ZTMP5, CTR_BLOCKx, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit); - __ jmp(AES_32_BLOCKS); - - __ bind(GHASH_AES_PARALLEL); - // Ghash16_encrypt16_parallel takes place in the order with three reduction values: - // 1) First time -> cipher xor input ghash - // 2) No reduction -> accumulate multiplication values - // 3) Final reduction post 48 blocks -> new ghash value is computed for the next round - // Reduction value = first time - ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK); - __ addl(pos, 256); - __ addl(ghash_pos, 256); - index += 4; - - 
// At this point we have processed 768 bytes of AES and 256 bytes of GHASH. - // If the remaining length is less than 768, process remaining 512 bytes of ghash in GHASH_LAST_32 code - __ subl(len, 768); - __ cmpl(len, 768); - __ jcc(Assembler::less, GHASH_LAST_32); - - // AES 16 blocks and GHASH 16 blocks in parallel - // For multiples of 48 blocks we will do ghash16_encrypt16 interleaved multiple times - // Reduction value = no reduction means that the carryless multiplication values are accumulated for further calculations - // Each call uses 4 subkeyHtbl values, so increment the index by 4. - __ bind(GHASH_16_AES_16); - // Reduction value = no reduction - ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK); - __ addl(pos, 256); - __ addl(ghash_pos, 256); - index += 4; - // Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash - ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK); - __ addl(pos, 256); - __ addl(ghash_pos, 256); - // Calculated ghash value needs to be __ moved to AAD_HASHX so that we can restart the ghash16-aes16 pipeline - __ movdqu(AAD_HASHx, ZTMP5); - index = 0; // Reset subkeyHtbl index - - // Restart the pipeline - // Reduction value = first time - ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK); - __ addl(pos, 256); - __ addl(ghash_pos, 256); - index += 4; - - __ subl(len, 768); - __ cmpl(len, 768); - __ jcc(Assembler::greaterEqual, GHASH_16_AES_16); - - // GHASH last 32 blocks processed here - // GHASH products accumulated in ZMM27, ZMM25 and ZMM26 during GHASH16-AES16 operation is used - __ bind(GHASH_LAST_32); - // Use rbx as a pointer to the htbl; For last 32 blocks of GHASH, use key# 4-11 entry in subkeyHtbl - __ movl(rbx, 256); - // Load cipher blocks - __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); - __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); - __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit); - __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit); - // Load ghash keys - __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit); - __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit); - - // Ghash blocks 0 - 3 - carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15); - // Ghash blocks 4 - 7 - carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP14, ZTMP16); - - __ vpternlogq(ZTMP1, 0x96, ZTMP5, xmm27, Assembler::AVX_512bit); // ZTMP1 = ZTMP1 + ZTMP5 + zmm27 - __ vpternlogq(ZTMP2, 0x96, ZTMP6, xmm26, Assembler::AVX_512bit); // ZTMP2 = ZTMP2 + ZTMP6 + zmm26 - __ vpternlogq(ZTMP3, 0x96, ZTMP7, xmm25, Assembler::AVX_512bit); // ZTMP3 = ZTMP3 + ZTMP7 + zmm25 - __ evpxorq(ZTMP4, ZTMP4, ZTMP8, Assembler::AVX_512bit); // ZTMP4 = ZTMP4 + ZTMP8 - - __ addl(ghash_pos, 128); - __ addl(rbx, 128); - - // Ghash remaining blocks - __ bind(LOOP); - __ cmpl(ghash_pos, pos); - __ jcc(Assembler::aboveEqual, ACCUMULATE); - // Load next cipher blocks and corresponding ghash keys - __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), 
Assembler::AVX_512bit); - __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); - __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit); - __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit); - __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit); - __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit); - - // ghash blocks 0 - 3 - carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15); - - // ghash blocks 4 - 7 - carrylessMultiply(ZTMP10, ZTMP11, ZTMP12, ZTMP9, ZTMP14, ZTMP16); - - // update sums - // ZTMP1 = ZTMP1 + ZTMP5 + ZTMP9 - // ZTMP2 = ZTMP2 + ZTMP6 + ZTMP10 - // ZTMP3 = ZTMP3 + ZTMP7 xor ZTMP11 - // ZTMP4 = ZTMP4 + ZTMP8 xor ZTMP12 - xorGHASH(ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, ZTMP9, ZTMP6, ZTMP10, ZTMP7, ZTMP11, ZTMP8, ZTMP12); - __ addl(ghash_pos, 128); - __ addl(rbx, 128); - __ jmp(LOOP); - // Integrate ZTMP3/ZTMP4 into ZTMP1 and ZTMP2 - __ bind(ACCUMULATE); - __ evpxorq(ZTMP3, ZTMP3, ZTMP4, Assembler::AVX_512bit); - __ vpsrldq(ZTMP7, ZTMP3, 8, Assembler::AVX_512bit); - __ vpslldq(ZTMP8, ZTMP3, 8, Assembler::AVX_512bit); - __ evpxorq(ZTMP1, ZTMP1, ZTMP7, Assembler::AVX_512bit); - __ evpxorq(ZTMP2, ZTMP2, ZTMP8, Assembler::AVX_512bit); - - // Add ZTMP1 and ZTMP2 128 - bit words horizontally - vhpxori4x128(ZTMP1, ZTMP11); - vhpxori4x128(ZTMP2, ZTMP12); - // Load reduction polynomial and compute final reduction - __ evmovdquq(ZTMP15, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/); - vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4); - - // Pre-increment counter for next operation - __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, xmm18, Assembler::AVX_128bit); - // Shuffle counter and save the updated value - __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit); + __ movdl(CTR_CHECK, CTR_BLOCKx); + __ andl(CTR_CHECK, 255); + + // Reshuffle counter + __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_512bit); + + initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset); + __ addl(pos, 16 * 16); + __ cmpl(len, 32 * 16); + __ jcc(Assembler::below, MESG_BELOW_32_BLKS); + + initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset + 16); + __ addl(pos, 16 * 16); + __ subl(len, 32 * 16); + + __ cmpl(len, 32 * 16); + __ jcc(Assembler::below, NO_BIG_BLKS); + + __ bind(ENCRYPT_BIG_BLKS_NO_HXOR); + __ cmpl(len, 2 * 32 * 16); + __ jcc(Assembler::below, ENCRYPT_BIG_NBLKS); + ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, + true, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32); + __ addl(pos, 16 * 16); + + ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, + true, false, true, false, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16); + __ evmovdquq(AAD_HASHx, ZTMP4, Assembler::AVX_512bit); + __ addl(pos, 16 * 16); + __ subl(len, 32 * 16); + __ jmp(ENCRYPT_BIG_BLKS_NO_HXOR); + + __ bind(ENCRYPT_BIG_NBLKS); + ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, 
AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, + false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32); + __ addl(pos, 16 * 16); + ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, + false, false, true, true, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16); + + __ movdqu(AAD_HASHx, ZTMP4); + __ addl(pos, 16 * 16); + __ subl(len, 32 * 16); + + __ bind(NO_BIG_BLKS); + __ cmpl(len, 16 * 16); + __ jcc(Assembler::aboveEqual, ENCRYPT_16_BLKS); + + __ bind(ENCRYPT_N_GHASH_32_N_BLKS); + ghash16_avx512(true, false, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 0, 0, HashKey_32); + gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset + 16, HashKey_16, false, true); + __ jmp(GHASH_DONE); + + __ bind(ENCRYPT_16_BLKS); + ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, + false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32); + + ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16); + + __ bind(MESG_BELOW_32_BLKS); + __ subl(len, 16 * 16); + __ addl(pos, 16 * 16); + gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true); + + __ bind(GHASH_DONE); + //Pre-increment counter for next operation, make sure that counter value is incremented on the LSB + __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit); + __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit); + __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit); __ movdqu(Address(counter, 0), CTR_BLOCKx); - // Load ghash lswap mask + //Load ghash lswap mask __ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/); - // Shuffle ghash using lbswap_mask and store it + //Shuffle ghash using lbswap_mask and store it __ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit); __ movdqu(Address(state, 0), AAD_HASHx); - __ jmp(ENC_DEC_DONE); - __ bind(GENERATE_HTBL_48_BLKS); - generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl, rbx /*rscratch*/); + //Zero out sensitive data + __ evpxorq(ZTMP21, ZTMP21, ZTMP21, Assembler::AVX_512bit); + __ evpxorq(ZTMP0, ZTMP0, ZTMP0, Assembler::AVX_512bit); + __ evpxorq(ZTMP1, ZTMP1, ZTMP1, Assembler::AVX_512bit); + __ evpxorq(ZTMP2, ZTMP2, ZTMP2, Assembler::AVX_512bit); + __ evpxorq(ZTMP3, ZTMP3, ZTMP3, Assembler::AVX_512bit); __ bind(ENC_DEC_DONE); - __ movq(rax, pos); } //Implements data * hashkey mod (128, 127, 126, 121, 0) diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp index 2056fa057654e..5a9b084841376 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2019, 2021, Intel Corporation. All rights reserved. +* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -57,7 +57,10 @@ address StubGenerator::ghash_byte_swap_mask_addr() { // Polynomial x^128+x^127+x^126+x^121+1 ATTRIBUTE_ALIGNED(16) static const uint64_t GHASH_POLYNOMIAL[] = { - 0x0000000000000001UL, 0xC200000000000000UL, + 0x0000000000000001ULL, 0xC200000000000000ULL, + 0x0000000000000001ULL, 0xC200000000000000ULL, + 0x0000000000000001ULL, 0xC200000000000000ULL, + 0x0000000000000001ULL, 0xC200000000000000ULL }; address StubGenerator::ghash_polynomial_addr() { return (address)GHASH_POLYNOMIAL; diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp new file mode 100644 index 0000000000000..92ac78e15cba9 --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp @@ -0,0 +1,502 @@ +/* +* Copyright (c) 2024, Intel Corporation. All rights reserved. +* Intel Math Library (LIBM) Source Code +* +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. +* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. +* +*/ + +#include "precompiled.hpp" +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// tanh(x)=(exp(x)-exp(-x))/(exp(x)+exp(-x))=(1-exp(-2*x))/(1+exp(-2*x)) +// +// Let |x|=xH+xL (upper 26 bits, lower 27 bits) +// log2(e) rounded to 26 bits (high part) plus a double precision low part is +// L2EH+L2EL (upper 26, lower 53 bits) +// +// Let xH*L2EH=k+f+r`, where (k+f)*2^8*2=int(xH*L2EH*2^9), +// f=0.b1 b2 ... 
b8, k integer +// 2^{-f} is approximated as Tn[f]+Dn[f] +// Tn stores the high 53 bits, Dn stores (2^{-f}-Tn[f]) rounded to double precision +// +// r=r`+xL*L2EH+|x|*L2EL, |r|<2^{-9}+2^{-14}, +// for |x| in [23/64,3*2^7) +// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p) +// +// For |x| in [2^{-4},2^5): +// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5 +// Let R=1/(1+T0+p*T0), truncated to 35 significant bits +// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33} +// 1+T0+D0+p*(T0+D0)=KH+KL, where +// KH=(1+T0+c1*r*T0)_high (leading 17 bits) +// KL=T0_low+D0+(c1*r*T0)_low+c1*r*D0+(c2*r^2+..c5*r^5)*T0 +// eps ~ (R*KH-1)+R*KL +// 1/(1+T0+D0+p*(T0+D0)) ~ R-R*eps +// The result is approximated as (1-T0-D0-(T0+D0)*p)*(R-R*eps) +// 1-T0-D0-(T0+D0)*p=-((KH-2)+KL) +// The result is formed as +// (KH-2)*R+(-(KH-2)*R*eps+(KL*R-KL*R*eps)), with the correct sign +// set at the end +// +// For |x| in [2^{-64},2^{-4}): +// A Taylor series expansion is used (x+p3*x^3+..+p13*x^{13}) +// +// For |x|<2^{-64}: x is returned +// +// For |x|>=2^32: return +/-1 +// +// Special cases: +// tanh(NaN) = quiet NaN, and raise invalid exception +// tanh(INF) = that INF +// tanh(+/-0) = +/-0 +// +/******************************************************************************/ + +ATTRIBUTE_ALIGNED(4) static const juint _HALFMASK[] = +{ + 4160749568, 2147483647 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _ONEMASK[] = +{ + 0, 1072693248 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _TWOMASK[] = +{ + 0, 1073741824 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _MASK3[] = +{ + 0, 4294967280, 0, 4294967280 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _RMASK[] = +{ + 4294705152, 4294967295, 4294705152, 4294967295 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _L2E[] = +{ + 1610612736, 1082594631, 4166901572, 1055174155 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _Shifter[] = +{ + 0, 1127743488, 0, 3275227136 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _cv[] = +{ + 3884607281, 3168131199, 3607404735, 3190582024, 1874480759, + 1032041131, 4286760334, 1053736893, 4277811695, 3211144770, + 0, 0 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _pv[] = +{ + 236289503, 1064135997, 463583772, 3215696314, 1441186365, + 3212977891, 286331153, 1069617425, 2284589306, 1066820852, + 1431655765, 3218429269 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] = +{ + 0, 1072693248, 0, 0, 1797923801, 1072687577, + 1950547427, 1013229059, 730821105, 1072681922, 2523232743, 1012067188, + 915592468, 1072676282, 352947894, 3161024371, 2174652632, 1072670657, + 4087714590, 1014450259, 35929225, 1072665048, 2809788041, 3159436968, + 2912730644, 1072659453, 3490067722, 3163405074, 2038973688, 1072653874, + 892941374, 1016046459, 1533953344, 1072648310, 769171851, 1015665633, + 1222472308, 1072642761, 1054357470, 3161021018, 929806999, 1072637227, + 3205336643, 1015259557, 481706282, 1072631708, 1696079173, 3162710528, + 3999357479, 1072626203, 2258941616, 1015924724, 2719515920, 1072620714, + 2760332941, 1015137933, 764307441, 1072615240, 3021057420, 3163329523, + 2256325230, 1072609780, 580117746, 1015317295, 2728693978, 1072604335, + 396109971, 3163462691, 2009970496, 1072598905, 2159039665, 3162572948, + 4224142467, 1072593489, 3389820386, 1015207202, 610758006, 1072588089, + 1965209397, 3161866232, 3884662774, 1072582702, 2158611599, 1014210185, + 991358482, 1072577331, 838715019, 3163157668, 351641897, 1072571974, + 2172261526, 3163010599, 1796832535, 1072566631, 3176955716, 3160585513, + 863738719, 1072561303, 
1326992220, 3162613197, 1679558232, 1072555989, + 2390342287, 3163333970, 4076975200, 1072550689, 2029000899, 1015208535, + 3594158869, 1072545404, 2456521700, 3163256561, 64696965, 1072540134, + 1768797490, 1015816960, 1912561781, 1072534877, 3147495102, 1015678253, + 382305176, 1072529635, 2347622376, 3162578625, 3898795731, 1072524406, + 1249994144, 1011869818, 3707479175, 1072519192, 3613079303, 1014164738, + 3939148246, 1072513992, 3210352148, 1015274323, 135105010, 1072508807, + 1906148728, 3163375739, 721996136, 1072503635, 563754734, 1015371318, + 1242007932, 1072498477, 1132034716, 3163339831, 1532734324, 1072493333, + 3094216535, 3163162857, 1432208378, 1072488203, 1401068914, 3162363963, + 778901109, 1072483087, 2248183955, 3161268751, 3706687593, 1072477984, + 3521726940, 1013253067, 1464976603, 1072472896, 3507292405, 3161977534, + 2483480501, 1072467821, 1216371780, 1013034172, 2307442995, 1072462760, + 3190117721, 3162404539, 777507147, 1072457713, 4282924205, 1015187533, + 2029714210, 1072452679, 613660079, 1015099143, 1610600570, 1072447659, + 3766732298, 1015760183, 3657065772, 1072442652, 399025623, 3162957078, + 3716502172, 1072437659, 2303740125, 1014042725, 1631695677, 1072432680, + 2717633076, 3162344026, 1540824585, 1072427714, 1064017011, 3163487690, + 3287523847, 1072422761, 1625971539, 3157009955, 2420883922, 1072417822, + 2049810052, 1014119888, 3080351519, 1072412896, 3379126788, 3157218001, + 815859274, 1072407984, 240396590, 3163487443, 4062661092, 1072403084, + 1422616006, 3163255318, 4076559943, 1072398198, 2119478331, 3160758351, + 703710506, 1072393326, 1384660846, 1015195891, 2380618042, 1072388466, + 3149557219, 3163320799, 364333489, 1072383620, 3923737744, 3161421373, + 3092190715, 1072378786, 814012168, 3159523422, 1822067026, 1072373966, + 1241994956, 1015340290, 697153126, 1072369159, 1283515429, 3163283189, + 3861050111, 1072364364, 254893773, 3162813180, 2572866477, 1072359583, + 878562433, 1015521741, 977020788, 1072354815, 3065100517, 1015541563, + 3218338682, 1072350059, 3404164304, 3162477108, 557149882, 1072345317, + 3672720709, 1014537265, 1434058175, 1072340587, 251133233, 1015085769, + 1405169241, 1072335870, 2998539689, 3162830951, 321958744, 1072331166, + 3401933767, 1015794558, 2331271250, 1072326474, 812057446, 1012207446, + 2990417245, 1072321795, 3683467745, 3163369326, 2152073944, 1072317129, + 1486860576, 3163203456, 3964284211, 1072312475, 2111583915, 1015427164, + 3985553595, 1072307834, 4002146062, 1015834136, 2069751141, 1072303206, + 1562170675, 3162724681, 2366108318, 1072298590, 2867985102, 3161762254, + 434316067, 1072293987, 2028358766, 1013458122, 424392917, 1072289396, + 2749202995, 3162838718, 2191782032, 1072284817, 2960257726, 1013742662, + 1297350157, 1072280251, 1308022040, 3163412558, 1892288442, 1072275697, + 2446255666, 3162600381, 3833209506, 1072271155, 2722920684, 1013754842, + 2682146384, 1072266626, 2082178513, 3163363419, 2591453363, 1072262109, + 2132396182, 3159074198, 3418903055, 1072257604, 2527457337, 3160820604, + 727685349, 1072253112, 2038246809, 3162358742, 2966275557, 1072248631, + 2176155324, 3159842759, 1403662306, 1072244163, 2788809599, 3161671007, + 194117574, 1072239707, 777528612, 3163412089, 3492293770, 1072235262, + 2248032210, 1015386826, 2568320822, 1072230830, 2732824428, 1014352915, + 1577608921, 1072226410, 1875489510, 3162968394, 380978316, 1072222002, + 854188970, 3160462686, 3134592888, 1072217605, 4232266862, 1015991134, + 1110089947, 1072213221, 1451641639, 1015474673, 
2759350287, 1072208848, + 1148526634, 1015894933, 3649726105, 1072204487, 4085036346, 1015649474, + 3643909174, 1072200138, 3537586109, 1014354647, 2604962541, 1072195801, + 2614425274, 3163539192, 396319521, 1072191476, 4172420816, 3159074632, + 1176749997, 1072187162, 2738998779, 3162035844, 515457527, 1072182860, + 836709333, 1015651226, 2571947539, 1072178569, 3558159064, 3163376669, + 2916157145, 1072174290, 219487565, 1015309367, 1413356050, 1072170023, + 1651349291, 3162668166, 2224145553, 1072165767, 3482522030, 3161489169, + 919555682, 1072161523, 3121969534, 1012948226, 1660913392, 1072157290, + 4218599604, 1015135707, 19972402, 1072153069, 3507899862, 1016009292, + 158781403, 1072148859, 2221464712, 3163286453, 1944781191, 1072144660, + 3993278767, 3161724279, 950803702, 1072140473, 1655364926, 1015237032, + 1339972927, 1072136297, 167908909, 1015572152, 2980802057, 1072132132, + 378619896, 1015773303, 1447192521, 1072127979, 1462857171, 3162514521, + 903334909, 1072123837, 1636462108, 1015039997, 1218806132, 1072119706, + 1818613052, 3162548441, 2263535754, 1072115586, 752233586, 3162639008, + 3907805044, 1072111477, 2257091225, 3161550407, 1727278727, 1072107380, + 3562710623, 1011471940, 4182873220, 1072103293, 629542646, 3161996303, + 2555984613, 1072099218, 2652555442, 3162552692, 1013258799, 1072095154, + 1748797611, 3160129082, 3721688645, 1072091100, 3069276937, 1015839401, + 1963711167, 1072087058, 1744767757, 3160574294, 4201977662, 1072083026, + 748330254, 1013594357, 1719614413, 1072079006, 330458198, 3163282740, + 2979960120, 1072074996, 2599109725, 1014498493, 3561793907, 1072070997, + 1157054053, 1011890350, 3339203574, 1072067009, 1483497780, 3162408754, + 2186617381, 1072063032, 2270764084, 3163272713, 4273770423, 1072059065, + 3383180809, 3163218901, 885834528, 1072055110, 1973258547, 3162261564, + 488188413, 1072051165, 3199821029, 1015564048, 2956612997, 1072047230, + 2118169751, 3162735553, 3872257780, 1072043306, 1253592103, 1015958334, + 3111574537, 1072039393, 2606161479, 3162759746, 551349105, 1072035491, + 3821916050, 3162106589, 363667784, 1072031599, 813753950, 1015785209, + 2425981843, 1072027717, 2830390851, 3163346599, 2321106615, 1072023846, + 2171176610, 1009535771, 4222122499, 1072019985, 1277378074, 3163256737, + 3712504873, 1072016135, 88491949, 1015427660, 671025100, 1072012296, + 3832014351, 3163022030, 3566716925, 1072008466, 1536826856, 1014142433, + 3689071823, 1072004647, 2321004996, 3162552716, 917841882, 1072000839, + 18715565, 1015659308, 3723038930, 1071997040, 378465264, 3162569582, + 3395129871, 1071993252, 4025345435, 3162335388, 4109806887, 1071989474, + 422403966, 1014469229, 1453150082, 1071985707, 498154669, 3161488062, + 3896463087, 1071981949, 1139797873, 3161233805, 2731501122, 1071978202, + 1774031855, 3162470021, 2135241198, 1071974465, 1236747871, 1013589147, + 1990012071, 1071970738, 3529070563, 3162813193, 2178460671, 1071967021, + 777878098, 3162842493, 2583551245, 1071963314, 3161094195, 1015606491, + 3088564500, 1071959617, 1762311517, 1015045673, 3577096743, 1071955930, + 2951496418, 1013793687, 3933059031, 1071952253, 2133366768, 3161531832, + 4040676318, 1071948586, 4090609238, 1015663458, 3784486610, 1071944929, + 1581883040, 3161698953, 3049340112, 1071941282, 3062915824, 1013170595, + 1720398391, 1071937645, 3980678963, 3163300080, 3978100823, 1071934017, + 3513027190, 1015845963, 1118294578, 1071930400, 2197495694, 3159909401, + 1617004845, 1071926792, 82804944, 1010342778, 1065662932, 1071923194, + 
2533670915, 1014530238, 3645941911, 1071919605, 3814685081, 3161573341, + 654919306, 1071916027, 3232961757, 3163047469, 569847338, 1071912458, + 472945272, 3159290729, 3278348324, 1071908898, 3069497416, 1014750712, + 78413852, 1071905349, 4183226867, 3163017251, 3743175029, 1071901808, + 2072812490, 3162175075, 1276261410, 1071898278, 300981948, 1014684169, + 1156440435, 1071894757, 2351451249, 1013967056, 3272845541, 1071891245, + 928852419, 3163488248, 3219942644, 1071887743, 3798990616, 1015368806, + 887463927, 1071884251, 3596744163, 3160794166, 460407023, 1071880768, + 4237175092, 3163138469, 1829099622, 1071877294, 1016661181, 3163461005, + 589198666, 1071873830, 2664346172, 3163157962, 926591435, 1071870375, + 3208833762, 3162913514, 2732492859, 1071866929, 2691479646, 3162255684, + 1603444721, 1071863493, 1548633640, 3162201326, 1726216749, 1071860066, + 2466808228, 3161676405, 2992903935, 1071856648, 2218154406, 1015228193, + 1000925746, 1071853240, 1018491672, 3163309544, 4232894513, 1071849840, + 2383938684, 1014668519, 3991843581, 1071846450, 4092853457, 1014585763, + 171030293, 1071843070, 3526460132, 1014428778, 1253935211, 1071839698, + 1395382931, 3159702613, 2839424854, 1071836335, 1171596163, 1013041679, + 526652809, 1071832982, 4223459736, 1015879375, 2799960843, 1071829637, + 1423655381, 1015022151, 964107055, 1071826302, 2800439588, 3162833221, + 3504003472, 1071822975, 3594001060, 3157330652, 1724976915, 1071819658, + 420909223, 3163117379, 4112506593, 1071816349, 2947355221, 1014371048, + 1972484976, 1071813050, 675290301, 3161640050, 3790955393, 1071809759, + 2352942462, 3163180090, 874372905, 1071806478, 100263788, 1015940732, + 1709341917, 1071803205, 2571168217, 1014152499, 1897844341, 1071799941, + 1254300460, 1015275938, 1337108031, 1071796686, 3203724452, 1014677845, + 4219606026, 1071793439, 2434574742, 1014681548, 1853186616, 1071790202, + 3066496371, 1015656574, 2725843665, 1071786973, 1433917087, 1014838523, + 2440944790, 1071783753, 2492769774, 1014147454, 897099801, 1071780542, + 754756297, 1015241005, 2288159958, 1071777339, 2169144469, 1014876021, + 2218315341, 1071774145, 2694295388, 3163288868, 586995997, 1071770960, + 41662348, 3162627992, 1588871207, 1071767783, 143439582, 3162963416, + 828946858, 1071764615, 10642492, 1015939438, 2502433899, 1071761455, + 2148595913, 1015023991, 2214878420, 1071758304, 892270087, 3163116422, + 4162030108, 1071755161, 2763428480, 1015529349, 3949972341, 1071752027, + 2068408548, 1014913868, 1480023343, 1071748902, 2247196168, 1015327453, + 948735466, 1071745785, 3516338028, 3162574883, 2257959872, 1071742676, + 3802946148, 1012964927, 1014845819, 1071739576, 3117910646, 3161559105, + 1416741826, 1071736484, 2196380210, 1011413563, 3366293073, 1071733400, + 3119426314, 1014120554, 2471440686, 1071730325, 968836267, 3162214888, + 2930322912, 1071727258, 2599499422, 3162714047, 351405227, 1071724200, + 3125337328, 3159822479, 3228316108, 1071721149, 3010241991, 3158422804, + 2875075254, 1071718107, 4144233330, 3163333716, 3490863953, 1071715073, + 960797498, 3162948880, 685187902, 1071712048, 378731989, 1014843115, + 2952712987, 1071709030, 3293494651, 3160120301, 1608493509, 1071706021, + 3159622171, 3162807737, 852742562, 1071703020, 667253586, 1009793559, + 590962156, 1071700027, 3829346666, 3163275597, 728909815, 1071697042, + 383930225, 1015029468, 1172597893, 1071694065, 114433263, 1015347593, + 1828292879, 1071691096, 1255956747, 1015588398, 2602514713, 1071688135, + 2268929336, 1014354284, 3402036099, 
1071685182, 405889334, 1015105656, + 4133881824, 1071682237, 2148155345, 3162931299, 410360776, 1071679301, + 1269990655, 1011975870, 728934454, 1071676372, 1413842688, 1014178612, + 702412510, 1071673451, 3803266087, 3162280415, 238821257, 1071670538, + 1469694871, 3162884987, 3541402996, 1071667632, 2759177317, 1014854626, + 1928746161, 1071664735, 983617676, 1014285177, 3899555717, 1071661845, + 427280750, 3162546972, 772914124, 1071658964, 4004372762, 1012230161, + 1048019041, 1071656090, 1398474845, 3160510595, 339411585, 1071653224, + 264588982, 3161636657, 2851812149, 1071650365, 2595802551, 1015767337, + 4200250559, 1071647514, 2808127345, 3161781938 +}; + +#define __ _masm-> + +address StubGenerator::generate_libmTanh() { + StubCodeMark mark(this, "StubRoutines", "libmTanh"); + address start = __ pc(); + + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1; + Label B1_2, B1_4; + + address HALFMASK = (address)_HALFMASK; + address ONEMASK = (address)_ONEMASK; + address TWOMASK = (address)_TWOMASK; + address MASK3 = (address)_MASK3; + address RMASK = (address)_RMASK; + address L2E = (address)_L2E; + address Shifter = (address)_Shifter; + address cv = (address)_cv; + address pv = (address)_pv; + address T2_neg_f = (address) _T2_neg_f; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ bind(B1_2); + __ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/); + __ xorpd(xmm4, xmm4); + __ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/); + __ movsd(xmm2, ExternalAddress(L2E + 8), r11 /*rscratch*/); + __ movl(rax, 32768); + __ pinsrw(xmm4, rax, 3); + __ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/); + __ pextrw(rcx, xmm0, 3); + __ andpd(xmm3, xmm0); + __ andnpd(xmm4, xmm0); + __ pshufd(xmm5, xmm4, 68); + __ movl(rdx, 32768); + __ andl(rdx, rcx); + __ andl(rcx, 32767); + __ subl(rcx, 16304); + __ cmpl(rcx, 144); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); + __ subsd(xmm4, xmm3); + __ mulsd(xmm3, xmm1); + __ mulsd(xmm2, xmm5); + __ cvtsd2siq(rax, xmm3); + __ movq(xmm7, xmm3); + __ addsd(xmm3, xmm6); + __ mulsd(xmm1, xmm4); + __ movsd(xmm4, ExternalAddress(ONEMASK), r11 /*rscratch*/); + __ subsd(xmm3, xmm6); + __ xorpd(xmm0, xmm0); + __ addsd(xmm2, xmm1); + __ subsd(xmm7, xmm3); + __ movdqu(xmm6, ExternalAddress(cv), r11 /*rscratch*/); + __ addsd(xmm2, xmm7); + __ movl(rcx, 255); + __ andl(rcx, rax); + __ addl(rcx, rcx); + __ lea(r8, ExternalAddress(T2_neg_f)); + __ movdqu(xmm5, Address(r8, rcx, Address::times(8))); + __ shrl(rax, 4); + __ andl(rax, 65520); + __ subl(rax, 16368); + __ negl(rax); + __ pinsrw(xmm0, rax, 3); + __ movdqu(xmm1, ExternalAddress(cv + 16), r11 /*rscratch*/); + __ pshufd(xmm0, xmm0, 68); + __ mulpd(xmm0, xmm5); + __ movsd(xmm7, ExternalAddress(cv + 32), r11 /*rscratch*/); + __ pshufd(xmm2, xmm2, 68); + __ movq(xmm5, xmm4); + __ addsd(xmm4, xmm0); + __ mulpd(xmm6, xmm2); + __ mulsd(xmm7, xmm2); + __ mulpd(xmm2, xmm2); + __ addpd(xmm1, xmm6); + __ mulsd(xmm2, xmm2); + __ movsd(xmm3, ExternalAddress(ONEMASK), r11 /*rscratch*/); + __ mulpd(xmm1, xmm2); + __ pshufd(xmm6, xmm1, 78); + __ addsd(xmm1, xmm6); + __ movq(xmm6, xmm1); + __ addsd(xmm1, xmm7); + __ mulsd(xmm1, xmm0); + __ addsd(xmm1, xmm4); + __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/); + __ divsd(xmm5, xmm1); + __ subsd(xmm3, xmm4); + __ pshufd(xmm1, xmm0, 238); + __ addsd(xmm3, xmm0); + __ movq(xmm2, xmm4); + __ addsd(xmm3, xmm1); + __ mulsd(xmm1, xmm7); + __ 
mulsd(xmm7, xmm0); + __ addsd(xmm3, xmm1); + __ addsd(xmm4, xmm7); + __ movsd(xmm1, ExternalAddress(RMASK), r11 /*rscratch*/); + __ mulsd(xmm6, xmm0); + __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/); + __ addsd(xmm3, xmm6); + __ movq(xmm6, xmm4); + __ subsd(xmm2, xmm4); + __ addsd(xmm2, xmm7); + __ movsd(xmm7, ExternalAddress(ONEMASK), r11 /*rscratch*/); + __ andpd(xmm5, xmm1); + __ addsd(xmm3, xmm2); + __ mulsd(xmm4, xmm5); + __ xorpd(xmm2, xmm2); + __ mulsd(xmm3, xmm5); + __ subsd(xmm6, ExternalAddress(TWOMASK), r11 /*rscratch*/); + __ subsd(xmm4, xmm7); + __ xorl(rdx, 32768); + __ pinsrw(xmm2, rdx, 3); + __ addsd(xmm4, xmm3); + __ mulsd(xmm6, xmm5); + __ movq(xmm1, xmm3); + __ mulsd(xmm3, xmm4); + __ movq(xmm0, xmm6); + __ mulsd(xmm6, xmm4); + __ subsd(xmm1, xmm3); + __ subsd(xmm1, xmm6); + __ addsd(xmm0, xmm1); + __ xorpd(xmm0, xmm2); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_0_0_1); + __ addl(rcx, 960); + __ cmpl(rcx, 1104); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); + __ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/); + __ pshufd(xmm1, xmm0, 68); + __ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/); + __ mulpd(xmm1, xmm1); + __ movdqu(xmm4, ExternalAddress(pv + 32), r11 /*rscratch*/); + __ mulpd(xmm2, xmm1); + __ pshufd(xmm5, xmm1, 68); + __ addpd(xmm2, xmm3); + __ mulsd(xmm5, xmm5); + __ mulpd(xmm2, xmm1); + __ mulsd(xmm5, xmm5); + __ addpd(xmm2, xmm4); + __ mulpd(xmm2, xmm5); + __ pshufd(xmm5, xmm2, 238); + __ addsd(xmm2, xmm5); + __ mulsd(xmm2, xmm0); + __ addsd(xmm0, xmm2); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_1_0_1); + __ addl(rcx, 15344); + __ cmpl(rcx, 16448); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); + __ cmpl(rcx, 16); + __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); + __ xorpd(xmm2, xmm2); + __ movl(rax, 17392); + __ pinsrw(xmm2, rax, 3); + __ mulsd(xmm2, xmm0); + __ addsd(xmm2, xmm0); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_3_0_1); + __ movq(xmm2, xmm0); + __ mulsd(xmm2, xmm2); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_2_0_1); + __ cmpl(rcx, 32752); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); + __ xorpd(xmm2, xmm2); + __ movl(rcx, 15344); + __ pinsrw(xmm2, rcx, 3); + __ movq(xmm3, xmm2); + __ mulsd(xmm2, xmm2); + __ addsd(xmm2, xmm3); + + __ bind(L_2TAG_PACKET_5_0_1); + __ xorpd(xmm0, xmm0); + __ orl(rdx, 16368); + __ pinsrw(xmm0, rdx, 3); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_4_0_1); + __ movq(xmm2, xmm0); + __ movdl(rax, xmm0); + __ psrlq(xmm2, 20); + __ movdl(rcx, xmm2); + __ orl(rcx, rax); + __ cmpl(rcx, 0); + __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); + __ addsd(xmm0, xmm0); + + __ bind(B1_4); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; +} + +#undef __ diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp index ba9eb32e8c13e..75611524e3b0a 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -373,6 +373,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M // [ lo(arg) ] // [ hi(arg) ] // + if (kind == Interpreter::java_lang_math_tanh) { + return nullptr; + } + if (kind == Interpreter::java_lang_math_fmaD) { if (!UseFMA) { return nullptr; // Generate a vanilla entry diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp index 26eea4c1d6a5f..5ea2d8eba259b 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -465,6 +465,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M } else { __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); } + } else if (kind == Interpreter::java_lang_math_tanh) { + assert(StubRoutines::dtanh() != nullptr, "not initialized"); + __ movdbl(xmm0, Address(rsp, wordSize)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh()))); } else if (kind == Interpreter::java_lang_math_abs) { assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized"); __ movdbl(xmm0, Address(rsp, wordSize)); diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index 5e783225fcbfc..527d961259ecc 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -4048,6 +4048,7 @@ void TemplateTable::_new() { __ push(rcx); // save the contexts of klass for initializing the header // make sure klass is initialized + // init_state needs acquire, but x86 is TSO, and so we are already good. 
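The new comment is terse, so it is worth unpacking: the class-initialization check logically needs an acquire load of init_state, so that a thread observing fully_initialized also observes the static fields written by the initializing thread, but x86's TSO model already gives plain loads acquire ordering. A hedged C++ sketch of that invariant, with hypothetical names and enum values standing in for the HotSpot fields:

```cpp
// Sketch only, not HotSpot code: acquire ordering for a class-init check.
// On x86 (TSO) the acquire load compiles to a plain load; on weaker
// architectures (e.g. AArch64) it emits a load-acquire instruction.
#include <atomic>

enum ClassState { being_initialized = 3, fully_initialized = 4 };  // hypothetical values

bool class_is_initialized(const std::atomic<int>& init_state) {
  // Anything published before init_state was set to fully_initialized
  // is visible to the caller once this load observes that value.
  return init_state.load(std::memory_order_acquire) == fully_initialized;
}
```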
#ifdef _LP64 assert(VM_Version::supports_fast_class_init_checks(), "must support fast class initialization checks"); __ clinit_barrier(rcx, r15_thread, nullptr /*L_fast_path*/, &slow_case); diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp index e5075e180d9d6..d795c751d02b5 100644 --- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp +++ b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp @@ -24,7 +24,7 @@ #include "precompiled.hpp" #include "prims/upcallLinker.hpp" -address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, +address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp index 82179f9022e92..bc261bfd93f44 100644 --- a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp +++ b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp @@ -23,7 +23,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" -#include "code/codeBlob.hpp" +#include "classfile/javaClasses.hpp" #include "code/codeBlob.hpp" #include "code/vmreg.inline.hpp" #include "compiler/disassembler.hpp" @@ -169,10 +169,10 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr __ block_comment("} restore_callee_saved_regs "); } -static const int upcall_stub_code_base_size = 1024; +static const int upcall_stub_code_base_size = 1200; static const int upcall_stub_size_per_arg = 16; -address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, +address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, @@ -281,7 +281,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, __ block_comment("{ on_entry"); __ vzeroupper(); __ lea(c_rarg0, Address(rsp, frame_data_offset)); - __ movptr(c_rarg1, (intptr_t)receiver); // stack already aligned __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry))); __ movptr(r15_thread, rax); @@ -297,12 +296,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, 0); __ block_comment("} argument shuffle"); - __ block_comment("{ receiver "); - __ get_vm_result(j_rarg0, r15_thread); - __ block_comment("} receiver "); - - __ mov_metadata(rbx, entry); - __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized + __ block_comment("{ load target "); + __ movptr(j_rarg0, (intptr_t)receiver); + __ call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // puts target Method* in rbx + __ block_comment("} load target "); __ push_cont_fastpath(); @@ -377,7 +374,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry, #ifndef PRODUCT stringStream ss; - ss.print("upcall_stub_%s", entry->signature()->as_C_string()); + ss.print("upcall_stub_%s", signature->as_C_string()); const char* name = _masm->code_string(ss.freeze()); #else // PRODUCT const char* name = "upcall_stub"; diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 2549feb8a4069..038797924a92d 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -437,6 +437,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ cmpl(rax, 0x80000); __ 
jcc(Assembler::notEqual, vector_save_restore); +#ifndef PRODUCT bool save_apx = UseAPX; VM_Version::set_apx_cpuFeatures(); UseAPX = true; @@ -453,6 +454,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movq(Address(rsi, 8), r31); UseAPX = save_apx; +#endif #endif __ bind(vector_save_restore); // diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 2b29dd14e4b27..b55a1208cf2df 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -2457,6 +2457,10 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { mstack.push(m, Visit); // m = ShiftCntV return true; } + if (is_encode_and_store_pattern(n, m)) { + mstack.push(m, Visit); + return true; + } return false; } diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 1b271683bd60d..fee265473befe 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4341,6 +4341,7 @@ instruct loadP(rRegP dst, memory mem) // Load Compressed Pointer instruct loadN(rRegN dst, memory mem) %{ + predicate(n->as_Load()->barrier_data() == 0); match(Set dst (LoadN mem)); ins_cost(125); // XXX @@ -5126,6 +5127,7 @@ instruct storeImmP(memory mem, immP31 src) // Store Compressed Pointer instruct storeN(memory mem, rRegN src) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(125); // XXX @@ -5150,7 +5152,7 @@ instruct storeNKlass(memory mem, rRegN src) instruct storeImmN0(memory mem, immN0 zero) %{ - predicate(CompressedOops::base() == nullptr); + predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem zero)); ins_cost(125); // XXX @@ -5163,6 +5165,7 @@ instruct storeImmN0(memory mem, immN0 zero) instruct storeImmN(memory mem, immN src) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(150); // XXX @@ -7162,6 +7165,7 @@ instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -7249,6 +7253,7 @@ instruct compareAndExchangeN( memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval))); effect(KILL cr); @@ -7470,6 +7475,7 @@ instruct xchgP( memory mem, rRegP newval) %{ %} instruct xchgN( memory mem, rRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set newval (GetAndSetN mem newval)); format %{ "XCHGL $newval,$mem]" %} ins_encode %{ @@ -11659,6 +11665,7 @@ instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{ + predicate(n->in(2)->as_Load()->barrier_data() == 0); match(Set cr (CmpN src (LoadN mem))); format %{ "cmpl $src, $mem\t# compressed ptr" %} @@ -11680,6 +11687,7 @@ instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{ + predicate(n->in(2)->as_Load()->barrier_data() == 0); match(Set cr (CmpN src (LoadN mem))); format %{ "cmpl $mem, $src\t# compressed ptr" %} @@ -11720,7 +11728,8 @@ instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) %{ - predicate(CompressedOops::base() != nullptr); + 
predicate(CompressedOops::base() != nullptr && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpN (LoadN mem) zero)); ins_cost(500); // XXX @@ -11733,7 +11742,8 @@ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{ - predicate(CompressedOops::base() == nullptr); + predicate(CompressedOops::base() == nullptr && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpN (LoadN mem) zero)); format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %} diff --git a/src/hotspot/cpu/zero/upcallLinker_zero.cpp b/src/hotspot/cpu/zero/upcallLinker_zero.cpp index 6447dac86c915..408ebc328205d 100644 --- a/src/hotspot/cpu/zero/upcallLinker_zero.cpp +++ b/src/hotspot/cpu/zero/upcallLinker_zero.cpp @@ -24,7 +24,7 @@ #include "precompiled.hpp" #include "prims/upcallLinker.hpp" -address UpcallLinker::make_upcall_stub(jobject mh, Method* entry, +address UpcallLinker::make_upcall_stub(jobject mh, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index 1fcf4b1086253..7312dd116468c 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -116,11 +116,6 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } - if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) { - warning("Unsupported locking mode for this CPU."); - FLAG_SET_DEFAULT(LockingMode, LM_LEGACY); - } - // Enable error context decoding on known platforms #if defined(IA32) || defined(AMD64) || defined(ARM) || \ defined(AARCH64) || defined(PPC) || defined(RISCV) || \ diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index 2b53042ef1017..aab43e733964e 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -485,26 +485,30 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { // Unlock if necessary if (monitor) { - BasicLock *lock = monitor->lock(); - markWord header = lock->displaced_header(); - oop rcvr = monitor->obj(); - monitor->set_obj(nullptr); - - bool dec_monitor_count = true; - if (header.to_pointer() != nullptr) { - markWord old_header = markWord::encode(lock); - if (rcvr->cas_set_mark(header, old_header) != old_header) { - monitor->set_obj(rcvr); - dec_monitor_count = false; - InterpreterRuntime::monitorexit(monitor); + bool success = false; + if (LockingMode == LM_LEGACY) { + BasicLock* lock = monitor->lock(); + oop rcvr = monitor->obj(); + monitor->set_obj(nullptr); + success = true; + markWord header = lock->displaced_header(); + if (header.to_pointer() != nullptr) { // Check for recursive lock + markWord old_header = markWord::encode(lock); + if (rcvr->cas_set_mark(header, old_header) != old_header) { + monitor->set_obj(rcvr); + success = false; + } + } + if (success) { + THREAD->dec_held_monitor_count(); } } - if (dec_monitor_count) { - THREAD->dec_held_monitor_count(); + if (!success) { + InterpreterRuntime::monitorexit(monitor); } } - unwind_and_return: + unwind_and_return: // Unwind the current activation thread->pop_zero_frame(); diff --git a/src/hotspot/os/aix/osThread_aix.cpp b/src/hotspot/os/aix/osThread_aix.cpp index 4049d6b58b777..ab08a766156fe 100644 --- a/src/hotspot/os/aix/osThread_aix.cpp +++ b/src/hotspot/os/aix/osThread_aix.cpp @@ -23,32 +23,27 
@@ * */ -// no precompiled headers - -#include "memory/allocation.inline.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/mutexLocker.hpp" -#include "runtime/os.hpp" +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "runtime/mutex.hpp" #include "runtime/osThread.hpp" -#include "runtime/safepoint.hpp" -#include "runtime/vmThread.hpp" - -void OSThread::pd_initialize() { - _thread_id = 0; - _kernel_thread_id = 0; - _siginfo = nullptr; - _ucontext = nullptr; - _expanding_stack = 0; - _alt_sig_stack = nullptr; - _last_cpu_times.sys = _last_cpu_times.user = 0L; +#include +OSThread::OSThread() + : _thread_id(0), + _thread_type(), + _kernel_thread_id(0), + _caller_sigmask(), + sr(), + _siginfo(nullptr), + _ucontext(nullptr), + _expanding_stack(0), + _alt_sig_stack(nullptr), + _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) { sigemptyset(&_caller_sigmask); - - _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); - assert(_startThread_lock != nullptr, "check"); } -void OSThread::pd_destroy() { +OSThread::~OSThread() { delete _startThread_lock; } diff --git a/src/hotspot/os/aix/osThread_aix.hpp b/src/hotspot/os/aix/osThread_aix.hpp index 5feb3c5799aa0..8f3799d070142 100644 --- a/src/hotspot/os/aix/osThread_aix.hpp +++ b/src/hotspot/os/aix/osThread_aix.hpp @@ -26,22 +26,17 @@ #ifndef OS_AIX_OSTHREAD_AIX_HPP #define OS_AIX_OSTHREAD_AIX_HPP - public: - typedef pthread_t thread_id_t; +#include "runtime/osThreadBase.hpp" +#include "suspendResume_posix.hpp" +#include "utilities/globalDefinitions.hpp" - private: - int _thread_type; +class OSThread : public OSThreadBase { + friend class VMStructs; - public: - - int thread_type() const { - return _thread_type; - } - void set_thread_type(int type) { - _thread_type = type; - } + typedef pthread_t thread_id_t; - private: + thread_id_t _thread_id; + int _thread_type; // On AIX, we use the pthread id as OSThread::thread_id and keep the kernel thread id // separately for diagnostic purposes. @@ -54,15 +49,27 @@ sigset_t _caller_sigmask; // Caller's signal mask public: + OSThread(); + ~OSThread(); + + int thread_type() const { + return _thread_type; + } + void set_thread_type(int type) { + _thread_type = type; + } // Methods to save/restore caller's signal mask sigset_t caller_sigmask() const { return _caller_sigmask; } void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; } -#ifndef PRODUCT - // Used for debugging, return a unique integer for each thread. - int thread_identifier() const { return _thread_id; } -#endif + thread_id_t thread_id() const { + return _thread_id; + } + void set_thread_id(thread_id_t id) { + _thread_id = id; + } + tid_t kernel_thread_id() const { return _kernel_thread_id; } @@ -71,7 +78,7 @@ } pthread_t pthread_id() const { - // Here: same as OSThread::thread_id() + // Here: same as thread_id() return _thread_id; } @@ -79,7 +86,6 @@ // suspension support. // *************************************************************** - public: // flags that support signal based suspend/resume on Aix are in a // separate class to avoid confusion with many flags in OSThread that // are used by VM level suspend/resume. 
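The osThread changes in this patch all follow one pattern: the pd_initialize()/pd_destroy() hooks become a real constructor and destructor with an initializer list, and each platform's OSThread becomes a standalone class deriving from a common OSThreadBase with a virtual hook for printing the native thread id. A hedged sketch of the shape (names abbreviated, uintptr_t standing in for HotSpot's uintx; not the actual declarations):

```cpp
// Sketch of the refactoring pattern, not the actual HotSpot declarations.
#include <pthread.h>
#include <cstdint>

class OSThreadBase {
 public:
  virtual ~OSThreadBase() = default;
  // Each platform renders its native thread id uniformly for logging.
  virtual uintptr_t thread_id_for_printing() const = 0;
};

class OSThread : public OSThreadBase {  // e.g. a POSIX flavor
  pthread_t _thread_id;
 public:
  // Construction fully initializes the object; no separate pd_initialize().
  OSThread() : _thread_id(0) {}
  uintptr_t thread_id_for_printing() const override {
    return (uintptr_t)_thread_id;
  }
};
```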
@@ -125,22 +131,10 @@ return _startThread_lock; } - // *************************************************************** - // Platform dependent initialization and cleanup - // *************************************************************** - - private: - - void pd_initialize(); - void pd_destroy(); - - public: - - // The last measured values of cpu timing to prevent the "stale - // value return" bug in thread_cpu_time. - volatile struct { - jlong sys; - jlong user; - } _last_cpu_times; + // Printing + uintx thread_id_for_printing() const override { + return (uintx)_thread_id; + } +}; #endif // OS_AIX_OSTHREAD_AIX_HPP diff --git a/src/hotspot/os/aix/vmStructs_aix.hpp b/src/hotspot/os/aix/vmStructs_aix.hpp index 1a2f4c4bf6e21..f3bbc80e62c72 100644 --- a/src/hotspot/os/aix/vmStructs_aix.hpp +++ b/src/hotspot/os/aix/vmStructs_aix.hpp @@ -29,9 +29,20 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) +#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pthread_t) \ + +#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_unsigned_integer_type(pthread_t) #define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp index 29825a9eab291..2e56c092a79b5 100644 --- a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp +++ b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp @@ -22,10 +22,10 @@ */ #include "precompiled.hpp" -#include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zAddress.inline.hpp" #include "gc/z/zErrno.hpp" #include "gc/z/zGlobals.hpp" +#include "gc/z/zInitialize.hpp" #include "gc/z/zLargePages.inline.hpp" #include "gc/z/zPhysicalMemory.inline.hpp" #include "gc/z/zPhysicalMemoryBacking_bsd.hpp" @@ -82,7 +82,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) _base = (uintptr_t)os::reserve_memory(max_capacity); if (_base == 0) { // Failed - log_error_pd(gc)("Failed to reserve address space for backing memory"); + ZInitialize::error("Failed to reserve address space for backing memory"); return; } diff --git a/src/hotspot/os/bsd/osThread_bsd.cpp b/src/hotspot/os/bsd/osThread_bsd.cpp index 7b9ad1f76a855..4080ea1bf297b 100644 --- a/src/hotspot/os/bsd/osThread_bsd.cpp +++ b/src/hotspot/os/bsd/osThread_bsd.cpp @@ -22,30 +22,32 @@ * */ -// no precompiled headers -#include "memory/allocation.inline.hpp" -#include "runtime/mutexLocker.hpp" +#include 
"precompiled.hpp" +#include "memory/allocation.hpp" +#include "runtime/mutex.hpp" #include "runtime/osThread.hpp" #include -void OSThread::pd_initialize() { +OSThread::OSThread() + : _thread_id( #ifdef __APPLE__ - _thread_id = 0; + 0 #else - _thread_id = nullptr; + nullptr #endif - _unique_thread_id = 0; - _pthread_id = nullptr; - _siginfo = nullptr; - _ucontext = nullptr; - _expanding_stack = 0; - _alt_sig_stack = nullptr; - + ), + _thread_type(), + _pthread_id(nullptr), + _unique_thread_id(0), + _caller_sigmask(), + sr(), + _siginfo(nullptr), + _ucontext(nullptr), + _expanding_stack(0), + _alt_sig_stack(nullptr), + _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) { sigemptyset(&_caller_sigmask); - - _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); - assert(_startThread_lock !=nullptr, "check"); } // Additional thread_id used to correlate threads in SA @@ -64,6 +66,6 @@ void OSThread::set_unique_thread_id() { #endif } -void OSThread::pd_destroy() { +OSThread::~OSThread() { delete _startThread_lock; } diff --git a/src/hotspot/os/bsd/osThread_bsd.hpp b/src/hotspot/os/bsd/osThread_bsd.hpp index 11376835063c4..e54e7195f9870 100644 --- a/src/hotspot/os/bsd/osThread_bsd.hpp +++ b/src/hotspot/os/bsd/osThread_bsd.hpp @@ -25,19 +25,12 @@ #ifndef OS_BSD_OSTHREAD_BSD_HPP #define OS_BSD_OSTHREAD_BSD_HPP - private: - int _thread_type; +#include "runtime/osThreadBase.hpp" +#include "suspendResume_posix.hpp" +#include "utilities/globalDefinitions.hpp" - public: - - int thread_type() const { - return _thread_type; - } - void set_thread_type(int type) { - _thread_type = type; - } - - private: +class OSThread : public OSThreadBase { + friend class VMStructs; #ifdef __APPLE__ typedef thread_t thread_id_t; @@ -45,6 +38,9 @@ typedef pid_t thread_id_t; #endif + thread_id_t _thread_id; + int _thread_type; + // _pthread_id is the pthread id, which is used by library calls // (e.g. pthread_kill). pthread_t _pthread_id; @@ -57,15 +53,26 @@ sigset_t _caller_sigmask; // Caller's signal mask public: + OSThread(); + ~OSThread(); + + int thread_type() const { + return _thread_type; + } + void set_thread_type(int type) { + _thread_type = type; + } // Methods to save/restore caller's signal mask sigset_t caller_sigmask() const { return _caller_sigmask; } void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; } -#ifndef PRODUCT - // Used for debugging, return a unique integer for each thread. - intptr_t thread_identifier() const { return (intptr_t)_pthread_id; } -#endif + thread_id_t thread_id() const { + return _thread_id; + } + void set_thread_id(thread_id_t id) { + _thread_id = id; + } pthread_t pthread_id() const { return _pthread_id; @@ -80,7 +87,6 @@ // suspension support. // *************************************************************** -public: // flags that support signal based suspend/resume on Bsd are in a // separate class to avoid confusion with many flags in OSThread that // are used by VM level suspend/resume. 
@@ -126,17 +132,9 @@ return _startThread_lock; } - // *************************************************************** - // Platform dependent initialization and cleanup - // *************************************************************** - -private: - - void pd_initialize(); - void pd_destroy(); - -// Reconciliation History -// osThread_solaris.hpp 1.24 99/08/27 13:11:54 -// End + uintx thread_id_for_printing() const override { + return (uintx)_thread_id; + } +}; #endif // OS_BSD_OSTHREAD_BSD_HPP diff --git a/src/hotspot/os/bsd/vmStructs_bsd.hpp b/src/hotspot/os/bsd/vmStructs_bsd.hpp index 84c1be77374d0..8c9c132e1c25c 100644 --- a/src/hotspot/os/bsd/vmStructs_bsd.hpp +++ b/src/hotspot/os/bsd/vmStructs_bsd.hpp @@ -31,9 +31,21 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) +#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + nonstatic_field(OSThread, _unique_thread_id, uint64_t) + +#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Thread IDs */ \ + /**********************/ \ + \ + declare_unsigned_integer_type(OSThread::thread_id_t) #define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp index 527573644a816..56dcadd670f82 100644 --- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp @@ -64,16 +64,16 @@ class CgroupV2CpuController: public CgroupCpuController { bool is_read_only() override { return reader()->is_read_only(); } - const char* subsystem_path() { + const char* subsystem_path() override { return reader()->subsystem_path(); } bool needs_hierarchy_adjustment() override { return reader()->needs_hierarchy_adjustment(); } - void set_subsystem_path(const char* cgroup_path) { + void set_subsystem_path(const char* cgroup_path) override { reader()->set_subsystem_path(cgroup_path); } - const char* mount_point() { return reader()->mount_point(); } + const char* mount_point() override { return reader()->mount_point(); } const char* cgroup_path() override { return reader()->cgroup_path(); } }; @@ -97,16 +97,16 @@ class CgroupV2MemoryController final: public CgroupMemoryController { bool is_read_only() override { return reader()->is_read_only(); } - const char* subsystem_path() { + const char* subsystem_path() override { return reader()->subsystem_path(); } bool needs_hierarchy_adjustment() override { return reader()->needs_hierarchy_adjustment(); } - void 
set_subsystem_path(const char* cgroup_path) { + void set_subsystem_path(const char* cgroup_path) override { reader()->set_subsystem_path(cgroup_path); } - const char* mount_point() { return reader()->mount_point(); } + const char* mount_point() override { return reader()->mount_point(); } const char* cgroup_path() override { return reader()->cgroup_path(); } }; diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp index f967fee930579..b648876ac602c 100644 --- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp +++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp @@ -27,6 +27,7 @@ #include "gc/z/zArray.inline.hpp" #include "gc/z/zErrno.hpp" #include "gc/z/zGlobals.hpp" +#include "gc/z/zInitialize.hpp" #include "gc/z/zLargePages.inline.hpp" #include "gc/z/zMountPoint_linux.hpp" #include "gc/z/zNUMA.inline.hpp" @@ -129,6 +130,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) // Create backing file _fd = create_fd(ZFILENAME_HEAP); if (_fd == -1) { + ZInitialize::error("Failed to create heap backing file"); return; } @@ -136,7 +138,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) while (ftruncate(_fd, max_capacity) == -1) { if (errno != EINTR) { ZErrno err; - log_error_p(gc)("Failed to truncate backing file (%s)", err.to_string()); + ZInitialize::error("Failed to truncate backing file (%s)", err.to_string()); return; } } @@ -145,7 +147,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) struct statfs buf; if (fstatfs(_fd, &buf) == -1) { ZErrno err; - log_error_p(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string()); + ZInitialize::error("Failed to determine filesystem type for backing file (%s)", err.to_string()); return; } @@ -158,39 +160,39 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) // Make sure the filesystem type matches requested large page type if (ZLargePages::is_transparent() && !is_tmpfs()) { - log_error_p(gc)("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem", - ZFILESYSTEM_TMPFS); + ZInitialize::error("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem", + ZFILESYSTEM_TMPFS); return; } if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) { - log_error_p(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", - ZFILESYSTEM_TMPFS); + ZInitialize::error("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", + ZFILESYSTEM_TMPFS); return; } if (ZLargePages::is_explicit() && !is_hugetlbfs()) { - log_error_p(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled " - "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); + ZInitialize::error("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled " + "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); return; } if (!ZLargePages::is_explicit() && is_hugetlbfs()) { - log_error_p(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem", - ZFILESYSTEM_HUGETLBFS); + ZInitialize::error("-XX:+UseLargePages must be enabled when using a %s filesystem", + ZFILESYSTEM_HUGETLBFS); return; } // Make sure the filesystem block size is compatible if (ZGranuleSize % _block_size != 0) { - log_error_p(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")", - _block_size); + ZInitialize::error("Filesystem backing the heap has 
incompatible block size (" SIZE_FORMAT ")", + _block_size); return; } if (is_hugetlbfs() && _block_size != ZGranuleSize) { - log_error_p(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")", - ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize); + ZInitialize::error("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")", + ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize); return; } diff --git a/src/hotspot/os/linux/osThread_linux.cpp b/src/hotspot/os/linux/osThread_linux.cpp index 9c77cb32f6d1c..3dd6e3bbcd15c 100644 --- a/src/hotspot/os/linux/osThread_linux.cpp +++ b/src/hotspot/os/linux/osThread_linux.cpp @@ -22,27 +22,27 @@ * */ -// no precompiled headers -#include "memory/allocation.inline.hpp" +#include "precompiled.hpp" +#include "memory/allocation.hpp" #include "runtime/mutex.hpp" #include "runtime/osThread.hpp" #include -void OSThread::pd_initialize() { - _thread_id = 0; - _pthread_id = 0; - _siginfo = nullptr; - _ucontext = nullptr; - _expanding_stack = 0; - _alt_sig_stack = nullptr; - +OSThread::OSThread() + : _thread_id(0), + _thread_type(), + _pthread_id(0), + _caller_sigmask(), + sr(), + _siginfo(nullptr), + _ucontext(nullptr), + _expanding_stack(0), + _alt_sig_stack(nullptr), + _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) { sigemptyset(&_caller_sigmask); - - _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); - assert(_startThread_lock !=nullptr, "check"); } -void OSThread::pd_destroy() { +OSThread::~OSThread() { delete _startThread_lock; } diff --git a/src/hotspot/os/linux/osThread_linux.hpp b/src/hotspot/os/linux/osThread_linux.hpp index a849673af62db..f8dfd5a213bbb 100644 --- a/src/hotspot/os/linux/osThread_linux.hpp +++ b/src/hotspot/os/linux/osThread_linux.hpp @@ -24,13 +24,28 @@ #ifndef OS_LINUX_OSTHREAD_LINUX_HPP #define OS_LINUX_OSTHREAD_LINUX_HPP - public: + +#include "runtime/osThreadBase.hpp" +#include "suspendResume_posix.hpp" +#include "utilities/globalDefinitions.hpp" + +class OSThread : public OSThreadBase { + friend class VMStructs; + typedef pid_t thread_id_t; - private: + thread_id_t _thread_id; int _thread_type; + // _pthread_id is the pthread id, which is used by library calls + // (e.g. pthread_kill). + pthread_t _pthread_id; + + sigset_t _caller_sigmask; // Caller's signal mask + public: + OSThread(); + ~OSThread(); int thread_type() const { return _thread_type; @@ -39,22 +54,16 @@ _thread_type = type; } - // _pthread_id is the pthread id, which is used by library calls - // (e.g. pthread_kill). - pthread_t _pthread_id; - - sigset_t _caller_sigmask; // Caller's signal mask - - public: - // Methods to save/restore caller's signal mask sigset_t caller_sigmask() const { return _caller_sigmask; } void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; } -#ifndef PRODUCT - // Used for debugging, return a unique integer for each thread. - int thread_identifier() const { return _thread_id; } -#endif + thread_id_t thread_id() const { + return _thread_id; + } + void set_thread_id(thread_id_t id) { + _thread_id = id; + } pthread_t pthread_id() const { return _pthread_id; @@ -67,7 +76,6 @@ // suspension support. // *************************************************************** -public: // flags that support signal based suspend/resume on Linux are in a // separate class to avoid confusion with many flags in OSThread that // are used by VM level suspend/resume. 
@@ -113,17 +121,10 @@ return _startThread_lock; } - // *************************************************************** - // Platform dependent initialization and cleanup - // *************************************************************** - -private: - - void pd_initialize(); - void pd_destroy(); - -// Reconciliation History -// osThread_solaris.hpp 1.24 99/08/27 13:11:54 -// End + // Printing + uintx thread_id_for_printing() const override { + return (uintx)_thread_id; + } +}; #endif // OS_LINUX_OSTHREAD_LINUX_HPP diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 609317df45fc2..c9968fc9f3580 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -817,7 +817,7 @@ static void *thread_native_entry(Thread *thread) { OSThread* osthread = thread->osthread(); Monitor* sync = osthread->startThread_lock(); - osthread->set_thread_id(checked_cast(os::current_thread_id())); + osthread->set_thread_id(checked_cast(os::current_thread_id())); if (UseNUMA) { int lgrp_id = os::numa_get_group_id(); diff --git a/src/hotspot/os/linux/vmStructs_linux.hpp b/src/hotspot/os/linux/vmStructs_linux.hpp index 818f6bb188fe8..3b82ac58ac697 100644 --- a/src/hotspot/os/linux/vmStructs_linux.hpp +++ b/src/hotspot/os/linux/vmStructs_linux.hpp @@ -31,9 +31,22 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) +#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + +#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) #define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os/windows/globals_windows.hpp b/src/hotspot/os/windows/globals_windows.hpp index 78cbac6e9ccc5..8f0a6261cc0db 100644 --- a/src/hotspot/os/windows/globals_windows.hpp +++ b/src/hotspot/os/windows/globals_windows.hpp @@ -38,6 +38,10 @@ product(bool, UseAllWindowsProcessorGroups, false, \ "Use all processor groups on supported Windows versions") \ \ +product(bool, EnableAllLargePageSizesForWindows, false, \ + "Enable support for multiple large page sizes on " \ + "Windows Server") \ + \ product(bool, UseOSErrorReporting, false, \ "Let VM fatal error propagate to the OS (ie. 
WER on Windows)") diff --git a/src/hotspot/os/windows/osThread_windows.cpp b/src/hotspot/os/windows/osThread_windows.cpp index 5f369bb7aa05b..922b4b0104be4 100644 --- a/src/hotspot/os/windows/osThread_windows.cpp +++ b/src/hotspot/os/windows/osThread_windows.cpp @@ -22,17 +22,17 @@ * */ -// no precompiled headers -#include "runtime/os.hpp" +#include "precompiled.hpp" #include "runtime/osThread.hpp" -void OSThread::pd_initialize() { - set_thread_handle(nullptr); - set_thread_id(0); - set_interrupt_event(nullptr); -} +#include + +OSThread::OSThread() + : _thread_id(0), + _thread_handle(nullptr), + _interrupt_event(nullptr) {} -void OSThread::pd_destroy() { +OSThread::~OSThread() { if (_interrupt_event != nullptr) { CloseHandle(_interrupt_event); } diff --git a/src/hotspot/os/windows/osThread_windows.hpp b/src/hotspot/os/windows/osThread_windows.hpp index 5bd07646b1718..e54783aef1c15 100644 --- a/src/hotspot/os/windows/osThread_windows.hpp +++ b/src/hotspot/os/windows/osThread_windows.hpp @@ -25,17 +25,29 @@ #ifndef OS_WINDOWS_OSTHREAD_WINDOWS_HPP #define OS_WINDOWS_OSTHREAD_WINDOWS_HPP - typedef void* HANDLE; - public: +#include "runtime/osThreadBase.hpp" +#include "utilities/globalDefinitions.hpp" + +class OSThread : public OSThreadBase { + friend class VMStructs; + typedef unsigned long thread_id_t; + typedef void* HANDLE; + + thread_id_t _thread_id; - private: // Win32-specific thread information HANDLE _thread_handle; // Win32 thread handle HANDLE _interrupt_event; // Event signalled on thread interrupt for use by // Process.waitFor(). public: + OSThread(); + ~OSThread(); + + thread_id_t thread_id() const { return _thread_id; } + void set_thread_id(thread_id_t id) { _thread_id = id; } + // The following will only apply in the Win32 implementation, and should only // be visible in the concrete class, not this which should be an abstract base class HANDLE thread_handle() const { return _thread_handle; } @@ -45,13 +57,9 @@ // This is specialized on Windows to interact with the _interrupt_event. void set_interrupted(bool z); -#ifndef PRODUCT - // Used for debugging, return a unique integer for each thread. 
- int thread_identifier() const { return _thread_id; } -#endif - - private: - void pd_initialize(); - void pd_destroy(); + uintx thread_id_for_printing() const override { + return (uintx)_thread_id; + } +}; #endif // OS_WINDOWS_OSTHREAD_WINDOWS_HPP diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp index ed62fe612aae3..0d5727b98f443 100644 --- a/src/hotspot/os/windows/os_windows.cpp +++ b/src/hotspot/os/windows/os_windows.cpp @@ -63,6 +63,7 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/statSampler.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/suspendedThreadTask.hpp" #include "runtime/threadCritical.hpp" #include "runtime/threads.hpp" #include "runtime/timer.hpp" @@ -3126,7 +3127,7 @@ class NUMANodeListHolder { static size_t _large_page_size = 0; -static bool request_lock_memory_privilege() { +bool os::win32::request_lock_memory_privilege() { HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, os::current_process_id()); @@ -3310,14 +3311,14 @@ static char* allocate_pages_individually(size_t bytes, char* addr, DWORD flags, return p_buf; } -static size_t large_page_init_decide_size() { +size_t os::win32::large_page_init_decide_size() { // print a warning if any large page related flag is specified on command line bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) || !FLAG_IS_DEFAULT(LargePageSizeInBytes); -#define WARN(msg) if (warn_on_failure) { warning(msg); } +#define WARN(...) if (warn_on_failure) { warning(__VA_ARGS__); } - if (!request_lock_memory_privilege()) { + if (!os::win32::request_lock_memory_privilege()) { WARN("JVM cannot use large page memory because it does not have enough privilege to lock pages in memory."); return 0; } @@ -3328,15 +3329,26 @@ static size_t large_page_init_decide_size() { return 0; } -#if defined(IA32) || defined(AMD64) - if (size > 4*M || LargePageSizeInBytes > 4*M) { +#if defined(IA32) + if (size > 4 * M || LargePageSizeInBytes > 4 * M) { WARN("JVM cannot use large pages bigger than 4mb."); return 0; } +#elif defined(AMD64) + if (!EnableAllLargePageSizesForWindows) { + if (size > 4 * M || LargePageSizeInBytes > 4 * M) { + WARN("JVM cannot use large pages bigger than 4mb."); + return 0; + } + } #endif - if (LargePageSizeInBytes > 0 && LargePageSizeInBytes % size == 0) { - size = LargePageSizeInBytes; + if (LargePageSizeInBytes > 0) { + if (LargePageSizeInBytes % size == 0) { + size = LargePageSizeInBytes; + } else { + WARN("The specified large page size (%d) is not a multiple of the minimum large page size (%d), defaulting to minimum page size.", LargePageSizeInBytes, size); + } } #undef WARN @@ -3349,12 +3361,23 @@ void os::large_page_init() { return; } - _large_page_size = large_page_init_decide_size(); + _large_page_size = os::win32::large_page_init_decide_size(); const size_t default_page_size = os::vm_page_size(); if (_large_page_size > default_page_size) { +#if !defined(IA32) + if (EnableAllLargePageSizesForWindows) { + size_t min_size = GetLargePageMinimum(); + + // Populate _page_sizes with large page sizes less than or equal to _large_page_size, ensuring each page size is double the size of the previous one. 
+ for (size_t page_size = min_size; page_size < _large_page_size; page_size *= 2) { + _page_sizes.add(page_size); + } + } +#endif + _page_sizes.add(_large_page_size); } - + // Set UseLargePages based on whether a large page size was successfully determined UseLargePages = _large_page_size != 0; } @@ -3618,7 +3641,6 @@ static char* reserve_large_pages_aligned(size_t size, size_t alignment, bool exe char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_size, char* addr, bool exec) { assert(UseLargePages, "only for large pages"); - assert(page_size == os::large_page_size(), "Currently only support one large page size on Windows"); assert(is_aligned(addr, alignment), "Must be"); assert(is_aligned(addr, page_size), "Must be"); @@ -3627,11 +3649,17 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_ return nullptr; } + // Ensure GetLargePageMinimum() returns a valid positive value + size_t large_page_min = GetLargePageMinimum(); + if (large_page_min <= 0) { + return nullptr; + } + // The requested alignment can be larger than the page size, for example with G1 // the alignment is bound to the heap region size. So this reservation needs to // ensure that the requested alignment is met. When there is a requested address // this solves it self, since it must be properly aligned already. - if (addr == nullptr && alignment > page_size) { + if (addr == nullptr && alignment > large_page_min) { return reserve_large_pages_aligned(bytes, alignment, exec); } @@ -4093,6 +4121,39 @@ int os::win32::_build_minor = 0; bool os::win32::_processor_group_warning_displayed = false; bool os::win32::_job_object_processor_group_warning_displayed = false; +void getWindowsInstallationType(char* buffer, int bufferSize) { + HKEY hKey; + const char* subKey = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion"; + const char* valueName = "InstallationType"; + + DWORD valueLength = bufferSize; + + // Initialize buffer with empty string + buffer[0] = '\0'; + + // Open the registry key + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, subKey, 0, KEY_READ, &hKey) != ERROR_SUCCESS) { + // Return empty buffer if key cannot be opened + return; + } + + // Query the value + if (RegQueryValueExA(hKey, valueName, NULL, NULL, (LPBYTE)buffer, &valueLength) != ERROR_SUCCESS) { + RegCloseKey(hKey); + buffer[0] = '\0'; + return; + } + + RegCloseKey(hKey); +} + +bool isNanoServer() { + const int BUFFER_SIZE = 256; + char installationType[BUFFER_SIZE]; + getWindowsInstallationType(installationType, BUFFER_SIZE); + return (strcmp(installationType, "Nano Server") == 0); +} + void os::win32::initialize_windows_version() { assert(_major_version == 0, "windows version already initialized."); @@ -4110,7 +4171,13 @@ void os::win32::initialize_windows_version() { warning("Attempt to determine system directory failed: %s", buf_len != 0 ? error_msg_buffer : ""); return; } - strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret); + + if (isNanoServer()) { + // On Windows Nanoserver the kernel32.dll is located in the forwarders subdirectory + strncat(kernel32_path, "\\forwarders\\kernel32.dll", MAX_PATH - ret); + } else { + strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret); + } DWORD version_size = GetFileVersionInfoSize(kernel32_path, nullptr); if (version_size == 0) { @@ -5926,7 +5993,7 @@ static void do_resume(HANDLE* h) { // retrieve a suspend/resume context capable handle // from the tid. Caller validates handle return value. 
void get_thread_handle_for_extended_context(HANDLE* h, - OSThread::thread_id_t tid) { + DWORD tid) { if (h != nullptr) { *h = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, FALSE, tid); } diff --git a/src/hotspot/os/windows/os_windows.hpp b/src/hotspot/os/windows/os_windows.hpp index 3bc5ab9eef1f3..1d5237243000b 100644 --- a/src/hotspot/os/windows/os_windows.hpp +++ b/src/hotspot/os/windows/os_windows.hpp @@ -65,6 +65,8 @@ class os::win32 { static void setmode_streams(); static bool is_windows_11_or_greater(); static bool is_windows_server_2022_or_greater(); + static bool request_lock_memory_privilege(); + static size_t large_page_init_decide_size(); static int windows_major_version() { assert(_major_version > 0, "windows version not initialized."); return _major_version; diff --git a/src/hotspot/os/windows/vmStructs_windows.hpp b/src/hotspot/os/windows/vmStructs_windows.hpp index 2550e685f16e2..93f4ea7c8111d 100644 --- a/src/hotspot/os/windows/vmStructs_windows.hpp +++ b/src/hotspot/os/windows/vmStructs_windows.hpp @@ -29,9 +29,18 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) +#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */ + +#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + declare_unsigned_integer_type(OSThread::thread_id_t) #define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp b/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp index 157d57f8e0fa2..123cd67248f86 100644 --- a/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp +++ b/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp @@ -30,21 +30,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, pthread_t) \ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp index 07b878106cfcd..c384afac7ecff 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp +++ b/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp @@ -31,22 +31,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _unique_thread_id, uint64_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Thread IDs */ \ - /**********************/ \ - \ - declare_unsigned_integer_type(OSThread::thread_id_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp index fb43541fa775a..b48ea82712ecd 100644 --- a/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp +++ b/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp @@ -29,22 +29,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _unique_thread_id, uint64_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Thread IDs */ \ - /**********************/ \ - \ - declare_unsigned_integer_type(OSThread::thread_id_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp index f2ad002996b5c..3c8e9c4441477 100644 --- a/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp +++ b/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp @@ -30,23 +30,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(OSThread::thread_id_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp index 9b4bd0faf0ad9..120726bf55fcd 100644 --- a/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp +++ b/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp @@ -29,22 +29,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(OSThread::thread_id_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp b/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp index 9464c35977078..ae948c7303101 100644 --- a/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp +++ b/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp @@ -30,23 +30,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, pid_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(pid_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp index a7dc84770f84c..368d6c971fae0 100644 --- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp @@ -54,6 +54,24 @@ inline void OrderAccess::fence() { } inline void OrderAccess::cross_modify_fence_impl() { + // From 3 “Zifencei” Instruction-Fetch Fence, Version 2.0 + // "RISC-V does not guarantee that stores to instruction memory will be made + // visible to instruction fetches on a RISC-V hart until that hart executes a + // FENCE.I instruction. A FENCE.I instruction ensures that a subsequent + // instruction fetch on a RISC-V hart will see any previous data stores + // already visible to the same RISC-V hart. FENCE.I does not ensure that other + // RISC-V harts’ instruction fetches will observe the local hart’s stores in a + // multiprocessor system." + // + // Hence, to be able to use fence.i directly we need a kernel that supports + // PR_RISCV_CTX_SW_FENCEI_ON. If we are context-switched to another hart, we are + // then ensured that instruction fetches will see any previous data stores. + // + // The alternative is using a full system IPI (system-wide icache sync); in that + // case this barrier is not strictly needed. As this is emitted in a runtime + // slow-path, we will just always emit it, typically after a safepoint. + guarantee(VM_Version::supports_fencei_barrier(), "Linux kernel requires fence.i"); + __asm__ volatile("fence.i" : : : "memory"); } #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp index 6cf7683a58602..3946394c19b1f 100644 --- a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -30,23 +30,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp.
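As an aside for readers unfamiliar with Zifencei, the cross-modification scenario that cross_modify_fence_impl() guards against can be sketched in a few lines. This is illustrative only and not part of the patch; patch_instruction() and the publication flag are hypothetical:

#include <atomic>

extern void patch_instruction(void* pc);  // hypothetical JIT code patcher
std::atomic<bool> code_published{false};  // hypothetical publication flag

void writer_hart(void* pc) {
  patch_instruction(pc);                  // store into instruction memory
  code_published.store(true, std::memory_order_release);
}

void executor_hart() {
  while (!code_published.load(std::memory_order_acquire)) { /* spin */ }
#if defined(__riscv)
  // fence.i only orders *this* hart's instruction fetches after prior data
  // stores. If the OS later migrates this thread to a different hart, the
  // kernel must re-issue fence.i there, which is exactly what the
  // PR_RISCV_CTX_SW_FENCEI_ON setup in vm_version_linux_riscv.cpp below arranges.
  __asm__ volatile("fence.i" : : : "memory");
#endif
  // Only now is it safe to execute the patched instruction.
}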
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(OSThread::thread_id_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp index 3f9f26b525ba5..a3a226502f6fc 100644 --- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp @@ -35,6 +35,7 @@ #include #include #include +#include <sys/prctl.h> #ifndef HWCAP_ISA_I #define HWCAP_ISA_I nth_bit('I' - 'A') @@ -82,6 +83,23 @@ __v; \ }) +// prctl PR_RISCV_SET_ICACHE_FLUSH_CTX is from Linux 6.9 +#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX +#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71 +#endif +#ifndef PR_RISCV_CTX_SW_FENCEI_ON +#define PR_RISCV_CTX_SW_FENCEI_ON 0 +#endif +#ifndef PR_RISCV_CTX_SW_FENCEI_OFF +#define PR_RISCV_CTX_SW_FENCEI_OFF 1 +#endif +#ifndef PR_RISCV_SCOPE_PER_PROCESS +#define PR_RISCV_SCOPE_PER_PROCESS 0 +#endif +#ifndef PR_RISCV_SCOPE_PER_THREAD +#define PR_RISCV_SCOPE_PER_THREAD 1 +#endif + uint32_t VM_Version::cpu_vector_length() { assert(ext_V.enabled(), "should not call this"); return (uint32_t)read_csr(CSR_VLENB); @@ -102,6 +120,7 @@ void VM_Version::setup_cpu_available_features() { if (!RiscvHwprobe::probe_features()) { os_aux_features(); } + char* uarch = os_uarch_additional_features(); vendor_features(); @@ -155,6 +174,24 @@ void VM_Version::setup_cpu_available_features() { i++; } + // Linux kernel requires Zifencei + if (!ext_Zifencei.enabled()) { + log_info(os, cpu)("Zifencei not found, required by Linux, enabling."); + ext_Zifencei.enable_feature(); + } + + if (UseCtxFencei) { + // Note that we can set this up only for affected threads + // via PR_RISCV_SCOPE_PER_THREAD, i.e. on VM attach/detach.
+ int ret = prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS); + if (ret == 0) { + log_debug(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled."); + } else { + FLAG_SET_ERGO(UseCtxFencei, false); + log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) disabled, unsupported by kernel."); + } + } + _features_string = os::strdup(buf); } diff --git a/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp b/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp index 0442510fa247a..a0fb5eb1a6ab9 100644 --- a/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp +++ b/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp @@ -30,23 +30,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, pid_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(pid_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp index 277486549c035..8f6d365723700 100644 --- a/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp @@ -29,23 +29,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
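Outside the VM, the kernel handshake above boils down to the following standalone sketch. The PR_RISCV_* constants are the ones defined by the patch (exported by <sys/prctl.h> as of Linux 6.9); the main() harness and error handling are illustrative only:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX
#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
#endif
#ifndef PR_RISCV_CTX_SW_FENCEI_ON
#define PR_RISCV_CTX_SW_FENCEI_ON 0
#endif
#ifndef PR_RISCV_SCOPE_PER_PROCESS
#define PR_RISCV_SCOPE_PER_PROCESS 0
#endif

int main() {
  // Ask the kernel to execute fence.i whenever a thread of this process is
  // context-switched in, so a migrated thread cannot fetch stale code.
  if (prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX,
            PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS) != 0) {
    // Pre-6.9 kernels reject the option; the VM handles this by clearing
    // UseCtxFencei ergonomically, as shown above.
    perror("PR_RISCV_SET_ICACHE_FLUSH_CTX");
    return 1;
  }
  return 0;
}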
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - nonstatic_field(OSThread, _pthread_id, pthread_t) +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) - -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - /**********************/ \ - /* Posix Thread IDs */ \ - /**********************/ \ - \ - declare_integer_type(OSThread::thread_id_t) \ - declare_unsigned_integer_type(pthread_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp index 220787823dc69..18a5588b743b9 100644 --- a/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp +++ b/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp @@ -29,18 +29,9 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - declare_unsigned_integer_type(OSThread::thread_id_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp index 9f50a7ed9ae29..985a6a331daba 100644 --- a/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp +++ b/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp @@ -29,18 +29,9 @@ // constants required by the Serviceability Agent. 
This file is // referenced by vmStructs.cpp. -#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ - \ - /******************************/ \ - /* Threads (NOTE: incomplete) */ \ - /******************************/ \ - \ - nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ - unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) -#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ - \ - declare_unsigned_integer_type(OSThread::thread_id_t) +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) #define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/share/adlc/adlArena.cpp b/src/hotspot/share/adlc/adlArena.cpp index d5a1dd500fa66..ebd1f74911d57 100644 --- a/src/hotspot/share/adlc/adlArena.cpp +++ b/src/hotspot/share/adlc/adlArena.cpp @@ -63,8 +63,6 @@ void AdlChunk::chop() { AdlChunk *k = this; while( k ) { AdlChunk *tmp = k->_next; - // clear out this chunk (to detect allocation bugs) - memset(k, 0xBE, k->_len); free(k); // Free chunk (was malloc'd) k = tmp; } diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index e7df38ff221a9..15bc7ddc67d60 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -4357,7 +4357,7 @@ bool MatchRule::is_vector() const { "RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector", "LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked", "VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert", - "VectorRearrange","VectorLoadShuffle", "VectorLoadConst", + "VectorRearrange", "VectorLoadShuffle", "VectorLoadConst", "VectorCastB2X", "VectorCastS2X", "VectorCastI2X", "VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastF2HF", "VectorCastHF2F", "VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X", diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp index e1c4e90d0637d..a0944c864e68f 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -167,6 +167,9 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + #if defined(AMD64) + case vmIntrinsics::_dtanh: + #endif case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dexp: diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp index a2e903edc342f..02be6f8d49e4a 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.cpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp @@ -3339,6 +3339,7 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope) case vmIntrinsics::_dsin : // fall through case vmIntrinsics::_dcos : // fall through case vmIntrinsics::_dtan : // fall through + case vmIntrinsics::_dtanh : // fall through case vmIntrinsics::_dlog : // fall through case vmIntrinsics::_dlog10 : // fall through case vmIntrinsics::_dexp : // fall through diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 5d73ab5b88dba..c568caeca4b30 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "c1/c1_ValueType.hpp" #include "oops/method.hpp" #include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" class BlockBegin; class BlockList; @@ -1122,7 +1123,7 @@ class LIR_Op: public CompilationResourceObj { } #endif - virtual const char * name() const PRODUCT_RETURN0; + virtual const char * name() const PRODUCT_RETURN_NULL; virtual void visit(LIR_OpVisitState* state); int id() const { return _id; } @@ -1400,7 +1401,7 @@ class LIR_Op1: public LIR_Op { virtual bool is_patching() { return _patch != lir_patch_none; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_Op1* as_Op1() { return this; } - virtual const char * name() const PRODUCT_RETURN0; + virtual const char * name() const PRODUCT_RETURN_NULL; void set_in_opr(LIR_Opr opr) { _opr = opr; } @@ -2033,8 +2034,9 @@ class LIR_OpProfileCall : public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; bool should_profile_receiver_type() const { bool callee_is_static = _profiled_callee->is_loaded() && _profiled_callee->is_static(); + bool callee_is_private = _profiled_callee->is_loaded() && _profiled_callee->is_private(); Bytecodes::Code bc = _profiled_method->java_code_at_bci(_profiled_bci); - bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !_profiled_callee->can_be_statically_bound()) || bc == Bytecodes::_invokeinterface; + bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !callee_is_private) || bc == Bytecodes::_invokeinterface; return C1ProfileVirtualCalls && call_is_virtual && !callee_is_static; } }; diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 4e63736503fe0..74fdf7a5b76a3 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -2971,6 +2971,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { case vmIntrinsics::_dsqrt: // fall through case vmIntrinsics::_dsqrt_strict: // fall through case vmIntrinsics::_dtan: // fall through + case 
vmIntrinsics::_dtanh: // fall through case vmIntrinsics::_dsin : // fall through case vmIntrinsics::_dcos : // fall through case vmIntrinsics::_dexp : // fall through diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 5b44d5c0f1983..915f00f77c523 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -347,6 +347,7 @@ const char* Runtime1::name_for_address(address entry) { FUNCTION_CASE(entry, StubRoutines::dsin()); FUNCTION_CASE(entry, StubRoutines::dcos()); FUNCTION_CASE(entry, StubRoutines::dtan()); + FUNCTION_CASE(entry, StubRoutines::dtanh()); #undef FUNCTION_CASE diff --git a/src/hotspot/share/cds/archiveHeapLoader.cpp b/src/hotspot/share/cds/archiveHeapLoader.cpp index 6325fb6f49d73..0e7ef08064c37 100644 --- a/src/hotspot/share/cds/archiveHeapLoader.cpp +++ b/src/hotspot/share/cds/archiveHeapLoader.cpp @@ -376,13 +376,12 @@ void ArchiveHeapLoader::finish_initialization() { intptr_t bottom = is_loaded() ? _loaded_heap_bottom : _mapped_heap_bottom; // The heap roots are stored in one or more segments that are laid out consecutively. - // The byte size of each segment (except for the last one) is max_size. + // The size of each segment (except for the last one) is max_size_in_{elems,bytes}. HeapRootSegments segments = FileMapInfo::current_info()->heap_root_segments(); - int max_size = segments.max_size_in_bytes(); - HeapShared::init_root_segment_sizes(max_size); + HeapShared::init_root_segment_sizes(segments.max_size_in_elems()); intptr_t first_segment_addr = bottom + segments.base_offset(); for (size_t c = 0; c < segments.count(); c++) { - oop segment_oop = cast_to_oop(first_segment_addr + (c * max_size)); + oop segment_oop = cast_to_oop(first_segment_addr + (c * segments.max_size_in_bytes())); assert(segment_oop->is_objArray(), "Must be"); HeapShared::add_root_segment((objArrayOop)segment_oop); } diff --git a/src/hotspot/share/cds/archiveHeapWriter.cpp b/src/hotspot/share/cds/archiveHeapWriter.cpp index 853d459c691dd..710e693bfdb14 100644 --- a/src/hotspot/share/cds/archiveHeapWriter.cpp +++ b/src/hotspot/share/cds/archiveHeapWriter.cpp @@ -223,6 +223,7 @@ void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeapat(root_index++)); @@ -245,6 +245,8 @@ void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeaplength(), "Post-condition: All roots are handled"); + _heap_root_segments = segments; } diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp index 2e361ab0c4650..5a78bc26ee627 100644 --- a/src/hotspot/share/cds/archiveUtils.hpp +++ b/src/hotspot/share/cds/archiveUtils.hpp @@ -277,7 +277,6 @@ class HeapRootSegments { memset(this, 0, sizeof(*this)); } HeapRootSegments(size_t base_offset, int roots_count, int max_size_in_bytes, int max_size_in_elems) { - assert(is_power_of_2(max_size_in_bytes), "must be"); memset(this, 0, sizeof(*this)); _base_offset = base_offset; _count = (roots_count + max_size_in_elems - 1) / max_size_in_elems; diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp index a0a562eca21a0..5915424c4fe87 100644 --- a/src/hotspot/share/cds/cdsConfig.cpp +++ b/src/hotspot/share/cds/cdsConfig.cpp @@ -236,7 +236,7 @@ void CDSConfig::init_shared_archive_paths() { } void CDSConfig::check_internal_module_property(const char* key, const char* value) { - if (Arguments::is_internal_module_property(key)) { + if (Arguments::is_internal_module_property(key) && !Arguments::is_module_path_property(key)) { 
stop_using_optimized_module_handling(); log_info(cds)("optimized module handling: disabled due to incompatible property: %s=%s", key, value); } diff --git a/src/hotspot/share/cds/classListParser.cpp b/src/hotspot/share/cds/classListParser.cpp index f8d24295a12e5..694a179d7ee6c 100644 --- a/src/hotspot/share/cds/classListParser.cpp +++ b/src/hotspot/share/cds/classListParser.cpp @@ -508,7 +508,9 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS THROW_NULL(vmSymbols::java_lang_ClassNotFoundException()); } - InstanceKlass* k = UnregisteredClasses::load_class(class_name, _source, CHECK_NULL); + ResourceMark rm; + char * source_path = os::strdup_check_oom(ClassLoader::uri_to_path(_source)); + InstanceKlass* k = UnregisteredClasses::load_class(class_name, source_path, CHECK_NULL); if (k->local_interfaces()->length() != _interfaces->length()) { print_specified_interfaces(); print_actual_interfaces(k); diff --git a/src/hotspot/share/cds/classListWriter.cpp b/src/hotspot/share/cds/classListWriter.cpp index 78cd092445b70..1b9f589f1c5e5 100644 --- a/src/hotspot/share/cds/classListWriter.cpp +++ b/src/hotspot/share/cds/classListWriter.cpp @@ -174,6 +174,8 @@ void ClassListWriter::write_to_stream(const InstanceKlass* k, outputStream* stre } } + // NB: the string following "source: " is not really a proper file name, but rather + // a truncated URI referring to a file. It must be decoded after reading. #ifdef _WINDOWS // "file:/C:/dir/foo.jar" -> "C:/dir/foo.jar" stream->print(" source: %s", cfs->source() + 6); diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp index c935541d7cf0a..715fce5f3fc86 100644 --- a/src/hotspot/share/cds/filemap.cpp +++ b/src/hotspot/share/cds/filemap.cpp @@ -581,7 +581,7 @@ int FileMapInfo::get_module_shared_path_index(Symbol* location) { // skip_uri_protocol was also called during dump time -- see ClassLoaderExt::process_module_table() ResourceMark rm; - const char* file = ClassLoader::skip_uri_protocol(location->as_C_string()); + const char* file = ClassLoader::uri_to_path(location->as_C_string()); for (int i = ClassLoaderExt::app_module_paths_start_index(); i < get_number_of_shared_paths(); i++) { SharedClassPathEntry* ent = shared_path(i); if (!ent->is_non_existent()) { @@ -781,12 +781,12 @@ bool FileMapInfo::check_paths(int shared_path_start_idx, int num_paths, Growable assert(strlen(rp_array->at(i)) > (size_t)runtime_prefix_len, "sanity"); const char* runtime_path = rp_array->at(i) + runtime_prefix_len; if (!os::same_files(dumptime_path, runtime_path)) { - return true; + return false; } i++; j++; } - return false; + return true; } bool FileMapInfo::validate_boot_class_paths() { @@ -810,7 +810,7 @@ bool FileMapInfo::validate_boot_class_paths() { char* rp = skip_first_path_entry(runtime_boot_path); assert(shared_path(0)->is_modules_image(), "first shared_path must be the modules image"); int dp_len = header()->app_class_paths_start_index() - 1; // ignore the first path to the module image - bool mismatch = false; + bool match = true; bool relaxed_check = !header()->has_platform_or_app_classes(); if (dp_len == 0 && rp == nullptr) { @@ -823,7 +823,7 @@ bool FileMapInfo::validate_boot_class_paths() { if (check_paths_existence(rp)) { // If a path exists in the runtime boot paths, it is considered a mismatch // since there's no boot path specified during dump time. 
- mismatch = true; + match = false; } } } else if (dp_len > 0 && rp != nullptr) { @@ -840,16 +840,16 @@ // check the full runtime boot path, must match with dump time num = rp_len; } - mismatch = check_paths(1, num, rp_array, 0, 0); + match = check_paths(1, num, rp_array, 0, 0); } else { // create_path_array() ignores non-existing paths. Although the dump time and runtime boot classpath lengths // are the same initially, after the call to create_path_array(), the runtime boot classpath length could become // shorter. We consider boot classpath mismatch in this case. - mismatch = true; + match = false; } } - if (mismatch) { + if (!match) { // The paths are different return classpath_failure("[BOOT classpath mismatch, actual =", runtime_boot_path); } @@ -860,7 +860,7 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) { const char *appcp = Arguments::get_appclasspath(); assert(appcp != nullptr, "null app classpath"); int rp_len = num_paths(appcp); - bool mismatch = false; + bool match = false; if (rp_len < shared_app_paths_len) { return classpath_failure("Run time APP classpath is shorter than the one at dump time: ", appcp); } @@ -889,8 +889,8 @@ // run 2: -cp x.jar:NE4:b.jar -> x.jar:b.jar -> mismatched int j = header()->app_class_paths_start_index(); - mismatch = check_paths(j, shared_app_paths_len, rp_array, 0, 0); - if (mismatch) { + match = check_paths(j, shared_app_paths_len, rp_array, 0, 0); + if (!match) { // To facilitate app deployment, we allow the JAR files to be moved *together* to // a different location, as long as they are still stored under the same directory // structure. E.g., the following is OK. @@ -901,10 +901,10 @@ if (dumptime_prefix_len != 0 || runtime_prefix_len != 0) { log_info(class, path)("LCP length for app classpath (dumptime: %u, runtime: %u)", dumptime_prefix_len, runtime_prefix_len); - mismatch = check_paths(j, shared_app_paths_len, rp_array, + match = check_paths(j, shared_app_paths_len, rp_array, dumptime_prefix_len, runtime_prefix_len); } - if (mismatch) { + if (!match) { return classpath_failure("[APP classpath mismatch, actual: -Djava.class.path=", appcp); } } @@ -926,15 +926,35 @@ } } +void FileMapInfo::extract_module_paths(const char* runtime_path, GrowableArray<const char*>* module_paths) { + GrowableArray<const char*>* path_array = create_path_array(runtime_path); + int num_paths = path_array->length(); + for (int i = 0; i < num_paths; i++) { + const char* name = path_array->at(i); + ClassLoaderExt::extract_jar_files_from_path(name, module_paths); + } + // module paths are stored in sorted order in the CDS archive.
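+ // For example (hypothetical layout): given --module-path mods where mods/
+ // contains m2.jar and m1.jar, the loop above appends "mods/m2.jar" and
+ // "mods/m1.jar" in directory-scan order; the sort below then normalizes
+ // the list to [ "mods/m1.jar", "mods/m2.jar" ] so that it can be compared
+ // element-by-element against the sorted list stored in the archive.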
+ module_paths->sort(ClassLoaderExt::compare_module_path_by_name); +} + bool FileMapInfo::check_module_paths() { - const char* rp = Arguments::get_property("jdk.module.path"); - int num_paths = CDSConfig::num_archives(rp); - if (num_paths != header()->num_module_paths()) { + const char* runtime_path = Arguments::get_property("jdk.module.path"); + int archived_num_module_paths = header()->num_module_paths(); + if (runtime_path == nullptr && archived_num_module_paths == 0) { + return true; + } + if ((runtime_path == nullptr && archived_num_module_paths > 0) || + (runtime_path != nullptr && archived_num_module_paths == 0)) { return false; } ResourceMark rm; - GrowableArray<const char*>* rp_array = create_path_array(rp); - return check_paths(header()->app_module_paths_start_index(), num_paths, rp_array, 0, 0); + GrowableArray<const char*>* module_paths = new GrowableArray<const char*>(3); + extract_module_paths(runtime_path, module_paths); + int num_paths = module_paths->length(); + if (num_paths != archived_num_module_paths) { + return false; + } + return check_paths(header()->app_module_paths_start_index(), num_paths, module_paths, 0, 0); } bool FileMapInfo::validate_shared_path_table() { @@ -944,6 +964,16 @@ // Load the shared path table info from the archive header _shared_path_table = header()->shared_path_table(); + + bool matched_module_paths = true; + if (CDSConfig::is_dumping_dynamic_archive() || header()->has_full_module_graph()) { + matched_module_paths = check_module_paths(); + } + if (header()->has_full_module_graph() && !matched_module_paths) { + CDSConfig::stop_using_optimized_module_handling(); + log_info(cds)("optimized module handling: disabled because of mismatched module paths"); + } + if (CDSConfig::is_dumping_dynamic_archive()) { // Only support dynamic dumping with the usage of the default CDS archive // or a simple base archive.
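To make the relocation tolerance in validate_app_class_paths() above concrete: check_paths() is retried with each side's longest-common-prefix length stripped, so JAR files moved *together* still match. A minimal self-contained sketch (the paths and the same_layout() helper are hypothetical, not the VM's code):

#include <cassert>
#include <cstring>

// Compare two classpaths entry-by-entry after stripping each side's
// longest-common-prefix length, mirroring
// check_paths(..., dumptime_prefix_len, runtime_prefix_len).
static bool same_layout(const char* const* dump, const char* const* run,
                        int n, size_t dump_lcp, size_t run_lcp) {
  for (int i = 0; i < n; i++) {
    if (strcmp(dump[i] + dump_lcp, run[i] + run_lcp) != 0) {
      return false;  // entries were moved independently, not together
    }
  }
  return true;
}

int main() {
  const char* dump[] = { "/a/b/x.jar", "/a/b/y.jar" };  // LCP "/a/b/", length 5
  const char* run[]  = { "/c/d/x.jar", "/c/d/y.jar" };  // LCP "/c/d/", length 5
  assert(same_layout(dump, run, 2, 5, 5));              // moved together: accepted

  const char* bad[]  = { "/c/x.jar", "/d/y.jar" };      // LCP "/", length 1
  assert(!same_layout(dump, bad, 2, 5, 1));             // layout changed: rejected
  return 0;
}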
@@ -959,7 +989,7 @@ "Dynamic archiving is disabled because base layer archive has appended boot classpath"); } if (header()->num_module_paths() > 0) { - if (!check_module_paths()) { + if (!matched_module_paths) { CDSConfig::disable_dumping_dynamic_archive(); log_warning(cds)( "Dynamic archiving is disabled because base layer archive has a different module path"); diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index 1bf2510a3351c..6650f52440881 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -271,6 +271,7 @@ class FileMapHeader: private CDSFileMapHeaderBase { bool compressed_oops() const { return _compressed_oops; } bool compressed_class_pointers() const { return _compressed_class_ptrs; } HeapRootSegments heap_root_segments() const { return _heap_root_segments; } + bool has_full_module_graph() const { return _has_full_module_graph; } size_t heap_oopmap_start_pos() const { return _heap_oopmap_start_pos; } size_t heap_ptrmap_start_pos() const { return _heap_ptrmap_start_pos; } size_t rw_ptrmap_start_pos() const { return _rw_ptrmap_start_pos; } @@ -554,6 +555,7 @@ class FileMapInfo : public CHeapObj<mtInternal> { GrowableArray<const char*>* rp_array, unsigned int dumptime_prefix_len, unsigned int runtime_prefix_len) NOT_CDS_RETURN_(false); + void extract_module_paths(const char* runtime_path, GrowableArray<const char*>* module_paths); bool validate_boot_class_paths() NOT_CDS_RETURN_(false); bool validate_app_class_paths(int shared_app_paths_len) NOT_CDS_RETURN_(false); bool map_heap_region_impl() NOT_CDS_JAVA_HEAP_RETURN_(false); diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp index 1fddcb0d81f8a..81aa7ac94dc21 100644 --- a/src/hotspot/share/cds/heapShared.cpp +++ b/src/hotspot/share/cds/heapShared.cpp @@ -33,6 +33,7 @@ #include "cds/heapShared.hpp" #include "cds/metaspaceShared.hpp" #include "classfile/classLoaderData.hpp" +#include "classfile/classLoaderExt.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/modules.hpp" #include "classfile/stringTable.hpp" @@ -55,6 +56,7 @@ #include "oops/oop.inline.hpp" #include "oops/typeArrayOop.inline.hpp" #include "prims/jvmtiExport.hpp" +#include "runtime/arguments.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/init.hpp" #include "runtime/javaCalls.hpp" @@ -134,8 +136,7 @@ static ArchivableStaticFieldInfo fmg_archive_subgraph_entry_fields[] = { KlassSubGraphInfo* HeapShared::_default_subgraph_info; GrowableArrayCHeap<oop, mtClassShared>* HeapShared::_pending_roots = nullptr; GrowableArrayCHeap<OopHandle, mtClassShared>* HeapShared::_root_segments; -int HeapShared::_root_segment_max_size_shift; -int HeapShared::_root_segment_max_size_mask; +int HeapShared::_root_segment_max_size_elems; OopHandle HeapShared::_scratch_basic_type_mirrors[T_VOID+1]; MetaspaceObjToOopHandleTable* HeapShared::_scratch_java_mirror_table = nullptr; MetaspaceObjToOopHandleTable* HeapShared::_scratch_references_table = nullptr; @@ -242,15 +243,29 @@ objArrayOop HeapShared::root_segment(int segment_idx) { return segment; } +void HeapShared::get_segment_indexes(int idx, int& seg_idx, int& int_idx) { + assert(_root_segment_max_size_elems > 0, "sanity"); + + // Try to avoid divisions for the common case.
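+ // For example, with a hypothetical capacity of _root_segment_max_size_elems
+ // == 1000: index 999 takes the fast path (segment 0, slot 999), while index
+ // 2500 maps to segment 2, slot 500, since 2500 == 2 * 1000 + 500. The
+ // capacity no longer needs to be a power of two (the shift/mask scheme this
+ // replaces required that), so plain divide/modulo is used on the slow path.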
+ if (idx < _root_segment_max_size_elems) { + seg_idx = 0; + int_idx = idx; + } else { + seg_idx = idx / _root_segment_max_size_elems; + int_idx = idx % _root_segment_max_size_elems; + } + + assert(idx == seg_idx * _root_segment_max_size_elems + int_idx, + "sanity: %d index maps to %d segment and %d internal", idx, seg_idx, int_idx); +} + // Returns an objArray that contains all the roots of the archived objects oop HeapShared::get_root(int index, bool clear) { - assert(_root_segment_max_size_shift > 0, "sanity"); - assert(_root_segment_max_size_mask > 0, "sanity"); assert(index >= 0, "sanity"); assert(!CDSConfig::is_dumping_heap() && CDSConfig::is_using_archive(), "runtime only"); assert(!_root_segments->is_empty(), "must have loaded shared heap"); - int seg_idx = index >> _root_segment_max_size_shift; - int int_idx = index & _root_segment_max_size_mask; + int seg_idx, int_idx; + get_segment_indexes(index, seg_idx, int_idx); oop result = root_segment(seg_idx)->obj_at(int_idx); if (clear) { clear_root(index); @@ -262,10 +277,8 @@ void HeapShared::clear_root(int index) { assert(index >= 0, "sanity"); assert(CDSConfig::is_using_archive(), "must be"); if (ArchiveHeapLoader::is_in_use()) { - assert(_root_segment_max_size_shift > 0, "sanity"); - assert(_root_segment_max_size_mask > 0, "sanity"); - int seg_idx = index >> _root_segment_max_size_shift; - int int_idx = index & _root_segment_max_size_mask; + int seg_idx, int_idx; + get_segment_indexes(index, seg_idx, int_idx); if (log_is_enabled(Debug, cds, heap)) { oop old = root_segment(seg_idx)->obj_at(int_idx); log_debug(cds, heap)("Clearing root %d: was " PTR_FORMAT, index, p2i(old)); @@ -785,10 +798,8 @@ void HeapShared::add_root_segment(objArrayOop segment_oop) { _root_segments->push(OopHandle(Universe::vm_global(), segment_oop)); } -void HeapShared::init_root_segment_sizes(int max_size) { - assert(is_power_of_2(max_size), "must be"); - _root_segment_max_size_shift = log2i_exact(max_size); - _root_segment_max_size_mask = max_size - 1; +void HeapShared::init_root_segment_sizes(int max_size_elems) { + _root_segment_max_size_elems = max_size_elems; } void HeapShared::serialize_tables(SerializeClosure* soc) { @@ -875,6 +886,17 @@ void HeapShared::initialize_from_archived_subgraph(JavaThread* current, Klass* k return; // nothing to do } + if (k->name()->equals("jdk/internal/module/ArchivedModuleGraph") && + !CDSConfig::is_using_optimized_module_handling() && + // archive was created with --module-path + ClassLoaderExt::num_module_paths() > 0) { + // ArchivedModuleGraph was created with a --module-path that's different than the runtime --module-path. + // Thus, it might contain references to modules that do not exist at runtime. We cannot use it. + log_info(cds, heap)("Skip initializing ArchivedModuleGraph subgraph: is_using_optimized_module_handling=%s num_module_paths=%d", + BOOL_TO_STR(CDSConfig::is_using_optimized_module_handling()), ClassLoaderExt::num_module_paths()); + return; + } + ExceptionMark em(THREAD); const ArchivedKlassSubGraphInfoRecord* record = resolve_or_init_classes_for_subgraph_of(k, /*do_init=*/true, THREAD); @@ -1123,6 +1145,13 @@ bool HeapShared::archive_reachable_objects_from(int level, // these objects that are referenced (directly or indirectly) by static fields. 
ResourceMark rm; log_error(cds, heap)("Cannot archive object of class %s", orig_obj->klass()->external_name()); + if (log_is_enabled(Trace, cds, heap)) { + WalkOopAndArchiveClosure* walker = WalkOopAndArchiveClosure::current(); + if (walker != nullptr) { + LogStream ls(Log(cds, heap)::trace()); + CDSHeapVerifier::trace_to_root(&ls, walker->referencing_obj()); + } + } MetaspaceShared::unrecoverable_writing_error(); } diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp index 01610ebe64e15..01d664945ee74 100644 --- a/src/hotspot/share/cds/heapShared.hpp +++ b/src/hotspot/share/cds/heapShared.hpp @@ -291,8 +291,7 @@ class HeapShared: AllStatic { static GrowableArrayCHeap<oop, mtClassShared>* _pending_roots; static GrowableArrayCHeap<OopHandle, mtClassShared>* _root_segments; - static int _root_segment_max_size_shift; - static int _root_segment_max_size_mask; + static int _root_segment_max_size_elems; static OopHandle _scratch_basic_type_mirrors[T_VOID+1]; static MetaspaceObjToOopHandleTable* _scratch_java_mirror_table; static MetaspaceObjToOopHandleTable* _scratch_references_table; @@ -407,6 +406,8 @@ class HeapShared: AllStatic { // Run-time only static void clear_root(int index); + static void get_segment_indexes(int index, int& segment_index, int& internal_index); + static void setup_test_class(const char* test_class_name) PRODUCT_RETURN; #endif // INCLUDE_CDS_JAVA_HEAP @@ -425,7 +426,7 @@ class HeapShared: AllStatic { static void init_for_dumping(TRAPS) NOT_CDS_JAVA_HEAP_RETURN; static void write_subgraph_info_table() NOT_CDS_JAVA_HEAP_RETURN; static void add_root_segment(objArrayOop segment_oop) NOT_CDS_JAVA_HEAP_RETURN; - static void init_root_segment_sizes(int max_size) NOT_CDS_JAVA_HEAP_RETURN; + static void init_root_segment_sizes(int max_size_elems) NOT_CDS_JAVA_HEAP_RETURN; static void serialize_tables(SerializeClosure* soc) NOT_CDS_JAVA_HEAP_RETURN; #ifndef PRODUCT diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp index c66398cefac5a..6f646e162ecac 100644 --- a/src/hotspot/share/cds/metaspaceShared.cpp +++ b/src/hotspot/share/cds/metaspaceShared.cpp @@ -77,6 +77,7 @@ #include "runtime/globals.hpp" #include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" #include "runtime/os.inline.hpp" #include "runtime/safepointVerifiers.hpp" #include "runtime/sharedRuntime.hpp" @@ -300,6 +301,7 @@ void MetaspaceShared::post_initialize(TRAPS) { } ClassLoaderExt::init_paths_start_index(info->app_class_paths_start_index()); ClassLoaderExt::init_app_module_paths_start_index(info->app_module_paths_start_index()); + ClassLoaderExt::init_num_module_paths(info->header()->num_module_paths()); } } } @@ -791,9 +793,22 @@ void MetaspaceShared::preload_and_dump_impl(StaticArchiveBuilder& builder, TRAPS // Do this at the very end, when no Java code will be executed. Otherwise // some new strings may be added to the intern table.
StringTable::allocate_shared_strings_array(CHECK); + } else { + log_info(cds)("Not dumping heap, reset CDSConfig::_is_using_optimized_module_handling"); + CDSConfig::stop_using_optimized_module_handling(); } #endif + // Dummy call to load classes used at CDS runtime + JavaValue result(T_OBJECT); + Handle path_string = java_lang_String::create_from_str("dummy.jar", CHECK); + JavaCalls::call_static(&result, + vmClasses::jdk_internal_loader_ClassLoaders_klass(), + vmSymbols::toFileURL_name(), + vmSymbols::toFileURL_signature(), + path_string, + CHECK); + VM_PopulateDumpSharedSpace op(builder); VMThread::execute(&op); diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp index 9caf89628ccfd..155ce032400e8 100644 --- a/src/hotspot/share/ci/ciEnv.cpp +++ b/src/hotspot/share/ci/ciEnv.cpp @@ -1616,7 +1616,10 @@ void ciEnv::dump_replay_data_helper(outputStream* out) { for (int i = 0; i < objects->length(); i++) { objects->at(i)->dump_replay_data(out); } - dump_compile_data(out); + + if (this->task() != nullptr) { + dump_compile_data(out); + } out->flush(); } diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp index 5309fcd20a8be..9a68e2640443f 100644 --- a/src/hotspot/share/classfile/classLoader.cpp +++ b/src/hotspot/share/classfile/classLoader.cpp @@ -81,6 +81,9 @@ #include "utilities/ostream.hpp" #include "utilities/utf8.hpp" +#include <ctype.h> +#include <stdlib.h> + // Entry point in java.dll for path canonicalization typedef int (*canonicalize_fn_t)(const char *orig, char *out, int len); @@ -579,6 +582,8 @@ void ClassLoader::setup_module_search_path(JavaThread* current, const char* path new_entry = create_class_path_entry(current, path, &st, false /*is_boot_append */, false /* from_class_path_attr */); if (new_entry != nullptr) { + // ClassLoaderExt::process_module_table() filters out non-jar entries before calling this function.
+ assert(new_entry->is_jar_file(), "module path entry %s is not a jar file", new_entry->name()); add_to_module_path_entries(path, new_entry); } } @@ -1209,7 +1214,7 @@ InstanceKlass* ClassLoader::load_class(Symbol* name, PackageEntry* pkg_entry, bo } #if INCLUDE_CDS -char* ClassLoader::skip_uri_protocol(char* source) { +static const char* skip_uri_protocol(const char* source) { if (strncmp(source, "file:", 5) == 0) { // file: protocol path could start with file:/ or file:/// // locate the char after all the forward slashes @@ -1228,6 +1233,47 @@ char* ClassLoader::skip_uri_protocol(char* source) { return source; } +static char decode_percent_encoded(const char *str, size_t& index) { + if (str[index] == '%' + && isxdigit(str[index + 1]) + && isxdigit(str[index + 2])) { + char hex[3]; + hex[0] = str[index + 1]; + hex[1] = str[index + 2]; + hex[2] = '\0'; + index += 2; + return (char) strtol(hex, NULL, 16); + } + return str[index]; +} + +char* ClassLoader::uri_to_path(const char* uri) { + const size_t len = strlen(uri) + 1; + char* path = NEW_RESOURCE_ARRAY(char, len); + + uri = skip_uri_protocol(uri); + + if (strncmp(uri, "//", 2) == 0) { + // Skip the empty "authority" part + uri += 2; + } + +#ifdef _WINDOWS + if (uri[0] == '/') { + // Absolute path name on Windows does not begin with a slash + uri += 1; + } +#endif + + size_t path_index = 0; + for (size_t i = 0; i < strlen(uri); ++i) { + char decoded = decode_percent_encoded(uri, i); + path[path_index++] = decoded; + } + path[path_index] = '\0'; + return path; +} + // Record the shared classpath index and loader type for classes loaded // by the builtin loaders at dump time. void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik, @@ -1261,7 +1307,7 @@ void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik, // Save the path from the file: protocol or the module name from the jrt: protocol // if no protocol prefix is found, path is the same as stream->source(). This path // must be valid since the class has been successfully parsed. - char* path = skip_uri_protocol(src); + const char* path = ClassLoader::uri_to_path(src); assert(path != nullptr, "sanity"); for (int i = 0; i < FileMapInfo::get_number_of_shared_paths(); i++) { SharedClassPathEntry* ent = FileMapInfo::shared_path(i); diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp index af625082ddabf..e44059b724769 100644 --- a/src/hotspot/share/classfile/classLoader.hpp +++ b/src/hotspot/share/classfile/classLoader.hpp @@ -382,7 +382,7 @@ class ClassLoader: AllStatic { // entries during shared classpath setup time. 
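The behavior of the new uri_to_path() above is easiest to see with a couple of input/output pairs. The following standalone re-implementation of the percent-decoding step is for illustration only (the patch's version decodes while copying into a resource array); the sample URIs are hypothetical:

#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <string>

// Decode %XX escapes the same way decode_percent_encoded() does:
// two hex digits after '%' are parsed with strtol(..., 16).
static std::string percent_decode(const char* s) {
  std::string out;
  for (size_t i = 0; i < strlen(s); i++) {
    if (s[i] == '%' && isxdigit(s[i + 1]) && isxdigit(s[i + 2])) {
      char hex[3] = { s[i + 1], s[i + 2], '\0' };
      out.push_back((char) strtol(hex, nullptr, 16));
      i += 2;
    } else {
      out.push_back(s[i]);
    }
  }
  return out;
}

int main() {
  assert(percent_decode("app%20lib.jar") == "app lib.jar");  // %20 -> space
  assert(percent_decode("a%3Ab.jar") == "a:b.jar");          // %3A -> ':'
  // Combined with protocol/authority stripping, uri_to_path() maps e.g.
  // "file:///usr/lib/app%20lib.jar" to "/usr/lib/app lib.jar", and on
  // Windows "file:///C:/dir/foo.jar" to "C:/dir/foo.jar".
  return 0;
}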
static int num_module_path_entries(); static void exit_with_path_failure(const char* error, const char* message); - static char* skip_uri_protocol(char* source); + static char* uri_to_path(const char* uri); static void record_result(JavaThread* current, InstanceKlass* ik, const ClassFileStream* stream, bool redefined); #endif diff --git a/src/hotspot/share/classfile/classLoaderExt.cpp b/src/hotspot/share/classfile/classLoaderExt.cpp index 3cd7dd7cd3ba6..16981669deb3a 100644 --- a/src/hotspot/share/classfile/classLoaderExt.cpp +++ b/src/hotspot/share/classfile/classLoaderExt.cpp @@ -55,6 +55,7 @@ jshort ClassLoaderExt::_app_class_paths_start_index = ClassLoaderExt::max_classpath_index; jshort ClassLoaderExt::_app_module_paths_start_index = ClassLoaderExt::max_classpath_index; jshort ClassLoaderExt::_max_used_path_index = 0; +int ClassLoaderExt::_num_module_paths = 0; bool ClassLoaderExt::_has_app_classes = false; bool ClassLoaderExt::_has_platform_classes = false; bool ClassLoaderExt::_has_non_jar_in_classpath = false; @@ -89,23 +90,25 @@ void ClassLoaderExt::setup_app_search_path(JavaThread* current) { os::free(app_class_path); } +int ClassLoaderExt::compare_module_path_by_name(const char** p1, const char** p2) { + return strcmp(*p1, *p2); +} + void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable* met) { ResourceMark rm(current); - GrowableArray<char*>* module_paths = new GrowableArray<char*>(5); + GrowableArray<const char*>* module_paths = new GrowableArray<const char*>(5); class ModulePathsGatherer : public ModuleClosure { JavaThread* _current; - GrowableArray<char*>* _module_paths; + GrowableArray<const char*>* _module_paths; public: - ModulePathsGatherer(JavaThread* current, GrowableArray<char*>* module_paths) : + ModulePathsGatherer(JavaThread* current, GrowableArray<const char*>* module_paths) : _current(current), _module_paths(module_paths) {} void do_module(ModuleEntry* m) { - char* path = m->location()->as_C_string(); - if (strncmp(path, "file:", 5) == 0) { - path = ClassLoader::skip_uri_protocol(path); - char* path_copy = NEW_RESOURCE_ARRAY(char, strlen(path) + 1); - strcpy(path_copy, path); - _module_paths->append(path_copy); + char* uri = m->location()->as_C_string(); + if (strncmp(uri, "file:", 5) == 0) { + char* path = ClassLoader::uri_to_path(uri); + extract_jar_files_from_path(path, _module_paths); } } }; @@ -116,6 +119,10 @@ void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable* met) { met->modules_do(&gatherer); } + // Sort the module paths before storing them into the CDS archive, for simpler + // checking at runtime. + module_paths->sort(compare_module_path_by_name); + for (int i = 0; i < module_paths->length(); i++) { ClassLoader::setup_module_search_path(current, module_paths->at(i)); } @@ -131,6 +138,38 @@ void ClassLoaderExt::setup_module_paths(JavaThread* current) { process_module_table(current, met); } +bool ClassLoaderExt::has_jar_suffix(const char* filename) { + // jdk.internal.module.ModulePath.readModule() checks for the ".jar" suffix. + // Perform the same check here.
+ const char* dot = strrchr(filename, '.'); + if (dot != nullptr && strcmp(dot + 1, "jar") == 0) { + return true; + } + return false; +} + +void ClassLoaderExt::extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths) { + DIR* dirp = os::opendir(path); + if (dirp == nullptr && errno == ENOTDIR && has_jar_suffix(path)) { + module_paths->append(path); + } else { + if (dirp != nullptr) { + struct dirent* dentry; + while ((dentry = os::readdir(dirp)) != nullptr) { + const char* file_name = dentry->d_name; + if (has_jar_suffix(file_name)) { + size_t full_name_len = strlen(path) + strlen(file_name) + strlen(os::file_separator()) + 1; + char* full_name = NEW_RESOURCE_ARRAY(char, full_name_len); + int n = os::snprintf(full_name, full_name_len, "%s%s%s", path, os::file_separator(), file_name); + assert((size_t)n == full_name_len - 1, "Unexpected number of characters in string"); + module_paths->append(full_name); + } + } + os::closedir(dirp); + } + } +} + char* ClassLoaderExt::read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size, bool clean_text) { const char* name = "META-INF/MANIFEST.MF"; diff --git a/src/hotspot/share/classfile/classLoaderExt.hpp b/src/hotspot/share/classfile/classLoaderExt.hpp index b76ce3ff33a32..c3c0b00d55e43 100644 --- a/src/hotspot/share/classfile/classLoaderExt.hpp +++ b/src/hotspot/share/classfile/classLoaderExt.hpp @@ -53,12 +53,15 @@ class ClassLoaderExt: public ClassLoader { // AllStatic static jshort _app_module_paths_start_index; // the largest path index being used during CDS dump time static jshort _max_used_path_index; + // number of module paths + static int _num_module_paths; static bool _has_app_classes; static bool _has_platform_classes; static bool _has_non_jar_in_classpath; static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size, bool clean_text); + static bool has_jar_suffix(const char* filename); public: static void process_jar_manifest(JavaThread* current, ClassPathEntry* entry); @@ -68,6 +71,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic static void setup_search_paths(JavaThread* current); static void setup_module_paths(JavaThread* current); + static void extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths); + static int compare_module_path_by_name(const char** p1, const char** p2); static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size) { // Remove all the new-line continuations (which wrap long lines at 72 characters, see @@ -87,6 +92,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic static jshort max_used_path_index() { return _max_used_path_index; } + static int num_module_paths() { return _num_module_paths; } + static void set_max_used_path_index(jshort used_index) { _max_used_path_index = used_index; } @@ -99,6 +106,10 @@ class ClassLoaderExt: public ClassLoader { // AllStatic _app_module_paths_start_index = module_start; } + static void init_num_module_paths(int num_module_paths) { + _num_module_paths = num_module_paths; + } + static bool is_boot_classpath(int classpath_index) { return classpath_index < _app_class_paths_start_index; } diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp index b6ef682ae0965..0ad36cd21dbf3 100644 --- a/src/hotspot/share/classfile/javaClasses.cpp +++ b/src/hotspot/share/classfile/javaClasses.cpp @@ -3052,9 +3052,10 @@ void java_lang_ClassFrameInfo::serialize_offsets(SerializeClosure* f) { static int
get_flags(const methodHandle& m) { int flags = (jushort)( m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS ); - if (m->is_initializer()) { + if (m->is_object_initializer()) { flags |= java_lang_invoke_MemberName::MN_IS_CONSTRUCTOR; } else { + // Note: Static initializers can be here. Record them as plain methods. flags |= java_lang_invoke_MemberName::MN_IS_METHOD; } if (m->caller_sensitive()) { diff --git a/src/hotspot/share/classfile/systemDictionary.cpp b/src/hotspot/share/classfile/systemDictionary.cpp index bcddddc0c7c78..7b307a0b8a37c 100644 --- a/src/hotspot/share/classfile/systemDictionary.cpp +++ b/src/hotspot/share/classfile/systemDictionary.cpp @@ -1069,7 +1069,7 @@ bool SystemDictionary::check_shared_class_super_type(InstanceKlass* klass, Insta } Klass *found = resolve_with_circularity_detection(klass->name(), super_type->name(), - class_loader, protection_domain, is_superclass, CHECK_0); + class_loader, protection_domain, is_superclass, CHECK_false); if (found == super_type) { return true; } else { @@ -1088,16 +1088,21 @@ bool SystemDictionary::check_shared_class_super_types(InstanceKlass* ik, Handle // If unexpected superclass or interfaces are found, we cannot // load from the shared archive. - if (ik->super() != nullptr && - !check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()), - class_loader, protection_domain, true, THREAD)) { - return false; + if (ik->super() != nullptr) { + bool check_super = check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()), + class_loader, protection_domain, true, + CHECK_false); + if (!check_super) { + return false; + } } Array* interfaces = ik->local_interfaces(); int num_interfaces = interfaces->length(); for (int index = 0; index < num_interfaces; index++) { - if (!check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false, THREAD)) { + bool check_interface = check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false, + CHECK_false); + if (!check_interface) { return false; } } @@ -1153,7 +1158,8 @@ InstanceKlass* SystemDictionary::load_shared_class(InstanceKlass* ik, return nullptr; } - if (!check_shared_class_super_types(ik, class_loader, protection_domain, THREAD)) { + bool check = check_shared_class_super_types(ik, class_loader, protection_domain, CHECK_NULL); + if (!check) { ik->set_shared_loading_failed(); return nullptr; } diff --git a/src/hotspot/share/classfile/systemDictionary.hpp b/src/hotspot/share/classfile/systemDictionary.hpp index ee50aa38dd0cf..04980291716c7 100644 --- a/src/hotspot/share/classfile/systemDictionary.hpp +++ b/src/hotspot/share/classfile/systemDictionary.hpp @@ -293,13 +293,6 @@ class SystemDictionary : AllStatic { const char* message); static const char* find_nest_host_error(const constantPoolHandle& pool, int which); -protected: - static InstanceKlass* _well_known_klasses[]; - -private: - // table of box klasses (int_klass, etc.) 
- static InstanceKlass* _box_klasses[T_VOID+1]; - static OopHandle _java_system_loader; static OopHandle _java_platform_loader; diff --git a/src/hotspot/share/classfile/vmClasses.cpp b/src/hotspot/share/classfile/vmClasses.cpp index 0b9b437c67b78..b62d699dfe20e 100644 --- a/src/hotspot/share/classfile/vmClasses.cpp +++ b/src/hotspot/share/classfile/vmClasses.cpp @@ -45,14 +45,6 @@ InstanceKlass* vmClasses::_klasses[static_cast(vmClassID::LIMIT)] = { nullptr /*, nullptr...*/ }; InstanceKlass* vmClasses::_box_klasses[T_VOID+1] = { nullptr /*, nullptr...*/ }; - -// CDS: scan and relocate all classes referenced by _klasses[]. -void vmClasses::metaspace_pointers_do(MetaspaceClosure* it) { - for (auto id : EnumRange{}) { - it->push(klass_addr_at(id)); - } -} - bool vmClasses::is_loaded(InstanceKlass* klass) { return klass != nullptr && klass->is_loaded(); } @@ -205,8 +197,6 @@ void vmClasses::resolve_all(TRAPS) { _box_klasses[T_SHORT] = vmClasses::Short_klass(); _box_klasses[T_INT] = vmClasses::Integer_klass(); _box_klasses[T_LONG] = vmClasses::Long_klass(); - //_box_klasses[T_OBJECT] = vmClasses::object_klass(); - //_box_klasses[T_ARRAY] = vmClasses::object_klass(); #ifdef ASSERT if (CDSConfig::is_using_archive()) { diff --git a/src/hotspot/share/classfile/vmClasses.hpp b/src/hotspot/share/classfile/vmClasses.hpp index f2b8c5666eeb1..4fa078c50cd80 100644 --- a/src/hotspot/share/classfile/vmClasses.hpp +++ b/src/hotspot/share/classfile/vmClasses.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,6 @@ class ClassLoaderData; class InstanceKlass; -class MetaspaceClosure; class vmClasses : AllStatic { friend class VMStructs; @@ -95,7 +94,6 @@ class vmClasses : AllStatic { return &_klasses[as_int(id)]; } - static void metaspace_pointers_do(MetaspaceClosure* it); static void resolve_all(TRAPS); static BasicType box_klass_type(Klass* k); // inverse of box_klass diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index b470eb9b8380d..5e352e42efbc1 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -90,6 +90,7 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + case vmIntrinsics::_dtanh: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dexp: @@ -141,6 +142,7 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + case vmIntrinsics::_dtanh: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dexp: @@ -288,6 +290,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + case vmIntrinsics::_dtanh: case vmIntrinsics::_dlog: case vmIntrinsics::_dexp: case vmIntrinsics::_dpow: diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 4b772c171d5a6..b6ce21797a618 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -135,7 +135,7 @@ class methodHandle; do_name(log_name,"log") do_name(log10_name,"log10") 
do_name(pow_name,"pow") \ do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \ do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \ - do_name(round_name, "round") \ + do_name(round_name, "round") do_name(tanh_name,"tanh") \ \ do_name(addExact_name,"addExact") \ do_name(decrementExact_name,"decrementExact") \ @@ -161,6 +161,7 @@ class methodHandle; do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \ do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \ do_intrinsic(_datan2, java_lang_Math, atan2_name, double2_double_signature, F_S) \ + do_intrinsic(_dtanh, java_lang_Math, tanh_name, double_double_signature, F_S) \ do_intrinsic(_dsqrt, java_lang_Math, sqrt_name, double_double_signature, F_S) \ do_intrinsic(_dlog, java_lang_Math, log_name, double_double_signature, F_S) \ do_intrinsic(_dlog10, java_lang_Math, log10_name, double_double_signature, F_S) \ @@ -1007,6 +1008,15 @@ class methodHandle; "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_shuffle_to_vector_name, "shuffleToVector") \ \ + do_intrinsic(_VectorWrapShuffleIndexes, jdk_internal_vm_vector_VectorSupport, vector_wrap_shuffle_indexes_name, \ + vector_wrap_shuffle_indexes_sig, F_S) \ + do_signature(vector_wrap_shuffle_indexes_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ + "ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \ + do_name(vector_wrap_shuffle_indexes_name, "wrapShuffleIndexes") \ + \ do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \ do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \ "Ljava/lang/Class;" \ @@ -1128,6 +1138,18 @@ class methodHandle; "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_rearrange_name, "rearrangeOp") \ \ + do_intrinsic(_VectorSelectFrom, jdk_internal_vm_vector_VectorSupport, vector_select_from_name, vector_select_from_sig, F_S) \ + do_signature(vector_select_from_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorSelectFromOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_name(vector_select_from_name, "selectFromOp") \ + \ do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \ do_signature(vector_extract_sig, "(Ljava/lang/Class;" \ "Ljava/lang/Class;" \ diff --git a/src/hotspot/share/code/codeBlob.cpp b/src/hotspot/share/code/codeBlob.cpp index 81c4d001078cb..23f621ffec832 100644 --- a/src/hotspot/share/code/codeBlob.cpp +++ b/src/hotspot/share/code/codeBlob.cpp @@ -41,7 +41,7 @@ #include "runtime/handles.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaFrameAnchor.hpp" -#include "runtime/jniHandles.hpp" +#include "runtime/jniHandles.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/safepoint.hpp" #include "runtime/sharedRuntime.hpp" @@ -623,7 +623,7 @@ UpcallStub* UpcallStub::create(const char* name, CodeBuffer* cb, jobject receive // Track memory usage statistic after releasing CodeCache_lock MemoryService::track_code_cache_memory_usage(); - trace_new_stub(blob, "UpcallStub"); + 
trace_new_stub(blob, "UpcallStub - ", name); return blob; } @@ -772,6 +772,10 @@ void UpcallStub::verify() { void UpcallStub::print_on(outputStream* st) const { RuntimeBlob::print_on(st); print_value_on(st); + st->print_cr("Frame data offset: %d", (int) _frame_data_offset); + oop recv = JNIHandles::resolve(_receiver); + st->print("Receiver MH="); + recv->print_on(st); Disassembler::decode((RuntimeBlob*)this, st); } diff --git a/src/hotspot/share/code/compiledIC.cpp b/src/hotspot/share/code/compiledIC.cpp index f142e306a6b02..684aee509ee53 100644 --- a/src/hotspot/share/code/compiledIC.cpp +++ b/src/hotspot/share/code/compiledIC.cpp @@ -293,7 +293,7 @@ bool CompiledIC::is_monomorphic() const { } bool CompiledIC::is_megamorphic() const { - return VtableStubs::entry_point(destination()) != nullptr;; + return VtableStubs::entry_point(destination()) != nullptr; } bool CompiledIC::is_speculated_klass(Klass* receiver_klass) { diff --git a/src/hotspot/share/code/dependencyContext.cpp b/src/hotspot/share/code/dependencyContext.cpp index d7ce8e92acf37..0e6b99d172dcb 100644 --- a/src/hotspot/share/code/dependencyContext.cpp +++ b/src/hotspot/share/code/dependencyContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -227,6 +227,10 @@ void DependencyContext::remove_and_mark_for_deoptimization_all_dependents(Deopti } #ifndef PRODUCT +bool DependencyContext::is_empty() { + return dependencies() == nullptr; +} + void DependencyContext::print_dependent_nmethods(bool verbose) { int idx = 0; for (nmethodBucket* b = dependencies_not_unloading(); b != nullptr; b = b->next_not_unloading()) { diff --git a/src/hotspot/share/code/dependencyContext.hpp b/src/hotspot/share/code/dependencyContext.hpp index e8d2ac41d0d1d..13b845cb59dde 100644 --- a/src/hotspot/share/code/dependencyContext.hpp +++ b/src/hotspot/share/code/dependencyContext.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,6 +124,7 @@ class DependencyContext : public StackObj { #ifndef PRODUCT void print_dependent_nmethods(bool verbose); + bool is_empty(); #endif //PRODUCT bool is_dependent_nmethod(nmethod* nm); }; diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp index ee0c73254f180..7b091d8ade50c 100644 --- a/src/hotspot/share/compiler/compilerDefinitions.cpp +++ b/src/hotspot/share/compiler/compilerDefinitions.cpp @@ -497,11 +497,6 @@ bool CompilerConfig::check_args_consistency(bool status) { "Invalid NonNMethodCodeHeapSize=%dK. Must be at least %uK.\n", NonNMethodCodeHeapSize/K, min_code_cache_size/K); status = false; - } else if (InlineCacheBufferSize > NonNMethodCodeHeapSize / 2) { - jio_fprintf(defaultStream::error_stream(), - "Invalid InlineCacheBufferSize=" SIZE_FORMAT "K. 
Must be less than or equal to " SIZE_FORMAT "K.\n", - InlineCacheBufferSize/K, NonNMethodCodeHeapSize/2/K); - status = false; } #ifdef _LP64 diff --git a/src/hotspot/share/compiler/oopMap.inline.hpp b/src/hotspot/share/compiler/oopMap.inline.hpp index f2a3b3ba834df..05ef53f823142 100644 --- a/src/hotspot/share/compiler/oopMap.inline.hpp +++ b/src/hotspot/share/compiler/oopMap.inline.hpp @@ -66,12 +66,10 @@ void OopMapDo::iterate_oops_do(const frame continue; #ifndef COMPILER2 - COMPILER1_PRESENT(ShouldNotReachHere();) #if INCLUDE_JVMCI - if (UseJVMCICompiler) { - ShouldNotReachHere(); - } + if (!EnableJVMCI) #endif + ShouldNotReachHere(); #endif // !COMPILER2 address loc = fr->oopmapreg_to_location(omv.reg(), reg_map); diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp index 13b993546cde4..4ec7e10cd9a86 100644 --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp @@ -24,49 +24,32 @@ #include "precompiled.hpp" #include "classfile/javaClasses.hpp" +#include "code/vmreg.inline.hpp" #include "gc/g1/c2/g1BarrierSetC2.hpp" #include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" #include "gc/g1/g1BarrierSetRuntime.hpp" #include "gc/g1/g1CardTable.hpp" #include "gc/g1/g1ThreadLocalData.hpp" #include "gc/g1/g1HeapRegion.hpp" #include "opto/arraycopynode.hpp" +#include "opto/block.hpp" #include "opto/compile.hpp" #include "opto/escape.hpp" #include "opto/graphKit.hpp" #include "opto/idealKit.hpp" +#include "opto/machnode.hpp" #include "opto/macro.hpp" +#include "opto/memnode.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "opto/regalloc.hpp" #include "opto/rootnode.hpp" +#include "opto/runtime.hpp" #include "opto/type.hpp" +#include "utilities/growableArray.hpp" #include "utilities/macros.hpp" -const TypeFunc *G1BarrierSetC2::write_ref_field_pre_entry_Type() { - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); - - return TypeFunc::make(domain, range); -} - -const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() { - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); - - return TypeFunc::make(domain, range); -} - -#define __ ideal. /* * Determine if the G1 pre-barrier can be removed. The pre-barrier is * required by SATB to make sure all objects live at the start of the @@ -84,8 +67,6 @@ const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() { * The compiler needs to determine that the object in which a field is about * to be written is newly allocated, and that no prior store to the same field * has happened since the allocation. 
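Back in the oopMap.inline.hpp hunk above, the rewritten guard folds the former COMPILER1_PRESENT and UseJVMCICompiler checks into a single statement: the `if` that only exists when JVMCI is built in conditionally guards the one ShouldNotReachHere() call that both preprocessor branches share. A self-contained sketch of that shape (build macros and the flag are stand-ins):

```cpp
#include <cstdlib>

// Stand-ins for the real build-configuration macros and the JVMCI flag.
// #define COMPILER2        // uncomment to model a C2 build
#define INCLUDE_JVMCI 1     // model a build that includes JVMCI
static bool EnableJVMCI = false;

static void ShouldNotReachHere() { std::abort(); }

static void oopmap_guard_sketch() {
#ifndef COMPILER2
#if INCLUDE_JVMCI
  if (!EnableJVMCI)          // with JVMCI built in, fatal only if it is off
#endif
    ShouldNotReachHere();    // without JVMCI this call is unconditional
#endif                       // with COMPILER2 the whole block vanishes
}

int main() {
  EnableJVMCI = true;        // JVMCI in use: the guard passes silently
  oopmap_guard_sketch();
  return 0;
}
```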
- * - * Returns true if the pre-barrier can be removed */ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, @@ -97,34 +78,28 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, AllocateNode* alloc = AllocateNode::Ideal_allocation(base); if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets + return false; // Cannot unalias unless there are precise offsets. } - if (alloc == nullptr) { - return false; // No allocation found + return false; // No allocation found. } intptr_t size_in_bytes = type2aelembytes(bt); - - Node* mem = kit->memory(adr_idx); // start searching here... + Node* mem = kit->memory(adr_idx); // Start searching here. for (int cnt = 0; cnt < 50; cnt++) { - if (mem->is_Store()) { - Node* st_adr = mem->in(MemNode::Address); intptr_t st_offset = 0; Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); if (st_base == nullptr) { - break; // inscrutable pointer + break; // Inscrutable pointer. } - - // Break we have found a store with same base and offset as ours so break if (st_base == base && st_offset == offset) { + // We have found a store with same base and offset as ours. break; } - if (st_offset != offset && st_offset != Type::OffsetBot) { const int MAX_STORE = BytesPerLong; if (st_offset >= offset + size_in_bytes || @@ -136,20 +111,18 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, // in the same sequence of RawMem effects. We sometimes initialize // a whole 'tile' of array elements with a single jint or jlong.) mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory + continue; // Advance through independent store memory. } } - if (st_base != base && MemNode::detect_ptr_independence(base, alloc, st_base, AllocateNode::Ideal_allocation(st_base), phase)) { - // Success: The bases are provably independent. + // Success: the bases are provably independent. mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory + continue; // Advance through independent store memory. } } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - InitializeNode* st_init = mem->in(0)->as_Initialize(); AllocateNode* st_alloc = st_init->allocation(); @@ -157,7 +130,7 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, // The alloc variable is guaranteed to not be null here from earlier check. if (alloc == st_alloc) { // Check that the initialization is storing null so that no previous store - // has been moved up and directly write a reference + // has been moved up and directly write a reference. Node* captured_store = st_init->find_captured_store(offset, type2aelembytes(T_OBJECT), phase); @@ -166,164 +139,55 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, } } } - // Unless there is an explicit 'continue', we must bail out here, // because 'mem' is an inscrutable memory state (e.g., a call). break; } - return false; } -// G1 pre/post barriers -void G1BarrierSetC2::pre_barrier(GraphKit* kit, - bool do_load, - Node* ctl, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const { - // Some sanity checks - // Note: val is unused in this routine. 
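The loop above in g1_can_remove_pre_barrier walks backwards through at most 50 memory states looking for a reason to keep the pre-barrier. A toy restatement of that decision with invented record types (simplified: the real code also requires the captured initializing store to be null and handles tiled initializations):

```cpp
#include <vector>

// Walk back through a bounded window of memory states. A prior store to the
// same (base, offset) means the overwritten value may be live, so the SATB
// barrier must stay; the barrier is removable only if the walk provably ends
// at the object's own initialization with only independent stores in between.
struct StoreRec { int base; long offset; bool is_initialization; };

static bool can_remove_pre_barrier(const std::vector<StoreRec>& mem_chain,
                                   int base, long offset) {
  int cnt = 0;
  for (auto it = mem_chain.rbegin(); it != mem_chain.rend() && cnt < 50; ++it, ++cnt) {
    if (it->is_initialization) {
      return true;   // reached the allocation: the field still holds null
    }
    if (it->base == base && it->offset == offset) {
      return false;  // earlier store to the same field: must log its value
    }
    // provably independent store: keep walking toward the allocation
  }
  return false;      // walk inconclusive: keep the barrier
}

int main() {
  std::vector<StoreRec> chain = {{1, 0, true}, {2, 8, false}};
  return can_remove_pre_barrier(chain, 1, 16) ? 0 : 1;  // removable here
}
```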
- - if (do_load) { - // We need to generate the load of the previous value - assert(obj != nullptr, "must have a base"); - assert(adr != nullptr, "where are loading from?"); - assert(pre_val == nullptr, "loaded already?"); - assert(val_type != nullptr, "need a type"); - - if (use_ReduceInitialCardMarks() - && g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) { - return; - } - - } else { - // In this case both val_type and alias_idx are unused. - assert(pre_val != nullptr, "must be loaded already"); - // Nothing to be done if pre_val is null. - if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; - assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); - } - assert(bt == T_OBJECT, "or we shouldn't be here"); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); - - BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); - - // Offsets into the thread - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - - // Now the actual pointers into the thread - Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some of the values - Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); - - // if (!marking) - __ if_then(marking, BoolTest::ne, zero, unlikely); { - BasicType index_bt = TypeX_X->basic_type(); - assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); - Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); - - if (do_load) { - // load original value - pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx, false, MemNode::unordered, LoadNode::Pinned); - } - - // if (pre_val != nullptr) - __ if_then(pre_val, BoolTest::ne, kit->null()); { - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // is the queue for this thread full? 
- __ if_then(index, BoolTest::ne, zeroX, likely); { - - // decrement the index - Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - - // Now get the buffer location we will log the previous value into and store it - Node *log_addr = __ AddP(no_base, buffer, next_index); - __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); - // update the index - __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - - // logging buffer is full, call the runtime - const TypeFunc *tf = write_ref_field_pre_entry_Type(); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), "write_ref_field_pre_entry", pre_val, tls); - } __ end_if(); // (!index) - } __ end_if(); // (pre_val != nullptr) - } __ end_if(); // (!marking) - - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); -} - /* - * G1 similar to any GC with a Young Generation requires a way to keep track of - * references from Old Generation to Young Generation to make sure all live + * G1, similar to any GC with a Young Generation, requires a way to keep track + * of references from Old Generation to Young Generation to make sure all live * objects are found. G1 also requires to keep track of object references * between different regions to enable evacuation of old regions, which is done - * as part of mixed collections. References are tracked in remembered sets and - * is continuously updated as reference are written to with the help of the - * post-barrier. + * as part of mixed collections. References are tracked in remembered sets, + * which are continuously updated as references are written to with the help of + * the post-barrier. * - * To reduce the number of updates to the remembered set the post-barrier - * filters updates to fields in objects located in the Young Generation, - * the same region as the reference, when the null is being written or - * if the card is already marked as dirty by an earlier write. + * To reduce the number of updates to the remembered set, the post-barrier + * filters out updates to fields in objects located in the Young Generation, the + * same region as the reference, when null is being written, or if the card is + * already marked as dirty by an earlier write. * * Under certain circumstances it is possible to avoid generating the - * post-barrier completely if it is possible during compile time to prove - * the object is newly allocated and that no safepoint exists between the - * allocation and the store. - * - * In the case of slow allocation the allocation code must handle the barrier - * as part of the allocation in the case the allocated object is not located - * in the nursery; this would happen for humongous objects. + * post-barrier completely, if it is possible during compile time to prove the + * object is newly allocated and that no safepoint exists between the allocation + * and the store. This can be seen as a compile-time version of the + * above-mentioned Young Generation filter. * - * Returns true if the post barrier can be removed + * In the case of a slow allocation, the allocation code must handle the barrier + * as part of the allocation if the allocated object is not located in the + * nursery; this would happen for humongous objects. 
*/ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit, - PhaseValues* phase, Node* store, + PhaseValues* phase, Node* store_ctrl, Node* adr) const { intptr_t offset = 0; Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); AllocateNode* alloc = AllocateNode::Ideal_allocation(base); if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets + return false; // Cannot unalias unless there are precise offsets. } - if (alloc == nullptr) { - return false; // No allocation found + return false; // No allocation found. } - // Start search from Store node - Node* mem = store->in(MemNode::Control); + Node* mem = store_ctrl; // Start search from Store node. if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - InitializeNode* st_init = mem->in(0)->as_Initialize(); AllocateNode* st_alloc = st_init->allocation(); - // Make sure we are looking at the same allocation if (alloc == st_alloc) { return true; @@ -333,725 +197,362 @@ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit, return false; } -// -// Update the card table and add card address to the queue -// -void G1BarrierSetC2::g1_mark_card(GraphKit* kit, - IdealKit& ideal, - Node* card_adr, - Node* oop_store, - uint oop_alias_idx, - Node* index, - Node* index_adr, - Node* buffer, - const TypeFunc* tf) const { - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - Node* no_base = __ top(); - BasicType card_bt = T_BYTE; - // Smash zero into card. MUST BE ORDERED WRT TO STORE - __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw); - - // Now do the queue work - __ if_then(index, BoolTest::ne, zeroX); { - - Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - Node* log_addr = __ AddP(no_base, buffer, next_index); - - // Order, see storeCM. - __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered); - __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), "write_ref_field_post_entry", card_adr, __ thread()); - } __ end_if(); - +Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { + DecoratorSet decorators = access.decorators(); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; + // If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. Also we need to add a memory barrier to prevent commoning reads + // from this field across safepoints, since GC can change its value. 
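For readers new to SATB: the pre-barrier that G1C2BarrierPre stands for logs the value about to be overwritten so concurrent marking sees a consistent snapshot, which is exactly why Reference.referent reads above must be recorded. A hedged, self-contained sketch of the fast/slow path that the removed IdealKit code used to build inline and that the new out-of-line stubs emit (field layout and types are stand-ins for G1ThreadLocalData):

```cpp
#include <cstddef>
#include <cstdio>

struct Oop { int dummy; };

struct SatbQueue {
  int    active;   // mirrors the satb_mark_queue_active flag
  size_t index;    // byte index of the next free slot, counting down to 0
  Oop**  buffer;   // mirrors the satb_mark_queue_buffer pointer
};

static void runtime_enqueue(SatbQueue*, Oop* pre_val) {  // slow-path stand-in
  std::printf("runtime call for pre_val %p\n", (void*)pre_val);
}

// Log the previous field value while marking is active; call into the
// runtime only when the per-thread buffer is full.
static void satb_pre_barrier(SatbQueue* q, Oop** field) {
  if (q->active == 0) return;       // if (!marking): no logging needed
  Oop* pre_val = *field;            // load the previous value
  if (pre_val == nullptr) return;   // null never needs logging
  if (q->index != 0) {
    q->index -= sizeof(Oop*);       // decrement the index...
    q->buffer[q->index / sizeof(Oop*)] = pre_val;  // ...and log the old value
  } else {
    runtime_enqueue(q, pre_val);    // buffer full: leaf call into the runtime
  }
}

int main() {
  Oop obj{0}; Oop* slot = &obj; Oop* buf[4];
  SatbQueue q{1, sizeof buf, buf};
  satb_pre_barrier(&q, &slot);      // logs &obj into buf[3]
  std::printf("index=%zu logged=%p\n", q.index, (void*)buf[3]);
}
```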
+ bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive); + if (access.is_oop() && need_read_barrier) { + access.set_barrier_data(G1C2BarrierPre); + } + return CardTableBarrierSetC2::load_at_resolved(access, val_type); } -void G1BarrierSetC2::post_barrier(GraphKit* kit, - Node* ctl, - Node* oop_store, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - BasicType bt, - bool use_precise) const { - // If we are writing a null then we need no post barrier +void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { + eliminate_gc_barrier_data(node); +} - if (val != nullptr && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { - // Must be null - const Type* t = val->bottom_type(); - assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be null"); - // No post barrier if writing null - return; +void G1BarrierSetC2::eliminate_gc_barrier_data(Node* node) const { + if (node->is_LoadStore()) { + LoadStoreNode* loadstore = node->as_LoadStore(); + loadstore->set_barrier_data(0); + } else if (node->is_Mem()) { + MemNode* mem = node->as_Mem(); + mem->set_barrier_data(0); } +} - if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) { - // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with CardTableBarrierSet::on_slowpath_allocation_exit. - // That routine informs GC to take appropriate compensating steps, - // upon a slow-path allocation, so as to make this card-mark - // elision safe. +static void refine_barrier_by_new_val_type(const Node* n) { + if (n->Opcode() != Op_StoreP && + n->Opcode() != Op_StoreN) { return; } - - if (use_ReduceInitialCardMarks() - && g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) { + MemNode* store = n->as_Mem(); + const Node* newval = n->in(MemNode::ValueIn); + assert(newval != nullptr, ""); + const Type* newval_bottom = newval->bottom_type(); + TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr(); + uint8_t barrier_data = store->barrier_data(); + if (!newval_bottom->isa_oopptr() && + !newval_bottom->isa_narrowoop() && + newval_type != TypePtr::Null) { + // newval is neither an OOP nor null, so there is no barrier to refine. + assert(barrier_data == 0, "non-OOP stores should have no barrier data"); return; } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; + if (barrier_data == 0) { + // No barrier to refine. + return; } - // (Else it's an array (or unknown), and we want more precise card marks.) - assert(adr != nullptr, ""); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - float likely = PROB_LIKELY_MAG(3); - float unlikely = PROB_UNLIKELY_MAG(3); - Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val()); - Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val()); - Node* zeroX = __ ConX(0); - - const TypeFunc *tf = write_ref_field_post_entry_Type(); - - // Offsets into the thread - const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); - - // Pointers into the thread - - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some values - // Use ctrl to avoid hoisting these values past a safepoint, which could - // potentially reset these fields in the JavaThread. 
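The refinement in refine_barrier_by_new_val_type here boils down to bit surgery on the node's barrier data. A condensed, runnable restatement using the flag values this patch introduces in g1BarrierSetC2.hpp (the enum is invented for the sketch):

```cpp
#include <cassert>
#include <cstdint>

const int G1C2BarrierPre         = 1;
const int G1C2BarrierPost        = 2;
const int G1C2BarrierPostNotNull = 4;

enum class NewValKind { Null, NotNull, Unknown };

static uint8_t refine(uint8_t barrier_data, NewValKind k) {
  if (k == NewValKind::Null) {
    // Writing null can never create an old->young or cross-region pointer,
    // so the post-barrier (and its not-null refinement) can be dropped.
    barrier_data &= ~(G1C2BarrierPost | G1C2BarrierPostNotNull);
  } else if ((barrier_data & G1C2BarrierPost) != 0 && k == NewValKind::NotNull) {
    // A provably non-null new value lets the stub skip its null filter.
    barrier_data |= G1C2BarrierPostNotNull;
  }
  return barrier_data;
}

int main() {
  assert(refine(G1C2BarrierPre | G1C2BarrierPost, NewValKind::Null) == G1C2BarrierPre);
  assert(refine(G1C2BarrierPost, NewValKind::NotNull)
         == (G1C2BarrierPost | G1C2BarrierPostNotNull));
}
```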
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw); - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // Convert the store obj pointer to an int prior to doing math on it - // Must use ctrl to prevent "integerized oop" existing across safepoint - Node* cast = __ CastPX(__ ctrl(), adr); - - // Divide pointer by card size - Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) ); - - // Combine card table base and card offset - Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset ); - - // If we know the value being stored does it cross regions? - - if (val != nullptr) { - // Does the store cause us to cross regions? - - // Should be able to do an unsigned compare of region_size instead of - // and extra shift. Do we have an unsigned compare?? - // Node* region_size = __ ConI(1 << G1HeapRegion::LogOfHRGrainBytes); - Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(checked_cast(G1HeapRegion::LogOfHRGrainBytes))); - - // if (xor_res == 0) same region so skip - __ if_then(xor_res, BoolTest::ne, zeroX, likely); { - - // No barrier if we are storing a null. - __ if_then(val, BoolTest::ne, kit->null(), likely); { - - // Ok must mark the card if not already dirty - - // load the original value of the card - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - - __ if_then(card_val, BoolTest::ne, young_card, unlikely); { - kit->sync_kit(ideal); - kit->insert_mem_bar(Op_MemBarVolatile, oop_store); - __ sync_kit(kit); - - Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val_reload, BoolTest::ne, dirty_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } else { - // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks. - // We don't need a barrier here if the destination is a newly allocated object - // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden - // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()). - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val, BoolTest::ne, young_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); + if (newval_type == TypePtr::Null) { + // Simply elide post-barrier if writing null. + barrier_data &= ~G1C2BarrierPost; + barrier_data &= ~G1C2BarrierPostNotNull; + } else if (((barrier_data & G1C2BarrierPost) != 0) && + newval_type == TypePtr::NotNull) { + // If the post-barrier has not been elided yet (e.g. due to newval being + // freshly allocated), mark it as not-null (simplifies barrier tests and + // compressed OOPs logic). + barrier_data |= G1C2BarrierPostNotNull; } - - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); + store->set_barrier_data(barrier_data); + return; } -// Helper that guards and inserts a pre-barrier. -void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) const { - // We could be accessing the referent field of a reference object. 
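The removed post_barrier code above encodes G1's classic card-marking filters, and the same filtering survives in the out-of-line stub: skip same-region stores, null stores, and cards already young or dirty; otherwise dirty the card and enqueue it for refinement. A self-contained sketch of that filter chain (all constants and the card-table layout are illustrative stand-ins, not G1's real values):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

const int     kLogRegionBytes = 21;    // stand-in for LogOfHRGrainBytes
const int     kCardShift      = 9;     // stand-in for CardTable::card_shift()
const uint8_t kYoungCard      = 2;     // stand-in for g1_young_card_val()
const uint8_t kDirtyCard      = 0;     // stand-in for dirty_card_val()

static uint8_t card_table[1 << 10];

static void enqueue_card(uint8_t* card) { std::printf("enqueued %p\n", (void*)card); }

static void g1_post_barrier(uintptr_t field_addr, uintptr_t new_val) {
  if (((field_addr ^ new_val) >> kLogRegionBytes) == 0) return;  // same region
  if (new_val == 0) return;                                      // storing null
  uint8_t* card = &card_table[field_addr >> kCardShift];
  if (*card == kYoungCard) return;                               // young region
  // (a StoreLoad fence sits here in the real barrier)
  if (*card == kDirtyCard) return;                               // already dirty
  *card = kDirtyCard;                                            // dirty the card
  enqueue_card(card);                                            // log for refinement
}

int main() {
  std::memset(card_table, 0xFF, sizeof card_table);  // 0xFF as a "clean" value
  g1_post_barrier(0x1000, 0x400000);  // cross-region store: dirties + enqueues
  g1_post_barrier(0x1000, 0x1200);    // same region: filtered out
}
```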
If so, when G1 - // is enabled, we need to log the value in the referent field in an SATB buffer. - // This routine performs some compile time filters and generates suitable - // runtime filters that guard the pre-barrier code. - // Also add memory barrier for non volatile load from the referent field - // to prevent commoning of loads across safepoint. - - // Some compile time checks. - - // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? - const TypeX* otype = offset->find_intptr_t_type(); - if (otype != nullptr && otype->is_con() && - otype->get_con() != java_lang_ref_Reference::referent_offset()) { - // Constant offset but not the reference_offset so just return - return; - } - - // We only need to generate the runtime guards for instances. - const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); - if (btype != nullptr) { - if (btype->isa_aryptr()) { - // Array type so nothing to do - return; +// Refine (not really expand) G1 barriers by looking at the new value type +// (whether it is necessarily null or necessarily non-null). +bool G1BarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { + ResourceMark rm; + VectorSet visited; + Node_List worklist; + worklist.push(C->root()); + while (worklist.size() > 0) { + Node* n = worklist.pop(); + if (visited.test_set(n->_idx)) { + continue; } - - const TypeInstPtr* itype = btype->isa_instptr(); - if (itype != nullptr) { - // Can the klass of base_oop be statically determined to be - // _not_ a sub-class of Reference and _not_ Object? - ciKlass* klass = itype->instance_klass(); - if (klass->is_loaded() && - !klass->is_subtype_of(kit->env()->Reference_klass()) && - !kit->env()->Object_klass()->is_subtype_of(klass)) { - return; + refine_barrier_by_new_val_type(n); + for (uint j = 0; j < n->req(); j++) { + Node* in = n->in(j); + if (in != nullptr) { + worklist.push(in); } } } + return false; +} - // The compile time filters did not reject base_oop/offset so - // we need to generate the following runtime filters - // - // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); +uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const { + // These Ideal node counts are extracted from the pre-matching Ideal graph + // generated when compiling the following method with early barrier expansion: + // static void write(MyObject obj1, Object o) { + // obj1.o1 = o; // } - // } - - float likely = PROB_LIKELY( 0.999); - float unlikely = PROB_UNLIKELY(0.999); - - IdealKit ideal(kit); - - Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset()); - - __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - // Update graphKit memory and control from IdealKit. - kit->sync_kit(ideal); - - Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass())); - Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con); - - // Update IdealKit memory and control from graphKit. - __ sync_kit(kit); - - Node* one = __ ConI(1); - // is_instof == 0 if base_oop == nullptr - __ if_then(is_instof, BoolTest::eq, one, unlikely); { - - // Update graphKit from IdeakKit. 
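expand_barriers above applies that refinement with the standard visited-set worklist walk over the Ideal graph, touching every reachable node exactly once. The same pattern in miniature (std containers stand in for VectorSet and Node_List; Node is a toy stand-in for C2's node class):

```cpp
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {
  int id;
  std::vector<Node*> in;  // inputs, like Node::in(j)
};

static void refine(Node* n) { std::printf("visit %d\n", n->id); }

static void walk(Node* root) {
  std::unordered_set<int> visited;    // plays the role of VectorSet
  std::vector<Node*> worklist{root};  // plays the role of Node_List
  while (!worklist.empty()) {
    Node* n = worklist.back();
    worklist.pop_back();
    if (!visited.insert(n->id).second) continue;  // like visited.test_set(_idx)
    refine(n);
    for (Node* in : n->in) {
      if (in != nullptr) worklist.push_back(in);
    }
  }
}

int main() {
  Node a{0, {}}, b{1, {&a}}, c{2, {&a, &b, nullptr}};
  walk(&c);  // visits nodes 2, 1, 0, each exactly once
}
```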
- kit->sync_kit(ideal); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(kit, false /* do_load */, - __ ctrl(), - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - pre_val /* pre_val */, - T_OBJECT); - if (need_mem_bar) { - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } - // Update IdealKit from graphKit. - __ sync_kit(kit); - - } __ end_if(); // _ref_type != ref_none - } __ end_if(); // offset == referent_offset + uint8_t barrier_data = MemNode::barrier_data(node); + uint nodes = 0; + if ((barrier_data & G1C2BarrierPre) != 0) { + nodes += 50; + } + if ((barrier_data & G1C2BarrierPost) != 0) { + nodes += 60; + } + return nodes; +} - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); +bool G1BarrierSetC2::can_initialize_object(const StoreNode* store) const { + assert(store->Opcode() == Op_StoreP || store->Opcode() == Op_StoreN, "OOP store expected"); + // It is OK to move the store across the object initialization boundary only + // if it does not have any barrier, or if it has barriers that can be safely + // elided (because of the compensation steps taken on the allocation slow path + // when ReduceInitialCardMarks is enabled). + return (MemNode::barrier_data(store) == 0) || use_ReduceInitialCardMarks(); } -#undef __ +void G1BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const { + if (ac->is_clone_inst() && !use_ReduceInitialCardMarks()) { + clone_in_runtime(phase, ac, G1BarrierSetRuntime::clone_addr(), "G1BarrierSetRuntime::clone"); + return; + } + BarrierSetC2::clone_at_expansion(phase, ac); +} -Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { +Node* G1BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { DecoratorSet decorators = access.decorators(); - Node* adr = access.addr().node(); - Node* obj = access.base(); - - bool anonymous = (decorators & C2_UNSAFE_ACCESS) != 0; - bool mismatched = (decorators & C2_MISMATCHED) != 0; - bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; bool in_heap = (decorators & IN_HEAP) != 0; - bool in_native = (decorators & IN_NATIVE) != 0; - bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; - bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; - bool is_unordered = (decorators & MO_UNORDERED) != 0; - bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; - bool is_mixed = !in_heap && !in_native; - bool need_cpu_mem_bar = !is_unordered || mismatched || is_mixed; - - Node* top = Compile::current()->top(); - Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : top; - - // If we are reading the value of the referent field of a Reference - // object (either by using Unsafe directly or through reflection) - // then, if G1 is enabled, we need to record the referent in an - // SATB log buffer using the pre-barrier mechanism. - // Also we need to add memory barrier to prevent commoning reads - // from this field across safepoint since GC can change its value. 
- bool need_read_barrier = (((on_weak || on_phantom) && !no_keepalive) || - (in_heap && unknown && offset != top && obj != top)); + bool tightly_coupled_alloc = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0; + bool need_store_barrier = !(tightly_coupled_alloc && use_ReduceInitialCardMarks()) && (in_heap || anonymous); + if (access.is_oop() && need_store_barrier) { + access.set_barrier_data(get_store_barrier(access)); + if (tightly_coupled_alloc) { + assert(!use_ReduceInitialCardMarks(), + "post-barriers are only needed for tightly-coupled initialization stores when ReduceInitialCardMarks is disabled"); + // Pre-barriers are unnecessary for tightly-coupled initialization stores. + access.set_barrier_data(access.barrier_data() & ~G1C2BarrierPre); + } + } + return BarrierSetC2::store_at_resolved(access, val); +} - if (!access.is_oop() || !need_read_barrier) { - return CardTableBarrierSetC2::load_at_resolved(access, val_type); +Node* G1BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); +} - assert(access.is_parse_access(), "entry not supported at optimization time"); +Node* G1BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); +} - C2ParseAccess& parse_access = static_cast(access); - GraphKit* kit = parse_access.kit(); - Node* load; +Node* G1BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type); + } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type); +} - Node* control = kit->control(); - const TypePtr* adr_type = access.addr().type(); - MemNode::MemOrd mo = access.mem_node_mo(); - bool requires_atomic_access = (decorators & MO_UNORDERED) == 0; - bool unaligned = (decorators & C2_UNALIGNED) != 0; - bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0; - // Pinned control dependency is the strictest. So it's ok to substitute it for any other. 
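The stub classes introduced just below all funnel through the same create() pattern: arena-allocate the stub, then register it with the per-compilation state unless C2 is in the scratch pass it runs purely to measure code size, where registering would record the stub twice. Sketched with invented types:

```cpp
#include <vector>

struct Stub { int id; };

struct CompilationState {
  bool in_scratch_emit_size = false;  // stand-in for Output::in_scratch_emit_size()
  std::vector<Stub*> stubs;           // stand-in for the arena GrowableArray
};

static Stub* create_stub(CompilationState& C, int id) {
  Stub* stub = new Stub{id};          // the real code allocates in comp_arena(),
  if (!C.in_scratch_emit_size) {      // so nothing is ever freed explicitly
    C.stubs.push_back(stub);          // recorded once, emitted after the method
  }
  return stub;
}

int main() {
  CompilationState C;
  C.in_scratch_emit_size = true;
  create_stub(C, 0);                  // sizing pass: not recorded
  C.in_scratch_emit_size = false;
  create_stub(C, 1);                  // real pass: recorded for emit_stubs()
  return (int)C.stubs.size();         // 1
}
```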
- load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, - LoadNode::Pinned, requires_atomic_access, unaligned, mismatched, unsafe, - access.barrier_data()); +class G1BarrierSetC2State : public BarrierSetC2State { +private: + GrowableArray<G1BarrierStubC2*>* _stubs; +public: + G1BarrierSetC2State(Arena* arena) + : BarrierSetC2State(arena), + _stubs(new (arena) GrowableArray<G1BarrierStubC2*>(arena, 8, 0, nullptr)) {} - if (on_weak || on_phantom) { - // Use the pre-barrier to record the value in the referent field - pre_barrier(kit, false /* do_load */, - kit->control(), - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - load /* pre_val */, T_OBJECT); - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } else if (unknown) { - // We do not require a mem bar inside pre_barrier if need_mem_bar - // is set: the barriers would be emitted by us. - insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar); + GrowableArray<G1BarrierStubC2*>* stubs() { + return _stubs; } - return load; -} - -bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const { - if (CardTableBarrierSetC2::is_gc_barrier_node(node)) { - return true; + bool needs_liveness_data(const MachNode* mach) const { + return G1PreBarrierStubC2::needs_barrier(mach) || + G1PostBarrierStubC2::needs_barrier(mach); } - if (node->Opcode() != Op_CallLeaf) { - return false; - } - CallLeafNode *call = node->as_CallLeaf(); - if (call->_name == nullptr) { + + bool needs_livein_data() const { return false; } +}; - return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0; +static G1BarrierSetC2State* barrier_set_state() { + return reinterpret_cast<G1BarrierSetC2State*>(Compile::current()->barrier_set_state()); } -bool G1BarrierSetC2::is_g1_pre_val_load(Node* n) { - if (n->is_Load() && n->as_Load()->has_pinned_control_dependency()) { - // Make sure the only users of it are: CmpP, StoreP, and a call to write_ref_field_pre_entry +G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {} - // Skip possible decode - if (n->outcnt() == 1 && n->unique_out()->is_DecodeN()) { - n = n->unique_out(); - } +G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {} - if (n->outcnt() == 3) { - int found = 0; - for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) { - Node* use = iter.get(); - if (use->is_Cmp() || use->is_Store()) { - ++found; - } else if (use->is_CallLeaf()) { - CallLeafNode* call = use->as_CallLeaf(); - if (strcmp(call->_name, "write_ref_field_pre_entry") == 0) { - ++found; - } - } - } - if (found == 3) { - return true; - } - } +bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) { + return (node->barrier_data() & G1C2BarrierPre) != 0; +} + +G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) { + G1PreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PreBarrierStubC2(node); + if (!Compile::current()->output()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); } - return false; + return stub; } -bool G1BarrierSetC2::is_gc_pre_barrier_node(Node *node) const { - return is_g1_pre_val_load(node); +void G1PreBarrierStubC2::initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1, Register tmp2) { + _obj = obj; + _pre_val = pre_val; + _thread = thread; + _tmp1 = tmp1; + _tmp2 = tmp2; } -void
G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { - if (is_g1_pre_val_load(node)) { - macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type())); - } else { - assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); - assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes"); - // It could be only one user, URShift node, in Object.clone() intrinsic - // but the new allocation is passed to arraycopy stub and it could not - // be scalar replaced. So we don't check the case. +Register G1PreBarrierStubC2::obj() const { + return _obj; +} - // An other case of only one user (Xor) is when the value check for null - // in G1 post barrier is folded after CCP so the code which used URShift - // is removed. +Register G1PreBarrierStubC2::pre_val() const { + return _pre_val; +} - // Take Region node before eliminating post barrier since it also - // eliminates CastP2X node when it has only one user. - Node* this_region = node->in(0); - assert(this_region != nullptr, ""); +Register G1PreBarrierStubC2::thread() const { + return _thread; +} - // Remove G1 post barrier. +Register G1PreBarrierStubC2::tmp1() const { + return _tmp1; +} + +Register G1PreBarrierStubC2::tmp2() const { + return _tmp2; +} - // Search for CastP2X->Xor->URShift->Cmp path which - // checks if the store done to a different from the value's region. - // And replace Cmp with #0 (false) to collapse G1 post barrier. - Node* xorx = node->find_out_with(Op_XorX); - if (xorx != nullptr) { - Node* shift = xorx->unique_out(); - Node* cmpx = shift->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing region check in G1 post barrier"); - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); +void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) { + G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + bs->generate_c2_pre_barrier_stub(&masm, this); +} - // Remove G1 pre barrier. +G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {} - // Search "if (marking != 0)" check and set it to "false". - // There is no G1 pre barrier if previous stored value is null - // (for example, after initialization). - if (this_region->is_Region() && this_region->req() == 3) { - int ind = 1; - if (!this_region->in(ind)->is_IfFalse()) { - ind = 2; - } - if (this_region->in(ind)->is_IfFalse() && - this_region->in(ind)->in(0)->Opcode() == Op_If) { - Node* bol = this_region->in(ind)->in(0)->in(1); - assert(bol->is_Bool(), ""); - cmpx = bol->in(1); - if (bol->as_Bool()->_test._test == BoolTest::ne && - cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) && - cmpx->in(1)->is_Load()) { - Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() && - adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && - adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) { - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); - } - } - } - } - } else { - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - // This is a G1 post barrier emitted by the Object.clone() intrinsic.
- // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card - // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier. - Node* shift = node->find_out_with(Op_URShiftX); - assert(shift != nullptr, "missing G1 post barrier"); - Node* addp = shift->unique_out(); - Node* load = addp->find_out_with(Op_LoadB); - assert(load != nullptr, "missing G1 post barrier"); - Node* cmpx = load->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing card value check in G1 post barrier"); - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); - // There is no G1 pre barrier in this case - } - // Now CastP2X can be removed since it is used only on dead path - // which currently still alive until igvn optimize it. - assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, ""); - macro->replace_node(node, macro->top()); - } +bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) { + return (node->barrier_data() & G1C2BarrierPost) != 0; } -Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const { - if (!use_ReduceInitialCardMarks() && - c != nullptr && c->is_Region() && c->req() == 3) { - for (uint i = 1; i < c->req(); i++) { - if (c->in(i) != nullptr && c->in(i)->is_Region() && - c->in(i)->req() == 3) { - Node* r = c->in(i); - for (uint j = 1; j < r->req(); j++) { - if (r->in(j) != nullptr && r->in(j)->is_Proj() && - r->in(j)->in(0) != nullptr && - r->in(j)->in(0)->Opcode() == Op_CallLeaf && - r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)) { - Node* call = r->in(j)->in(0); - c = c->in(i == 1 ? 2 : 1); - if (c != nullptr && c->Opcode() != Op_Parm) { - c = c->in(0); - if (c != nullptr) { - c = c->in(0); - assert(call->in(0) == nullptr || - call->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0)->in(0)->in(0) == nullptr || - c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape"); - return c; - } - } - } - } - } - } +G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) { + G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node); + if (!Compile::current()->output()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); } - return c; + return stub; } -#ifdef ASSERT -bool G1BarrierSetC2::has_cas_in_use_chain(Node *n) const { - Unique_Node_List visited; - Node_List worklist; - worklist.push(n); - while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } +void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) { + _thread = thread; + _tmp1 = tmp1; + _tmp2 = tmp2; + _tmp3 = tmp3; +} - if (x->is_LoadStore()) { - int op = x->Opcode(); - if (op == Op_CompareAndExchangeP || op == Op_CompareAndExchangeN || - op == Op_CompareAndSwapP || op == Op_CompareAndSwapN || - op == Op_WeakCompareAndSwapP || op == Op_WeakCompareAndSwapN) { - return true; - } - } - if (!x->is_CFG()) { - for (SimpleDUIterator iter(x); iter.has_next(); iter.next()) { - Node* use = iter.get(); - worklist.push(use); - } - } - } - return false; +Register G1PostBarrierStubC2::thread() const { + return _thread; } -void G1BarrierSetC2::verify_pre_load(Node* marking_if, Unique_Node_List& loads /*output*/) 
const { - assert(loads.size() == 0, "Loads list should be empty"); - Node* pre_val_if = marking_if->find_out_with(Op_IfTrue)->find_out_with(Op_If); - if (pre_val_if != nullptr) { - Unique_Node_List visited; - Node_List worklist; - Node* pre_val = pre_val_if->in(1)->in(1)->in(1); +Register G1PostBarrierStubC2::tmp1() const { + return _tmp1; +} - worklist.push(pre_val); - while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } +Register G1PostBarrierStubC2::tmp2() const { + return _tmp2; +} - if (has_cas_in_use_chain(x)) { - loads.clear(); - return; - } +Register G1PostBarrierStubC2::tmp3() const { + return _tmp3; +} - if (x->is_Con()) { - continue; - } - if (x->is_EncodeP() || x->is_DecodeN()) { - worklist.push(x->in(1)); - continue; - } - if (x->is_Load() || x->is_LoadStore()) { - assert(x->in(0) != nullptr, "Pre-val load has to have a control"); - loads.push(x); - continue; - } - if (x->is_Phi()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - continue; - } - assert(false, "Pre-val anomaly"); - } - } +void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) { + G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + bs->generate_c2_post_barrier_stub(&masm, this); } -void G1BarrierSetC2::verify_no_safepoints(Compile* compile, Node* marking_check_if, const Unique_Node_List& loads) const { - if (loads.size() == 0) { - return; - } +void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const { + return new (comp_arena) G1BarrierSetC2State(comp_arena); +} - if (loads.size() == 1) { // Handle the typical situation when there a single pre-value load - // that is dominated by the marking_check_if, that's true when the - // barrier itself does the pre-val load. - Node *pre_val = loads.at(0); - if (pre_val->in(0)->in(0) == marking_check_if) { // IfTrue->If - return; - } +int G1BarrierSetC2::get_store_barrier(C2Access& access) const { + if (!access.is_parse_access()) { + // Only support for eliding barriers at parse time for now. + return G1C2BarrierPre | G1C2BarrierPost; } - - // All other cases are when pre-value loads dominate the marking check. - Unique_Node_List controls; - for (uint i = 0; i < loads.size(); i++) { - Node *c = loads.at(i)->in(0); - controls.push(c); + GraphKit* kit = (static_cast<C2ParseAccess&>(access)).kit(); + Node* ctl = kit->control(); + Node* adr = access.addr().node(); + uint adr_idx = kit->C->get_alias_index(access.addr().type()); + assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory"); + + bool can_remove_pre_barrier = g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, access.type(), adr_idx); + + // We can skip marks on a freshly-allocated object in Eden. Keep this code in + // sync with CardTableBarrierSet::on_slowpath_allocation_exit. That routine + // informs GC to take appropriate compensating steps, upon a slow-path + // allocation, so as to make this card-mark elision safe. + // The post-barrier can also be removed if null is written. This case is + // handled by G1BarrierSetC2::expand_barriers, which runs at the end of C2's + // platform-independent optimizations to exploit stronger type information.
+ bool can_remove_post_barrier = use_ReduceInitialCardMarks() && + ((access.base() == kit->just_allocated_object(ctl)) || + g1_can_remove_post_barrier(kit, &kit->gvn(), ctl, adr)); + + int barriers = 0; + if (!can_remove_pre_barrier) { + barriers |= G1C2BarrierPre; + } + if (!can_remove_post_barrier) { + barriers |= G1C2BarrierPost; + } - Unique_Node_List visited; - Unique_Node_List safepoints; - Node_List worklist; - uint found = 0; + return barriers; +} - worklist.push(marking_check_if); - while (worklist.size() > 0 && found < controls.size()) { - Node* x = worklist.pop(); - if (x == nullptr || x == compile->top()) continue; - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } +void G1BarrierSetC2::late_barrier_analysis() const { + compute_liveness_at_stubs(); +} - if (controls.member(x)) { - found++; - } - if (x->is_Region()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - } else { - if (!x->is_SafePoint()) { - worklist.push(x->in(0)); - } else { - safepoints.push(x); - } +void G1BarrierSetC2::emit_stubs(CodeBuffer& cb) const { + MacroAssembler masm(&cb); + GrowableArray<G1BarrierStubC2*>* const stubs = barrier_set_state()->stubs(); + for (int i = 0; i < stubs->length(); i++) { + // Make sure there is enough space in the code buffer + if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) { + ciEnv::current()->record_failure("CodeCache is full"); + return; } + stubs->at(i)->emit_code(masm); } - assert(found == controls.size(), "Pre-barrier structure anomaly or possible safepoint"); + masm.flush(); } -void G1BarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { - if (phase != BarrierSetC2::BeforeCodeGen) { - return; +#ifndef PRODUCT +void G1BarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const { + if ((mach->barrier_data() & G1C2BarrierPre) != 0) { + st->print("pre "); } - // Verify G1 pre-barriers - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - - Unique_Node_List visited; - Node_List worklist; - // We're going to walk control flow backwards starting from the Root - worklist.push(compile->root()); - while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (x == nullptr || x == compile->top()) continue; - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } - - if (x->is_Region()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - } else { - worklist.push(x->in(0)); - // We are looking for the pattern: - // /->ThreadLocal - // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset) - // \->ConI(0) - // We want to verify that the If and the LoadB have the same control - // See GraphKit::g1_write_barrier_pre() - if (x->is_If()) { - IfNode *iff = x->as_If(); - if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) { - CmpNode *cmp = iff->in(1)->in(1)->as_Cmp(); - if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0 - && cmp->in(1)->is_Load()) { - LoadNode* load = cmp->in(1)->as_Load(); - if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal - && load->in(2)->in(3)->is_Con() - && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) { - - Node* if_ctrl = iff->in(0); - Node* load_ctrl = load->in(0); - - if (if_ctrl != load_ctrl) { - // Skip possible CProj->NeverBranch in infinite loops - if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj) - &&
if_ctrl->in(0)->is_NeverBranch()) { - if_ctrl = if_ctrl->in(0)->in(0); - } - } - assert(load_ctrl != nullptr && if_ctrl == load_ctrl, "controls must match"); - - Unique_Node_List loads; - verify_pre_load(iff, loads); - verify_no_safepoints(compile, iff, loads); - } - } - } - } - } + if ((mach->barrier_data() & G1C2BarrierPost) != 0) { + st->print("post "); } -} -#endif - -bool G1BarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { - if (opcode == Op_StoreP) { - Node* adr = n->in(MemNode::Address); - const Type* adr_type = gvn->type(adr); - // Pointer stores in G1 barriers looks like unsafe access. - // Ignore such stores to be able scalar replace non-escaping - // allocations. - if (adr_type->isa_rawptr() && adr->is_AddP()) { - Node* base = conn_graph->get_addp_base(adr); - if (base->Opcode() == Op_LoadP && - base->in(MemNode::Address)->is_AddP()) { - adr = base->in(MemNode::Address); - Node* tls = conn_graph->get_addp_base(adr); - if (tls->Opcode() == Op_ThreadLocal) { - int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot); - const int buf_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - if (offs == buf_offset) { - return true; // G1 pre barrier previous oop value store. - } - if (offs == in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())) { - return true; // G1 post barrier card address store. - } - } - } - } + if ((mach->barrier_data() & G1C2BarrierPostNotNull) != 0) { + st->print("notnull "); } - return false; } +#endif // !PRODUCT diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp index c445a87d2e46d..dc333d8c33174 100644 --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp @@ -31,29 +31,62 @@ class PhaseTransform; class Type; class TypeFunc; -class G1BarrierSetC2: public CardTableBarrierSetC2 { +const int G1C2BarrierPre = 1; +const int G1C2BarrierPost = 2; +const int G1C2BarrierPostNotNull = 4; + +class G1BarrierStubC2 : public BarrierStubC2 { +public: + G1BarrierStubC2(const MachNode* node); + virtual void emit_code(MacroAssembler& masm) = 0; +}; + +class G1PreBarrierStubC2 : public G1BarrierStubC2 { +private: + Register _obj; + Register _pre_val; + Register _thread; + Register _tmp1; + Register _tmp2; + +protected: + G1PreBarrierStubC2(const MachNode* node); + +public: + static bool needs_barrier(const MachNode* node); + static G1PreBarrierStubC2* create(const MachNode* node); + void initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1 = noreg, Register tmp2 = noreg); + Register obj() const; + Register pre_val() const; + Register thread() const; + Register tmp1() const; + Register tmp2() const; + virtual void emit_code(MacroAssembler& masm); +}; + +class G1PostBarrierStubC2 : public G1BarrierStubC2 { +private: + Register _thread; + Register _tmp1; + Register _tmp2; + Register _tmp3; + protected: - virtual void pre_barrier(GraphKit* kit, - bool do_load, - Node* ctl, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const; - - virtual void post_barrier(GraphKit* kit, - Node* ctl, - Node* store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - BasicType bt, - bool use_precise) const; + G1PostBarrierStubC2(const MachNode* node); +public: + static bool needs_barrier(const MachNode* node); + static G1PostBarrierStubC2* 
create(const MachNode* node); + void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg); + Register thread() const; + Register tmp1() const; + Register tmp2() const; + Register tmp3() const; + virtual void emit_code(MacroAssembler& masm); +}; + +class G1BarrierSetC2: public CardTableBarrierSetC2 { +protected: bool g1_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, Node* adr, @@ -64,44 +97,31 @@ class G1BarrierSetC2: public CardTableBarrierSetC2 { PhaseValues* phase, Node* store, Node* adr) const; - void g1_mark_card(GraphKit* kit, - IdealKit& ideal, - Node* card_adr, - Node* oop_store, - uint oop_alias_idx, - Node* index, - Node* index_adr, - Node* buffer, - const TypeFunc* tf) const; - - // Helper for unsafe accesses, that may or may not be on the referent field. - // Generates the guards that check whether the result of - // Unsafe.getReference should be recorded in an SATB log buffer. - void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const; - - static const TypeFunc* write_ref_field_pre_entry_Type(); - static const TypeFunc* write_ref_field_post_entry_Type(); + int get_store_barrier(C2Access& access) const; virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const; + virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const; + virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const; -#ifdef ASSERT - bool has_cas_in_use_chain(Node* x) const; - void verify_pre_load(Node* marking_check_if, Unique_Node_List& loads /*output*/) const; - void verify_no_safepoints(Compile* compile, Node* marking_load, const Unique_Node_List& loads) const; -#endif - - static bool is_g1_pre_val_load(Node* n); public: - virtual bool is_gc_pre_barrier_node(Node* node) const; - virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; - virtual Node* step_over_gc_barrier(Node* c) const; - -#ifdef ASSERT - virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; + virtual void eliminate_gc_barrier_data(Node* node) const; + virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; + virtual uint estimated_barrier_size(const Node* node) const; + virtual bool can_initialize_object(const StoreNode* store) const; + virtual void clone_at_expansion(PhaseMacroExpand* phase, + ArrayCopyNode* ac) const; + virtual void* create_barrier_state(Arena* comp_arena) const; + virtual void emit_stubs(CodeBuffer& cb) const; + virtual void late_barrier_analysis() const; + +#ifndef PRODUCT + virtual void dump_barrier_data(const MachNode* mach, outputStream* st) const; #endif - - virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const; }; #endif // SHARE_GC_G1_C2_G1BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp index a0fce437807f4..2e247f46c93d8 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp +++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp @@ -61,3 +61,11 @@ 
JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTa G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread); G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr); JRT_END + +JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size)) + HeapAccess<>::clone(src, dst, size); +JRT_END + +address G1BarrierSetRuntime::clone_addr() { + return reinterpret_cast<address>
(clone); +} diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp index 366679f032ba9..f98e94096e72d 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp +++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp @@ -35,6 +35,8 @@ class oopDesc; class JavaThread; class G1BarrierSetRuntime: public AllStatic { +private: + static void clone(oopDesc* src, oopDesc* dst, size_t size); public: using CardValue = G1CardTable::CardValue; @@ -46,6 +48,8 @@ class G1BarrierSetRuntime: public AllStatic { // C2 slow-path runtime calls. static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread); static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread); + + static address clone_addr(); }; #endif // SHARE_GC_G1_G1BARRIERSETRUNTIME_HPP diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.cpp b/src/hotspot/share/gc/g1/g1CollectionSet.cpp index d315497268f99..ec90fd377503d 100644 --- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp @@ -26,7 +26,7 @@ #include "gc/g1/g1Analytics.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1CollectionSet.hpp" -#include "gc/g1/g1CollectionSetCandidates.hpp" +#include "gc/g1/g1CollectionSetCandidates.inline.hpp" #include "gc/g1/g1CollectorState.hpp" #include "gc/g1/g1HeapRegion.inline.hpp" #include "gc/g1/g1HeapRegionRemSet.inline.hpp" @@ -346,20 +346,16 @@ void G1CollectionSet::finalize_old_part(double time_remaining_ms) { G1CollectionCandidateRegionList pinned_retained_regions; if (collector_state()->in_mixed_phase()) { - time_remaining_ms = _policy->select_candidates_from_marking(&candidates()->marking_regions(), - time_remaining_ms, - &initial_old_regions, - &_optional_old_regions, - &pinned_marking_regions); + time_remaining_ms = select_candidates_from_marking(time_remaining_ms, + &initial_old_regions, + &pinned_marking_regions); } else { log_debug(gc, ergo, cset)("Do not add marking candidates to collection set due to pause type."); } - _policy->select_candidates_from_retained(&candidates()->retained_regions(), - time_remaining_ms, - &initial_old_regions, - &_optional_old_regions, - &pinned_retained_regions); + select_candidates_from_retained(time_remaining_ms, + &initial_old_regions, + &pinned_retained_regions); // Move initially selected old regions to collection set directly. move_candidates_to_collection_set(&initial_old_regions); @@ -394,6 +390,215 @@ void G1CollectionSet::move_candidates_to_collection_set(G1CollectionCandidateReg candidates()->remove(regions); } +static void print_finish_message(const char* reason, bool from_marking) { + log_debug(gc, ergo, cset)("Finish adding %s candidates to collection set (%s).", + from_marking ? 
"marking" : "retained", reason); +} + +double G1CollectionSet::select_candidates_from_marking(double time_remaining_ms, + G1CollectionCandidateRegionList* initial_old_regions, + G1CollectionCandidateRegionList* pinned_old_regions) { + uint num_expensive_regions = 0; + + uint num_initial_regions_selected = 0; + uint num_optional_regions_selected = 0; + uint num_pinned_regions = 0; + + double predicted_initial_time_ms = 0.0; + double predicted_optional_time_ms = 0.0; + + double optional_threshold_ms = time_remaining_ms * _policy->optional_prediction_fraction(); + + const uint min_old_cset_length = _policy->calc_min_old_cset_length(candidates()->last_marking_candidates_length()); + const uint max_old_cset_length = MAX2(min_old_cset_length, _policy->calc_max_old_cset_length()); + const uint max_optional_regions = max_old_cset_length - min_old_cset_length; + bool check_time_remaining = _policy->use_adaptive_young_list_length(); + + G1CollectionCandidateList* marking_list = &candidates()->marking_regions(); + assert(marking_list != nullptr, "must be"); + + log_debug(gc, ergo, cset)("Start adding marking candidates to collection set. " + "Min %u regions, max %u regions, available %u regions" + "time remaining %1.2fms, optional threshold %1.2fms", + min_old_cset_length, max_old_cset_length, marking_list->length(), time_remaining_ms, optional_threshold_ms); + + G1CollectionCandidateListIterator iter = marking_list->begin(); + for (; iter != marking_list->end(); ++iter) { + if (num_initial_regions_selected + num_optional_regions_selected >= max_old_cset_length) { + // Added maximum number of old regions to the CSet. + print_finish_message("Maximum number of regions reached", true); + break; + } + G1HeapRegion* hr = (*iter)->_r; + // Skip evacuating pinned marking regions because we are not getting any free + // space from them (and we expect to get free space from marking candidates). + // Also prepare to move them to retained regions to be evacuated optionally later + // to not impact the mixed phase too much. + if (hr->has_pinned_objects()) { + num_pinned_regions++; + (*iter)->update_num_unreclaimed(); + log_trace(gc, ergo, cset)("Marking candidate %u can not be reclaimed currently. Skipping.", hr->hrm_index()); + pinned_old_regions->append(hr); + continue; + } + double predicted_time_ms = _policy->predict_region_total_time_ms(hr, false); + time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0); + // Add regions to old set until we reach the minimum amount + if (initial_old_regions->length() < min_old_cset_length) { + initial_old_regions->append(hr); + num_initial_regions_selected++; + predicted_initial_time_ms += predicted_time_ms; + // Record the number of regions added with no time remaining + if (time_remaining_ms == 0.0) { + num_expensive_regions++; + } + } else if (!check_time_remaining) { + // In the non-auto-tuning case, we'll finish adding regions + // to the CSet if we reach the minimum. + print_finish_message("Region amount reached min", true); + break; + } else { + // Keep adding regions to old set until we reach the optional threshold + if (time_remaining_ms > optional_threshold_ms) { + predicted_initial_time_ms += predicted_time_ms; + initial_old_regions->append(hr); + num_initial_regions_selected++; + } else if (time_remaining_ms > 0) { + // Keep adding optional regions until time is up. 
+ assert(_optional_old_regions.length() < max_optional_regions, "Should not be possible."); + predicted_optional_time_ms += predicted_time_ms; + _optional_old_regions.append(hr); + num_optional_regions_selected++; + } else { + print_finish_message("Predicted time too high", true); + break; + } + } + } + if (iter == marking_list->end()) { + log_debug(gc, ergo, cset)("Marking candidates exhausted."); + } + + if (num_expensive_regions > 0) { + log_debug(gc, ergo, cset)("Added %u marking candidates to collection set although the predicted time was too high.", + num_expensive_regions); + } + + log_debug(gc, ergo, cset)("Finish adding marking candidates to collection set. Initial: %u, optional: %u, pinned: %u, " + "predicted initial time: %1.2fms, predicted optional time: %1.2fms, time remaining: %1.2fms", + num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions, + predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms); + + assert(initial_old_regions->length() == num_initial_regions_selected, "must be"); + assert(_optional_old_regions.length() == num_optional_regions_selected, "must be"); + return time_remaining_ms; +} + +void G1CollectionSet::select_candidates_from_retained(double time_remaining_ms, + G1CollectionCandidateRegionList* initial_old_regions, + G1CollectionCandidateRegionList* pinned_old_regions) { + uint num_initial_regions_selected = 0; + uint num_optional_regions_selected = 0; + uint num_expensive_regions_selected = 0; + uint num_pinned_regions = 0; + + double predicted_initial_time_ms = 0.0; + double predicted_optional_time_ms = 0.0; + + uint const min_regions = _policy->min_retained_old_cset_length(); + // We want to make sure that on the one hand we process the retained regions asap, + // but on the other hand do not take too many of them as optional regions. + // So we split the time budget into budget we will unconditionally take into the + // initial old regions, and budget for taking optional regions from the retained + // list. + double optional_time_remaining_ms = _policy->max_time_for_retaining(); + time_remaining_ms = MIN2(time_remaining_ms, optional_time_remaining_ms); + + G1CollectionCandidateList* retained_list = &candidates()->retained_regions(); + + log_debug(gc, ergo, cset)("Start adding retained candidates to collection set. " + "Min %u regions, available %u, " + "time remaining %1.2fms, optional remaining %1.2fms", + min_regions, retained_list->length(), time_remaining_ms, optional_time_remaining_ms); + + for (G1CollectionSetCandidateInfo* ci : *retained_list) { + G1HeapRegion* r = ci->_r; + double predicted_time_ms = _policy->predict_region_total_time_ms(r, collector_state()->in_young_only_phase()); + bool fits_in_remaining_time = predicted_time_ms <= time_remaining_ms; + // If we can't reclaim that region ignore it for now. + if (r->has_pinned_objects()) { + num_pinned_regions++; + if (ci->update_num_unreclaimed()) { + log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Skipping.", r->hrm_index()); + } else { + log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. 
Dropping.", r->hrm_index()); + pinned_old_regions->append(r); + } + continue; + } + + if (fits_in_remaining_time || (num_expensive_regions_selected < min_regions)) { + predicted_initial_time_ms += predicted_time_ms; + if (!fits_in_remaining_time) { + num_expensive_regions_selected++; + } + initial_old_regions->append(r); + num_initial_regions_selected++; + } else if (predicted_time_ms <= optional_time_remaining_ms) { + predicted_optional_time_ms += predicted_time_ms; + _optional_old_regions.append(r); + num_optional_regions_selected++; + } else { + // Fits neither initial nor optional time limit. Exit. + break; + } + time_remaining_ms = MAX2(0.0, time_remaining_ms - predicted_time_ms); + optional_time_remaining_ms = MAX2(0.0, optional_time_remaining_ms - predicted_time_ms); + } + + uint num_regions_selected = num_initial_regions_selected + num_optional_regions_selected; + if (num_regions_selected == retained_list->length()) { + log_debug(gc, ergo, cset)("Retained candidates exhausted."); + } + if (num_expensive_regions_selected > 0) { + log_debug(gc, ergo, cset)("Added %u retained candidates to collection set although the predicted time was too high.", + num_expensive_regions_selected); + } + + log_debug(gc, ergo, cset)("Finish adding retained candidates to collection set. Initial: %u, optional: %u, pinned: %u, " + "predicted initial time: %1.2fms, predicted optional time: %1.2fms, " + "time remaining: %1.2fms optional time remaining %1.2fms", + num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions, + predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms, optional_time_remaining_ms); +} + +void G1CollectionSet::select_candidates_from_optional_regions(double time_remaining_ms, + G1CollectionCandidateRegionList* selected_regions) { + assert(optional_region_length() > 0, + "Should only be called when there are optional regions"); + + double total_prediction_ms = 0.0; + + for (G1HeapRegion* r : _optional_old_regions) { + double prediction_ms = _policy->predict_region_total_time_ms(r, false); + + if (prediction_ms > time_remaining_ms) { + log_debug(gc, ergo, cset)("Prediction %.3fms for region %u does not fit remaining time: %.3fms.", + prediction_ms, r->hrm_index(), time_remaining_ms); + break; + } + // This region will be included in the next optional evacuation. + + total_prediction_ms += prediction_ms; + time_remaining_ms -= prediction_ms; + + selected_regions->append(r); + } + + log_debug(gc, ergo, cset)("Prepared %u regions out of %u for optional evacuation. 
Total predicted time: %.3fms", + selected_regions->length(), _optional_old_regions.length(), total_prediction_ms); +} + void G1CollectionSet::prepare_optional_regions(G1CollectionCandidateRegionList* regions){ uint cur_index = 0; for (G1HeapRegion* r : *regions) { @@ -441,9 +646,8 @@ bool G1CollectionSet::finalize_optional_for_evacuation(double remaining_pause_ti update_incremental_marker(); G1CollectionCandidateRegionList selected_regions; - _policy->calculate_optional_collection_set_regions(&_optional_old_regions, - remaining_pause_time, - &selected_regions); + select_candidates_from_optional_regions(remaining_pause_time, + &selected_regions); move_candidates_to_collection_set(&selected_regions); diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.hpp b/src/hotspot/share/gc/g1/g1CollectionSet.hpp index e569d3ee966c3..5280ba7d0fd6c 100644 --- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp +++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp @@ -196,6 +196,22 @@ class G1CollectionSet { // and retained collection set candidates. void finalize_old_part(double time_remaining_ms); + // Calculate and fill in the initial, optional and pinned old gen candidate regions from + // the given candidate list and the remaining time. + // Returns the remaining time. + double select_candidates_from_marking(double time_remaining_ms, + G1CollectionCandidateRegionList* initial_old_regions, + G1CollectionCandidateRegionList* pinned_old_regions); + + void select_candidates_from_retained(double time_remaining_ms, + G1CollectionCandidateRegionList* initial_old_regions, + G1CollectionCandidateRegionList* pinned_old_regions); + + // Calculate the number of optional regions from the given collection set candidates, + // the remaining time and the maximum number of these regions. + void select_candidates_from_optional_regions(double time_remaining_ms, + G1CollectionCandidateRegionList* selected); + // Iterate the part of the collection set given by the offset and length applying the given // G1HeapRegionClosure. The worker_id will determine where in the part to start the iteration // to allow for more efficient parallel iteration. diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp index e7e57c962c734..6d0864f032c86 100644 --- a/src/hotspot/share/gc/g1/g1Policy.cpp +++ b/src/hotspot/share/gc/g1/g1Policy.cpp @@ -1467,219 +1467,6 @@ uint G1Policy::calc_max_old_cset_length() const { return (uint)ceil(result); } -static void print_finish_message(const char* reason, bool from_marking) { - log_debug(gc, ergo, cset)("Finish adding %s candidates to collection set (%s).", - from_marking ? 
"marking" : "retained", reason); -} - -double G1Policy::select_candidates_from_marking(G1CollectionCandidateList* marking_list, - double time_remaining_ms, - G1CollectionCandidateRegionList* initial_old_regions, - G1CollectionCandidateRegionList* optional_old_regions, - G1CollectionCandidateRegionList* pinned_old_regions) { - assert(marking_list != nullptr, "must be"); - - uint num_expensive_regions = 0; - - uint num_initial_regions_selected = 0; - uint num_optional_regions_selected = 0; - uint num_pinned_regions = 0; - - double predicted_initial_time_ms = 0.0; - double predicted_optional_time_ms = 0.0; - - double optional_threshold_ms = time_remaining_ms * optional_prediction_fraction(); - - const uint min_old_cset_length = calc_min_old_cset_length(candidates()->last_marking_candidates_length()); - const uint max_old_cset_length = MAX2(min_old_cset_length, calc_max_old_cset_length()); - const uint max_optional_regions = max_old_cset_length - min_old_cset_length; - bool check_time_remaining = use_adaptive_young_list_length(); - - log_debug(gc, ergo, cset)("Start adding marking candidates to collection set. " - "Min %u regions, max %u regions, available %u regions" - "time remaining %1.2fms, optional threshold %1.2fms", - min_old_cset_length, max_old_cset_length, marking_list->length(), time_remaining_ms, optional_threshold_ms); - - G1CollectionCandidateListIterator iter = marking_list->begin(); - for (; iter != marking_list->end(); ++iter) { - if (num_initial_regions_selected + num_optional_regions_selected >= max_old_cset_length) { - // Added maximum number of old regions to the CSet. - print_finish_message("Maximum number of regions reached", true); - break; - } - G1HeapRegion* hr = (*iter)->_r; - // Skip evacuating pinned marking regions because we are not getting any free - // space from them (and we expect to get free space from marking candidates). - // Also prepare to move them to retained regions to be evacuated optionally later - // to not impact the mixed phase too much. - if (hr->has_pinned_objects()) { - num_pinned_regions++; - (*iter)->update_num_unreclaimed(); - log_trace(gc, ergo, cset)("Marking candidate %u can not be reclaimed currently. Skipping.", hr->hrm_index()); - pinned_old_regions->append(hr); - continue; - } - double predicted_time_ms = predict_region_total_time_ms(hr, false); - time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0); - // Add regions to old set until we reach the minimum amount - if (initial_old_regions->length() < min_old_cset_length) { - initial_old_regions->append(hr); - num_initial_regions_selected++; - predicted_initial_time_ms += predicted_time_ms; - // Record the number of regions added with no time remaining - if (time_remaining_ms == 0.0) { - num_expensive_regions++; - } - } else if (!check_time_remaining) { - // In the non-auto-tuning case, we'll finish adding regions - // to the CSet if we reach the minimum. - print_finish_message("Region amount reached min", true); - break; - } else { - // Keep adding regions to old set until we reach the optional threshold - if (time_remaining_ms > optional_threshold_ms) { - predicted_initial_time_ms += predicted_time_ms; - initial_old_regions->append(hr); - num_initial_regions_selected++; - } else if (time_remaining_ms > 0) { - // Keep adding optional regions until time is up. 
- assert(optional_old_regions->length() < max_optional_regions, "Should not be possible."); - predicted_optional_time_ms += predicted_time_ms; - optional_old_regions->append(hr); - num_optional_regions_selected++; - } else { - print_finish_message("Predicted time too high", true); - break; - } - } - } - if (iter == marking_list->end()) { - log_debug(gc, ergo, cset)("Marking candidates exhausted."); - } - - if (num_expensive_regions > 0) { - log_debug(gc, ergo, cset)("Added %u marking candidates to collection set although the predicted time was too high.", - num_expensive_regions); - } - - log_debug(gc, ergo, cset)("Finish adding marking candidates to collection set. Initial: %u, optional: %u, pinned: %u, " - "predicted initial time: %1.2fms, predicted optional time: %1.2fms, time remaining: %1.2fms", - num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions, - predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms); - - assert(initial_old_regions->length() == num_initial_regions_selected, "must be"); - assert(optional_old_regions->length() == num_optional_regions_selected, "must be"); - return time_remaining_ms; -} - -void G1Policy::select_candidates_from_retained(G1CollectionCandidateList* retained_list, - double time_remaining_ms, - G1CollectionCandidateRegionList* initial_old_regions, - G1CollectionCandidateRegionList* optional_old_regions, - G1CollectionCandidateRegionList* pinned_old_regions) { - - uint const min_regions = min_retained_old_cset_length(); - - uint num_initial_regions_selected = 0; - uint num_optional_regions_selected = 0; - uint num_expensive_regions_selected = 0; - uint num_pinned_regions = 0; - - double predicted_initial_time_ms = 0.0; - double predicted_optional_time_ms = 0.0; - - // We want to make sure that on the one hand we process the retained regions asap, - // but on the other hand do not take too many of them as optional regions. - // So we split the time budget into budget we will unconditionally take into the - // initial old regions, and budget for taking optional regions from the retained - // list. - double optional_time_remaining_ms = max_time_for_retaining(); - time_remaining_ms = MIN2(time_remaining_ms, optional_time_remaining_ms); - - log_debug(gc, ergo, cset)("Start adding retained candidates to collection set. " - "Min %u regions, available %u, " - "time remaining %1.2fms, optional remaining %1.2fms", - min_regions, retained_list->length(), time_remaining_ms, optional_time_remaining_ms); - - for (G1CollectionSetCandidateInfo* ci : *retained_list) { - G1HeapRegion* r = ci->_r; - double predicted_time_ms = predict_region_total_time_ms(r, collector_state()->in_young_only_phase()); - bool fits_in_remaining_time = predicted_time_ms <= time_remaining_ms; - // If we can't reclaim that region ignore it for now. - if (r->has_pinned_objects()) { - num_pinned_regions++; - if (ci->update_num_unreclaimed()) { - log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Skipping.", r->hrm_index()); - } else { - log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. 
Dropping.", r->hrm_index()); - pinned_old_regions->append(r); - } - continue; - } - - if (fits_in_remaining_time || (num_expensive_regions_selected < min_regions)) { - predicted_initial_time_ms += predicted_time_ms; - if (!fits_in_remaining_time) { - num_expensive_regions_selected++; - } - initial_old_regions->append(r); - num_initial_regions_selected++; - } else if (predicted_time_ms <= optional_time_remaining_ms) { - predicted_optional_time_ms += predicted_time_ms; - optional_old_regions->append(r); - num_optional_regions_selected++; - } else { - // Fits neither initial nor optional time limit. Exit. - break; - } - time_remaining_ms = MAX2(0.0, time_remaining_ms - predicted_time_ms); - optional_time_remaining_ms = MAX2(0.0, optional_time_remaining_ms - predicted_time_ms); - } - - uint num_regions_selected = num_initial_regions_selected + num_optional_regions_selected; - if (num_regions_selected == retained_list->length()) { - log_debug(gc, ergo, cset)("Retained candidates exhausted."); - } - if (num_expensive_regions_selected > 0) { - log_debug(gc, ergo, cset)("Added %u retained candidates to collection set although the predicted time was too high.", - num_expensive_regions_selected); - } - - log_debug(gc, ergo, cset)("Finish adding retained candidates to collection set. Initial: %u, optional: %u, pinned: %u, " - "predicted initial time: %1.2fms, predicted optional time: %1.2fms, " - "time remaining: %1.2fms optional time remaining %1.2fms", - num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions, - predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms, optional_time_remaining_ms); -} - -void G1Policy::calculate_optional_collection_set_regions(G1CollectionCandidateRegionList* optional_regions, - double time_remaining_ms, - G1CollectionCandidateRegionList* selected_regions) { - assert(_collection_set->optional_region_length() > 0, - "Should only be called when there are optional regions"); - - double total_prediction_ms = 0.0; - - for (G1HeapRegion* r : *optional_regions) { - double prediction_ms = predict_region_total_time_ms(r, false); - - if (prediction_ms > time_remaining_ms) { - log_debug(gc, ergo, cset)("Prediction %.3fms for region %u does not fit remaining time: %.3fms.", - prediction_ms, r->hrm_index(), time_remaining_ms); - break; - } - // This region will be included in the next optional evacuation. - - total_prediction_ms += prediction_ms; - time_remaining_ms -= prediction_ms; - - selected_regions->append(r); - } - - log_debug(gc, ergo, cset)("Prepared %u regions out of %u for optional evacuation. Total predicted time: %.3fms", - selected_regions->length(), optional_regions->length(), total_prediction_ms); -} - void G1Policy::transfer_survivors_to_cset(const G1SurvivorRegions* survivors) { start_adding_survivor_regions(); diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp index 98d444084678c..9a6ffb570be70 100644 --- a/src/hotspot/share/gc/g1/g1Policy.hpp +++ b/src/hotspot/share/gc/g1/g1Policy.hpp @@ -335,27 +335,7 @@ class G1Policy: public CHeapObj { // Amount of allowed waste in bytes in the collection set. size_t allowed_waste_in_collection_set() const; - // Calculate and fill in the initial, optional and pinned old gen candidate regions from - // the given candidate list and the remaining time. - // Returns the remaining time. 
- double select_candidates_from_marking(G1CollectionCandidateList* marking_list, - double time_remaining_ms, - G1CollectionCandidateRegionList* initial_old_regions, - G1CollectionCandidateRegionList* optional_old_regions, - G1CollectionCandidateRegionList* pinned_old_regions); - - void select_candidates_from_retained(G1CollectionCandidateList* retained_list, - double time_remaining_ms, - G1CollectionCandidateRegionList* initial_old_regions, - G1CollectionCandidateRegionList* optional_old_regions, - G1CollectionCandidateRegionList* pinned_old_regions); - - // Calculate the number of optional regions from the given collection set candidates, - // the remaining time and the maximum number of these regions and return the number - // of actually selected regions in num_optional_regions. - void calculate_optional_collection_set_regions(G1CollectionCandidateRegionList* optional_old_regions, - double time_remaining_ms, - G1CollectionCandidateRegionList* selected); + private: @@ -423,12 +403,12 @@ class G1Policy: public CHeapObj<mtGC> { size_t desired_survivor_size(uint max_regions) const; +public: // Fraction used when predicting how many optional regions to include in // the CSet. This fraction of the available time is used for optional regions, // the rest is used to add old regions to the normal CSet. double optional_prediction_fraction() const { return 0.2; } -public: // Fraction used when evacuating the optional regions. This fraction of the // remaining time is used to choose what regions to include in the evacuation. double optional_evacuation_fraction() const { return 0.75; } diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp index f5f65cf1c48aa..bb5ac5036fe47 100644 --- a/src/hotspot/share/gc/g1/g1RemSet.cpp +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp @@ -967,6 +967,10 @@ class G1MergeHeapRootsTask : public WorkerTask { _merged[G1GCPhaseTimes::MergeRSCards] += increment; } + void dec_remset_cards(size_t decrement) { + _merged[G1GCPhaseTimes::MergeRSCards] -= decrement; + } + size_t merged(uint i) const { return _merged[i]; } }; @@ -1091,6 +1095,11 @@ class G1MergeHeapRootsTask : public WorkerTask { G1MergeCardSetStats stats() { _merge_card_set_cache.flush(); + // Compensation for the dummy cards that were initially pushed into the + // card cache. + // We do not need to compensate for the other counters because the dummy + // card mark will never update another counter because it is initially "dirty". 
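// ---------------------------------------------------------------------------
// [Illustrative aside by the editor, not part of the patch.] The compensation
// above can be pictured with a toy counter, under the assumption that each
// dummy card seeded into the cache is eventually counted once as "merged":
// with N dummies, the counter ends up N too high, and subtracting N at flush
// time restores the true count. Hypothetical names throughout:

#include <cassert>
#include <cstddef>

int main() {
  const std::size_t kCacheSize = 1024;  // dummy cards seeded into the cache
  std::size_t merged_cards = 0;
  merged_cards += kCacheSize;           // dummies counted as they are evicted
  merged_cards += 10;                   // 10 real card merges
  merged_cards -= kCacheSize;           // flush-time compensation
  assert(merged_cards == 10);           // true count restored
  return 0;
}
// ---------------------------------------------------------------------------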
+ _stats.dec_remset_cards(G1MergeCardSetCache::CacheSize); return _stats; } }; diff --git a/src/hotspot/share/gc/parallel/psScavenge.hpp b/src/hotspot/share/gc/parallel/psScavenge.hpp index 99d0487760b15..55abdfd3cf38e 100644 --- a/src/hotspot/share/gc/parallel/psScavenge.hpp +++ b/src/hotspot/share/gc/parallel/psScavenge.hpp @@ -34,9 +34,7 @@ #include "oops/oop.hpp" #include "utilities/stack.hpp" -class ReferenceProcessor; class ParallelScavengeHeap; -class ParallelScavengeTracer; class PSIsAliveClosure; class STWGCTimer; diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp index 59e0245204441..643a7936b9b17 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp @@ -109,6 +109,10 @@ Label* BarrierStubC2::continuation() { return &_continuation; } +uint8_t BarrierStubC2::barrier_data() const { + return _node->barrier_data(); +} + void BarrierStubC2::preserve(Register r) { const VMReg vm_reg = r->as_VMReg(); assert(vm_reg->is_Register(), "r must be a general-purpose register"); diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp index c1485c069c83c..00fbf1f2c9f8b 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -254,6 +254,8 @@ class BarrierStubC2 : public ArenaObj { Label* entry(); // Return point from the stub (typically end of barrier). Label* continuation(); + // High-level, GC-specific barrier flags. + uint8_t barrier_data() const; // Preserve the value in reg across runtime calls in this barrier. void preserve(Register reg); @@ -340,6 +342,8 @@ class BarrierSetC2: public CHeapObj<mtGC> { // Estimated size of the node barrier in number of C2 Ideal nodes. // This is used to guide heuristics in C2, e.g. whether to unroll a loop. virtual uint estimated_barrier_size(const Node* node) const { return 0; } + // Whether the given store can be used to initialize a newly allocated object. + virtual bool can_initialize_object(const StoreNode* store) const { return true; } enum CompilePhase { BeforeOptimize, diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp index 87bb9f3cd5170..11b742156a831 100644 --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp @@ -125,39 +125,10 @@ void CardTableBarrierSetC2::post_barrier(GraphKit* kit, kit->final_sync(ideal); } -void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const { - BarrierSetC2::clone(kit, src, dst, size, is_array); - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - - // If necessary, emit some card marks afterwards. (Non-arrays only.) - bool card_mark = !is_array && !use_ReduceInitialCardMarks(); - if (card_mark) { - assert(!is_array, ""); - // Put in store barrier for any and all oops we are sticking - // into this object. (We could avoid this if we could prove - // that the object type contains no oop fields at all.) 
- Node* no_particular_value = nullptr; - Node* no_particular_field = nullptr; - int raw_adr_idx = Compile::AliasIdxRaw; - post_barrier(kit, kit->control(), - kit->memory(raw_adr_type), - dst, - no_particular_field, - raw_adr_idx, - no_particular_value, - T_OBJECT, - false); - } -} - bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const { return ReduceInitialCardMarks; } -bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const { - return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM; -} - void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); Node *shift = node->unique_out(); diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp index 9512f09ff8a6d..3bbf14892d3ef 100644 --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp @@ -42,8 +42,6 @@ class CardTableBarrierSetC2: public ModRefBarrierSetC2 { Node* byte_map_base_node(GraphKit* kit) const; public: - virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; - virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const; diff --git a/src/hotspot/share/gc/shared/oopStorage.cpp b/src/hotspot/share/gc/shared/oopStorage.cpp index 568888ac7d97e..2373d6b1d93a8 100644 --- a/src/hotspot/share/gc/shared/oopStorage.cpp +++ b/src/hotspot/share/gc/shared/oopStorage.cpp @@ -300,7 +300,12 @@ void OopStorage::Block::set_active_index(size_t index) { size_t OopStorage::Block::active_index_safe(const Block* block) { STATIC_ASSERT(sizeof(intptr_t) == sizeof(block->_active_index)); - return SafeFetchN((intptr_t*)&block->_active_index, 0); + // Be careful, because block could be a false positive from block_for_ptr. + assert(block != nullptr, "precondition"); + uintptr_t block_addr = reinterpret_cast<uintptr_t>(block); + uintptr_t index_loc = block_addr + offset_of(Block, _active_index); + static_assert(sizeof(size_t) == sizeof(intptr_t), "assumption"); + return static_cast<size_t>(SafeFetchN(reinterpret_cast<intptr_t*>(index_loc), 0)); } unsigned OopStorage::Block::get_index(const oop* ptr) const { @@ -366,21 +371,23 @@ void OopStorage::Block::delete_block(const Block& block) { OopStorage::Block* OopStorage::Block::block_for_ptr(const OopStorage* owner, const oop* ptr) { STATIC_ASSERT(_data_pos == 0); - // Const-ness of ptr is not related to const-ness of containing block. + assert(ptr != nullptr, "precondition"); // Blocks are allocated section-aligned, so get the containing section. - oop* section_start = align_down(const_cast<oop*>(ptr), block_alignment); + uintptr_t section_start = align_down(reinterpret_cast<uintptr_t>(ptr), block_alignment); // Start with a guess that the containing section is the last section, // so the block starts section_count-1 sections earlier. - oop* section = section_start - (section_size * (section_count - 1)); + size_t section_size_in_bytes = sizeof(oop) * section_size; + uintptr_t section = section_start - (section_size_in_bytes * (section_count - 1)); // Walk up through the potential block start positions, looking for // the owner in the expected location. 
If we're below the actual block // start position, the value at the owner position will be some oop // (possibly null), which can never match the owner. intptr_t owner_addr = reinterpret_cast<intptr_t>(owner); - for (unsigned i = 0; i < section_count; ++i, section += section_size) { - Block* candidate = reinterpret_cast<Block*>(section); - if (SafeFetchN(&candidate->_owner_address, 0) == owner_addr) { - return candidate; + for (unsigned i = 0; i < section_count; ++i, section += section_size_in_bytes) { + uintptr_t owner_loc = section + offset_of(Block, _owner_address); + static_assert(sizeof(OopStorage*) == sizeof(intptr_t), "assumption"); + if (SafeFetchN(reinterpret_cast<intptr_t*>(owner_loc), 0) == owner_addr) { + return reinterpret_cast<Block*>(section); } } return nullptr; @@ -643,8 +650,7 @@ class OopStorage::WithActiveArray : public StackObj { } }; -OopStorage::Block* OopStorage::find_block_or_null(const oop* ptr) const { - assert(ptr != nullptr, "precondition"); +OopStorage::Block* OopStorage::block_for_ptr(const oop* ptr) const { return Block::block_for_ptr(this, ptr); } @@ -771,7 +777,7 @@ static inline void check_release_entry(const oop* entry) { void OopStorage::release(const oop* ptr) { check_release_entry(ptr); - Block* block = find_block_or_null(ptr); + Block* block = block_for_ptr(ptr); assert(block != nullptr, "%s: invalid release " PTR_FORMAT, name(), p2i(ptr)); log_trace(oopstorage, ref)("%s: releasing " PTR_FORMAT, name(), p2i(ptr)); block->release_entries(block->bitmask_for_entry(ptr), this); @@ -782,7 +788,7 @@ void OopStorage::release(const oop* const* ptrs, size_t size) { size_t i = 0; while (i < size) { check_release_entry(ptrs[i]); - Block* block = find_block_or_null(ptrs[i]); + Block* block = block_for_ptr(ptrs[i]); assert(block != nullptr, "%s: invalid release " PTR_FORMAT, name(), p2i(ptrs[i])); size_t count = 0; uintx releasing = 0; @@ -989,7 +995,8 @@ bool OopStorage::delete_empty_blocks() { } OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const { - const Block* block = find_block_or_null(ptr); + if (ptr == nullptr) return INVALID_ENTRY; + const Block* block = block_for_ptr(ptr); if (block != nullptr) { // Prevent block deletion and _active_array modification. 
MutexLocker ml(_allocation_mutex, Mutex::_no_safepoint_check_flag); @@ -1137,7 +1144,7 @@ const char* OopStorage::name() const { return _name; } bool OopStorage::print_containing(const oop* addr, outputStream* st) { if (addr != nullptr) { - Block* block = find_block_or_null(addr); + Block* block = block_for_ptr(addr); if (block != nullptr && block->print_containing(addr, st)) { st->print(" in oop storage \"%s\"", name()); return true; diff --git a/src/hotspot/share/gc/shared/oopStorage.hpp b/src/hotspot/share/gc/shared/oopStorage.hpp index 96cc5a23d6a91..34c980a058659 100644 --- a/src/hotspot/share/gc/shared/oopStorage.hpp +++ b/src/hotspot/share/gc/shared/oopStorage.hpp @@ -288,7 +288,7 @@ class OopStorage : public CHeapObjBase { Block* block_for_allocation(); void log_block_transition(Block* block, const char* new_state) const; - Block* find_block_or_null(const oop* ptr) const; + Block* block_for_ptr(const oop* ptr) const; void delete_empty_block(const Block& block); bool reduce_deferred_updates(); void record_needs_cleanup(); diff --git a/src/hotspot/share/gc/shared/oopStorage.inline.hpp b/src/hotspot/share/gc/shared/oopStorage.inline.hpp index 545da0be0a76e..da0926a20b6e2 100644 --- a/src/hotspot/share/gc/shared/oopStorage.inline.hpp +++ b/src/hotspot/share/gc/shared/oopStorage.inline.hpp @@ -184,7 +184,10 @@ class OopStorage::Block /* No base class, to avoid messing up alignment. */ { void set_active_index(size_t index); static size_t active_index_safe(const Block* block); // Returns 0 if access fails. - // Returns null if ptr is not in a block or not allocated in that block. + // Return block of owner containing ptr, if ptr is a valid entry of owner. + // If ptr is not a valid entry of owner then returns either null or a "false + // positive" pointer; see allocation_status. + // precondition: ptr != nullptr static Block* block_for_ptr(const OopStorage* owner, const oop* ptr); oop* allocate(); diff --git a/src/hotspot/share/gc/shared/oopStorageSet.cpp b/src/hotspot/share/gc/shared/oopStorageSet.cpp index c6947590d96fb..e3a9fccbad3dc 100644 --- a/src/hotspot/share/gc/shared/oopStorageSet.cpp +++ b/src/hotspot/share/gc/shared/oopStorageSet.cpp @@ -86,7 +86,9 @@ bool OopStorageSet::print_containing(const void* addr, outputStream* st) { if (addr != nullptr) { const void* aligned_addr = align_down(addr, alignof(oop)); for (OopStorage* storage : Range()) { - if (storage->print_containing((oop*) aligned_addr, st)) { + // Check for null for extra safety: might get here while handling error + // before storage initialization. 
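// ---------------------------------------------------------------------------
// [Illustrative aside by the editor, not part of the patch.] The
// block_for_ptr() probing above rounds the entry pointer down to the section
// alignment, then tests each candidate block-start address by speculatively
// reading the would-be _owner_address slot and comparing it with the owner.
// SafeFetchN makes that read safe against unmapped memory; this simplified
// model substitutes a plain read (so it is only well-defined when every
// candidate address is mapped) and uses hypothetical names:

#include <cstddef>
#include <cstdint>

struct ToyBlock {
  std::intptr_t owner_address;  // at a known offset from the block start
};

ToyBlock* toy_block_for_ptr(const void* owner, const void* ptr,
                            std::uintptr_t alignment, std::size_t section_bytes,
                            unsigned section_count) {
  std::uintptr_t section_start =
      reinterpret_cast<std::uintptr_t>(ptr) & ~(alignment - 1);
  std::uintptr_t candidate = section_start - section_bytes * (section_count - 1);
  for (unsigned i = 0; i < section_count; ++i, candidate += section_bytes) {
    // A real implementation must use a fault-tolerant read (SafeFetchN) here.
    ToyBlock* b = reinterpret_cast<ToyBlock*>(candidate);
    if (b->owner_address == reinterpret_cast<std::intptr_t>(owner)) {
      return b;  // owner matched in the expected slot: the block, or a
                 // false positive the caller must still vet
    }
  }
  return nullptr;  // ptr is not in any block of this owner
}
// ---------------------------------------------------------------------------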
+ if ((storage != nullptr) && storage->print_containing((oop*) aligned_addr, st)) { if (aligned_addr != addr) { st->print_cr(" (unaligned)"); } else { diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp index 368d76696058c..7ac9dcc2e8134 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp @@ -249,8 +249,9 @@ void ShenandoahBarrierSetC2::satb_write_barrier_pre(GraphKit* kit, } __ else_(); { // logging buffer is full, call the runtime - const TypeFunc *tf = ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type(); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), "shenandoah_wb_pre", pre_val, tls); + const TypeFunc *tf = ShenandoahBarrierSetC2::write_ref_field_pre_Type(); + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), "shenandoah_wb_pre", + pre_val, tls); } __ end_if(); // (!index) } __ end_if(); // (pre_val != nullptr) } __ end_if(); // (!marking) @@ -268,7 +269,12 @@ void ShenandoahBarrierSetC2::satb_write_barrier_pre(GraphKit* kit, bool ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(Node* call) { return call->is_CallLeaf() && - call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry); + call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre); +} + +bool ShenandoahBarrierSetC2::is_shenandoah_clone_call(Node* call) { + return call->is_CallLeaf() && + call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_barrier); } bool ShenandoahBarrierSetC2::is_shenandoah_lrb_call(Node* call) { @@ -428,7 +434,7 @@ void ShenandoahBarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, N #undef __ -const TypeFunc* ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type() { +const TypeFunc* ShenandoahBarrierSetC2::write_ref_field_pre_Type() { const Type **fields = TypeTuple::fields(2); fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread @@ -441,7 +447,7 @@ const TypeFunc* ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type() { return TypeFunc::make(domain, range); } -const TypeFunc* ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type() { +const TypeFunc* ShenandoahBarrierSetC2::clone_barrier_Type() { const Type **fields = TypeTuple::fields(1); fields[TypeFunc::Parms+0] = TypeOopPtr::NOTNULL; // src oop const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields); @@ -453,7 +459,7 @@ const TypeFunc* ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type() { return TypeFunc::make(domain, range); } -const TypeFunc* ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type() { +const TypeFunc* ShenandoahBarrierSetC2::load_reference_barrier_Type() { const Type **fields = TypeTuple::fields(2); fields[TypeFunc::Parms+0] = TypeOopPtr::BOTTOM; // original field value fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // original load address @@ -675,20 +681,11 @@ bool ShenandoahBarrierSetC2::is_gc_pre_barrier_node(Node* node) const { return is_shenandoah_wb_pre_call(node); } -// Support for GC barriers emitted during parsing bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const { - if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) return true; - if (node->Opcode() != Op_CallLeaf && node->Opcode() 
!= Op_CallLeafNoFP) { - return false; - } - CallLeafNode *call = node->as_CallLeaf(); - if (call->_name == nullptr) { - return false; - } - - return strcmp(call->_name, "shenandoah_clone_barrier") == 0 || - strcmp(call->_name, "shenandoah_cas_obj") == 0 || - strcmp(call->_name, "shenandoah_wb_pre") == 0; + return (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) || + is_shenandoah_lrb_call(node) || + is_shenandoah_wb_pre_call(node) || + is_shenandoah_clone_call(node); } Node* ShenandoahBarrierSetC2::step_over_gc_barrier(Node* c) const { @@ -802,11 +799,11 @@ void ShenandoahBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCo // Heap is unstable, call into clone barrier stub Node* call = phase->make_leaf_call(unstable_ctrl, mem, - ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type(), - CAST_FROM_FN_PTR(address, ShenandoahRuntime::shenandoah_clone_barrier), - "shenandoah_clone", - TypeRawPtr::BOTTOM, - src_base); + ShenandoahBarrierSetC2::clone_barrier_Type(), + CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_barrier), + "shenandoah_clone", + TypeRawPtr::BOTTOM, + src_base); call = phase->transform_later(call); ctrl = phase->transform_later(new ProjNode(call, TypeFunc::Control)); @@ -981,7 +978,7 @@ void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase p Node* ShenandoahBarrierSetC2::ideal_node(PhaseGVN* phase, Node* n, bool can_reshape) const { if (is_shenandoah_wb_pre_call(n)) { - uint cnt = ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type()->domain()->cnt(); + uint cnt = ShenandoahBarrierSetC2::write_ref_field_pre_Type()->domain()->cnt(); if (n->req() > cnt) { Node* addp = n->in(cnt); if (has_only_shenandoah_wb_pre_uses(addp)) { @@ -1067,7 +1064,7 @@ bool ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, ui assert (n->is_Call(), ""); CallNode *call = n->as_Call(); if (ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(call)) { - uint cnt = ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type()->domain()->cnt(); + uint cnt = ShenandoahBarrierSetC2::write_ref_field_pre_Type()->domain()->cnt(); if (call->req() > cnt) { assert(call->req() == cnt + 1, "only one extra input"); Node *addp = call->in(cnt); diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp index 4619b217e96c6..6e241b39ce967 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp @@ -85,6 +85,7 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { static ShenandoahBarrierSetC2* bsc2(); static bool is_shenandoah_wb_pre_call(Node* call); + static bool is_shenandoah_clone_call(Node* call); static bool is_shenandoah_lrb_call(Node* call); static bool is_shenandoah_marking_if(PhaseValues* phase, Node* n); static bool is_shenandoah_state_load(Node* n); @@ -92,9 +93,9 @@ class ShenandoahBarrierSetC2 : public BarrierSetC2 { ShenandoahBarrierSetC2State* state() const; - static const TypeFunc* write_ref_field_pre_entry_Type(); - static const TypeFunc* shenandoah_clone_barrier_Type(); - static const TypeFunc* shenandoah_load_reference_barrier_Type(); + static const TypeFunc* write_ref_field_pre_Type(); + static const TypeFunc* clone_barrier_Type(); + static const TypeFunc* load_reference_barrier_Type(); virtual bool has_load_barrier_nodes() const { return true; } // This is the entry-point for the backend to perform accesses through the Access API. 
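// ---------------------------------------------------------------------------
// [Illustrative aside by the editor, not part of the patch.] The
// is_gc_barrier_node() rewrite above classifies barrier calls by comparing
// the call's entry point against the known runtime stub addresses, instead
// of strcmp() on stub names. Reduced to a minimal model with hypothetical
// types:

#include <cstddef>

using ToyEntry = void (*)(void*);

struct ToyCallNode {
  ToyEntry entry;                       // runtime entry point of this call
  ToyEntry entry_point() const { return entry; }
};

static void runtime_clone_barrier(void* /*src*/) {}  // stand-in runtime stub

static bool is_clone_call(const ToyCallNode* call) {
  // Pointer identity against the stub; no string names to drift out of sync.
  return call != nullptr && call->entry_point() == &runtime_clone_barrier;
}
// ---------------------------------------------------------------------------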
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp index 0a51f74299545..efa0ced603cda 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp @@ -414,7 +414,7 @@ void ShenandoahBarrierC2Support::verify(RootNode* root) { "cipherBlockChaining_decryptAESCrypt", { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, - "shenandoah_clone_barrier", + "shenandoah_clone", { { TypeFunc::Parms, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, "ghash_processBlocks", @@ -995,7 +995,7 @@ void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* lo name = "load_reference_barrier_phantom"; } } - Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM); + Node* call = new CallLeafNode(ShenandoahBarrierSetC2::load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM); call->init_req(TypeFunc::Control, ctrl); call->init_req(TypeFunc::I_O, phase->C->top()); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp index 6ed75a9d96106..75cdb99e177d1 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp @@ -225,8 +225,7 @@ void ShenandoahConcurrentMark::finish_mark() { assert(Thread::current()->is_VM_thread(), "Must by VM Thread"); finish_mark_work(); assert(task_queues()->is_empty(), "Should be empty"); - TASKQUEUE_STATS_ONLY(task_queues()->print_taskqueue_stats()); - TASKQUEUE_STATS_ONLY(task_queues()->reset_taskqueue_stats()); + TASKQUEUE_STATS_ONLY(task_queues()->print_and_reset_taskqueue_stats("")); ShenandoahHeap* const heap = ShenandoahHeap::heap(); heap->set_concurrent_mark_in_progress(false); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp index 8a4ef63b8e38b..df2d6d092e630 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp @@ -177,10 +177,13 @@ void ShenandoahControlThread::run_service() { // it is a normal completion, or the abort. heap->free_set()->log_status_under_lock(); - // Notify Universe about new heap usage. This has implications for - // global soft refs policy, and we better report it every time heap - // usage goes down. - heap->update_capacity_and_used_at_gc(); + { + // Notify Universe about new heap usage. This has implications for + // global soft refs policy, and we better report it every time heap + // usage goes down. + ShenandoahHeapLocker locker(heap->lock()); + heap->update_capacity_and_used_at_gc(); + } // Signal that we have completed a visit to all live objects. 
heap->record_whole_heap_examined_timestamp(); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 03e19a3af5e30..310cd5b8061eb 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -910,7 +910,6 @@ void ShenandoahFreeSet::try_recycle_trashed(ShenandoahHeapRegion* r) { void ShenandoahFreeSet::recycle_trash() { // lock is not reentrable, check we don't have it shenandoah_assert_not_heaplocked(); - size_t count = 0; for (size_t i = 0; i < _heap->num_regions(); i++) { ShenandoahHeapRegion* r = _heap->get_region(i); @@ -919,16 +918,45 @@ void ShenandoahFreeSet::recycle_trash() { } } - // Relinquish the lock after this much time passed. - static constexpr jlong deadline_ns = 30000; // 30 us + size_t total_batches = 0; + jlong batch_start_time = 0; + jlong recycle_trash_start_time = os::javaTimeNanos(); // This value will be treated as the initial batch_start_time + jlong batch_end_time = recycle_trash_start_time; + // Process as many batches as can be processed within 10 us. + static constexpr jlong deadline_ns = 10000; // 10 us size_t idx = 0; + jlong predicted_next_batch_end_time; + jlong batch_process_time_estimate = 0; while (idx < count) { - os::naked_yield(); // Yield to allow allocators to take the lock - ShenandoahHeapLocker locker(_heap->lock()); - const jlong deadline = os::javaTimeNanos() + deadline_ns; - while (idx < count && os::javaTimeNanos() < deadline) { - try_recycle_trashed(_trash_regions[idx++]); + if (idx > 0) { + os::naked_yield(); // Yield to allow allocators to take the lock, except on the first iteration } + // Avoid another call to javaTimeNanos() if we already know time at which last batch ended + batch_start_time = batch_end_time; + const jlong deadline = batch_start_time + deadline_ns; + + ShenandoahHeapLocker locker(_heap->lock()); + do { + // Measurements on typical 2024 hardware suggest it typically requires between 1400 and 2000 ns to process a batch of + // 32 regions, assuming low contention with other threads. Sometimes this goes higher, when mutator threads + // are contending for CPU cores and/or the heap lock. On this hardware with a 10 us deadline, we expect 3-6 batches + // to be processed between yields most of the time. + // + // Note that deadline is enforced since the end of previous batch. In the case that yield() or acquisition of heap lock + // takes a "long time", we will have less time to process regions, but we will always process at least one batch between + // yields. Yielding more frequently when there is heavy contention for the heap lock or for CPU cores is considered the + // right thing to do. + const size_t REGIONS_PER_BATCH = 32; + size_t max_idx = MIN2(count, idx + REGIONS_PER_BATCH); + while (idx < max_idx) { + try_recycle_trashed(_trash_regions[idx++]); + } + total_batches++; + batch_end_time = os::javaTimeNanos(); + // Estimate includes historic combination of yield times and heap lock acquisition times. 
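// ---------------------------------------------------------------------------
// [Illustrative aside by the editor, not part of the patch.] The batching
// scheme above in one self-contained loop: process fixed-size batches under
// the lock for as long as a running average (which folds in yield and
// lock-acquisition time) predicts the next batch will still finish before
// the deadline, then yield. std::chrono stands in for os::javaTimeNanos();
// names are hypothetical:

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <thread>
#include <vector>

template <typename Item>
void process_in_batches(std::vector<Item>& items, void (*process)(Item&)) {
  using clock = std::chrono::steady_clock;
  const std::size_t kBatch = 32;                        // items per batch
  const auto kDeadline = std::chrono::microseconds(10); // per lock hold
  std::size_t idx = 0, batches = 0;
  const auto start = clock::now();
  auto batch_end = start;
  while (idx < items.size()) {
    if (idx > 0) std::this_thread::yield();             // let other threads in
    const auto deadline = batch_end + kDeadline;
    // ... a real implementation re-acquires the shared lock here ...
    do {
      std::size_t max_idx = std::min(items.size(), idx + kBatch);
      while (idx < max_idx) process(items[idx++]);
      batches++;
      batch_end = clock::now();
      // Predict the next batch end from the average cost of all work so far.
    } while (idx < items.size() &&
             batch_end + (batch_end - start) / batches < deadline);
  }
}
// ---------------------------------------------------------------------------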
+ batch_process_time_estimate = (batch_end_time - recycle_trash_start_time) / total_batches; + predicted_next_batch_end_time = batch_end_time + batch_process_time_estimate; + } while ((idx < count) && (predicted_next_batch_end_time < deadline)); } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index 4ab17aabcc5ea..7ae4a1cf8b3dc 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -693,7 +693,7 @@ void ShenandoahHeap::notify_mutator_alloc_words(size_t words, bool waste) { if (ShenandoahPacing) { control_thread()->pacing_notify_alloc(words); if (waste) { - pacer()->claim_for_alloc(words, true); + pacer()->claim_for_alloc<true>(words); } } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp b/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp index 0fc6744c15ae7..e67d3d197d42d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp @@ -189,7 +189,8 @@ void ShenandoahPacer::restart_with(size_t non_taxable_bytes, double tax_rate) { _need_notify_waiters.try_set(); } -bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) { +template <bool FORCE> +bool ShenandoahPacer::claim_for_alloc(size_t words) { assert(ShenandoahPacing, "Only be here when pacing is enabled"); intptr_t tax = MAX2(1, words * Atomic::load(&_tax_rate)); @@ -198,7 +199,7 @@ bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) { intptr_t new_val = 0; do { cur = Atomic::load(&_budget); - if (cur < tax && !force) { + if (cur < tax && !FORCE) { // Progress depleted, alas. return false; } @@ -207,6 +208,9 @@ return true; } +template bool ShenandoahPacer::claim_for_alloc<true>(size_t words); +template bool ShenandoahPacer::claim_for_alloc<false>(size_t words); + void ShenandoahPacer::unpace_for_alloc(intptr_t epoch, size_t words) { assert(ShenandoahPacing, "Only be here when pacing is enabled"); @@ -227,18 +231,11 @@ void ShenandoahPacer::pace_for_alloc(size_t words) { assert(ShenandoahPacing, "Only be here when pacing is enabled"); // Fast path: try to allocate right away - bool claimed = claim_for_alloc(words, false); + bool claimed = claim_for_alloc<false>(words); if (claimed) { return; } - // Forcefully claim the budget: it may go negative at this point, and - // GC should replenish for this and subsequent allocations. After this claim, - // we would wait a bit until our claim is matched by additional progress, - // or the time budget depletes. - claimed = claim_for_alloc(words, true); - assert(claimed, "Should always succeed"); - // Threads that are attaching should not block at all: they are not // fully initialized yet. Blocking them would be awkward. // This is probably the path that allocates the thread oop itself. @@ -249,32 +246,25 @@ JavaThread* current = JavaThread::current(); if (current->is_attaching_via_jni() || !current->is_active_Java_thread()) { + claim_for_alloc<true>(words); return; } - double start = os::elapsedTime(); - - size_t max_ms = ShenandoahPacingMaxDelay; - size_t total_ms = 0; - - while (true) { + jlong const max_delay = ShenandoahPacingMaxDelay * NANOSECS_PER_MILLISEC; + jlong const start_time = os::elapsed_counter(); + while (!claimed && (os::elapsed_counter() - start_time) < max_delay) { // We could instead assist GC, but this would suffice for now. 
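// ---------------------------------------------------------------------------
// [Illustrative aside by the editor, not part of the patch.] The templated
// claim above is a lock-free budget decrement: compute the tax, then CAS the
// shared budget down, failing (unless forced) once the budget cannot cover
// the tax. Forced claims may drive the budget negative, to be replenished by
// GC progress. A minimal std::atomic model with hypothetical names:

#include <atomic>
#include <cstdint>

static std::atomic<std::intptr_t> g_budget{0};

template <bool FORCE>
bool toy_claim(std::intptr_t tax) {
  std::intptr_t cur = g_budget.load();
  std::intptr_t new_val;
  do {
    if (cur < tax && !FORCE) {
      return false;             // progress depleted; caller waits or forces
    }
    new_val = cur - tax;        // may go negative when FORCE is true
  } while (!g_budget.compare_exchange_weak(cur, new_val));
  return true;                  // a forced claim always succeeds
}
// ---------------------------------------------------------------------------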
-    size_t cur_ms = (max_ms > total_ms) ? (max_ms - total_ms) : 1;
-    wait(cur_ms);
-
-    double end = os::elapsedTime();
-    total_ms = (size_t)((end - start) * 1000);
-
-    if (total_ms > max_ms || Atomic::load(&_budget) >= 0) {
-      // Exiting if either:
-      // a) Spent local time budget to wait for enough GC progress.
-      //    Breaking out and allocating anyway, which may mean we outpace GC,
-      //    and start Degenerated GC cycle.
-      // b) The budget had been replenished, which means our claim is satisfied.
-      ShenandoahThreadLocalData::add_paced_time(JavaThread::current(), end - start);
-      break;
-    }
+    wait(1);
+    claimed = claim_for_alloc<false>(words);
+  }
+  if (!claimed) {
+    // Spent the local time budget waiting for enough GC progress.
+    // Force the allocation anyway, which may mean we outpace GC
+    // and start a Degenerated GC cycle.
+    claimed = claim_for_alloc<true>(words);
+    assert(claimed, "Should always succeed");
   }
+  ShenandoahThreadLocalData::add_paced_time(current, (double)(os::elapsed_counter() - start_time) / NANOSECS_PER_SEC);
 }
 
 void ShenandoahPacer::wait(size_t time_ms) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPacer.hpp b/src/hotspot/share/gc/shenandoah/shenandoahPacer.hpp
index 1c2bf00eb56ba..44ad2700f8704 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahPacer.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahPacer.hpp
@@ -107,7 +107,9 @@ class ShenandoahPacer : public CHeapObj<mtGC> {
 
   inline void report_alloc(size_t words);
 
-  bool claim_for_alloc(size_t words, bool force);
+  template <bool FORCE>
+  bool claim_for_alloc(size_t words);
+
   void pace_for_alloc(size_t words);
   void unpace_for_alloc(intptr_t epoch, size_t words);
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
index 2c727de585799..b217c641824c2 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
@@ -31,22 +31,19 @@
 #include "oops/oop.inline.hpp"
 #include "utilities/copy.hpp"
 
-void ShenandoahRuntime::arraycopy_barrier_oop_entry(oop* src, oop* dst, size_t length) {
-  ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
-  bs->arraycopy_barrier(src, dst, length);
-}
+JRT_LEAF(void, ShenandoahRuntime::arraycopy_barrier_oop(oop* src, oop* dst, size_t length))
+  ShenandoahBarrierSet::barrier_set()->arraycopy_barrier(src, dst, length);
+JRT_END
 
-void ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length) {
-  ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
-  bs->arraycopy_barrier(src, dst, length);
-}
+JRT_LEAF(void, ShenandoahRuntime::arraycopy_barrier_narrow_oop(narrowOop* src, narrowOop* dst, size_t length))
+  ShenandoahBarrierSet::barrier_set()->arraycopy_barrier(src, dst, length);
+JRT_END
 
-// Shenandoah pre write barrier slowpath
-JRT_LEAF(void, ShenandoahRuntime::write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread))
+JRT_LEAF(void, ShenandoahRuntime::write_ref_field_pre(oopDesc* orig, JavaThread* thread))
   assert(thread == JavaThread::current(), "pre-condition");
   assert(orig != nullptr, "should be optimized out");
   shenandoah_assert_correct(nullptr, orig);
-  // store the original value that was in the field reference
+  // Capture the original value that was in the field reference.
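Turning claim_for_alloc's force argument into a template parameter, as the pacer change above does, is a standard C++ device: every call site passes a compile-time constant, so each instantiation folds the branch away, and the two explicit instantiations let the definition stay in the .cpp file. A minimal sketch of the pattern under those assumptions (std::atomic in place of HotSpot's Atomic wrappers; the Budget class and its names are invented for illustration):

#include <atomic>
#include <cstdio>

class Budget {
  std::atomic<long> _budget{0};

public:
  // FORCE is a compile-time constant: the depleted-budget branch disappears
  // entirely from the <true> instantiation.
  template <bool FORCE>
  bool claim(long tax) {
    long cur = _budget.load();
    long new_val;
    do {
      if (cur < tax && !FORCE) {
        return false;  // progress depleted; only reachable in the <false> instantiation
      }
      new_val = cur - tax;
    } while (!_budget.compare_exchange_weak(cur, new_val));
    return true;
  }

  void replenish(long amount) { _budget.fetch_add(amount); }
};

// Explicit instantiations, mirroring the two added in shenandoahPacer.cpp.
template bool Budget::claim<false>(long);
template bool Budget::claim<true>(long);

int main() {
  Budget b;
  b.replenish(100);
  std::printf("polite: %d\n", b.claim<false>(150));  // fails: budget too low
  std::printf("forced: %d\n", b.claim<true>(150));   // succeeds: budget goes negative
}

The <false> instantiation is the fast path that may fail; <true> always succeeds and may drive the budget negative, mirroring the fast-path/forced split that pace_for_alloc() now spells out explicitly.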
assert(ShenandoahThreadLocalData::satb_mark_queue(thread).is_active(), "Shouldn't be here otherwise"); SATBMarkQueue& queue = ShenandoahThreadLocalData::satb_mark_queue(thread); ShenandoahBarrierSet::satb_mark_queue_set().enqueue_known_active(queue, orig); @@ -60,26 +57,24 @@ JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_strong_narrow(oopDe return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr); JRT_END -// Shenandoah clone barrier: makes sure that references point to to-space -// in cloned objects. -JRT_LEAF(void, ShenandoahRuntime::shenandoah_clone_barrier(oopDesc* src)) - oop s = oop(src); - shenandoah_assert_correct(nullptr, s); - ShenandoahBarrierSet::barrier_set()->clone_barrier(s); -JRT_END - -JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_weak(oopDesc * src, oop* load_addr)) +JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_weak(oopDesc* src, oop* load_addr)) return (oopDesc*) ShenandoahBarrierSet::barrier_set()->load_reference_barrier(ON_WEAK_OOP_REF, oop(src), load_addr); JRT_END -JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_weak_narrow(oopDesc * src, narrowOop* load_addr)) +JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_weak_narrow(oopDesc* src, narrowOop* load_addr)) return (oopDesc*) ShenandoahBarrierSet::barrier_set()->load_reference_barrier(ON_WEAK_OOP_REF, oop(src), load_addr); JRT_END -JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_phantom(oopDesc * src, oop* load_addr)) +JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_phantom(oopDesc* src, oop* load_addr)) return (oopDesc*) ShenandoahBarrierSet::barrier_set()->load_reference_barrier(ON_PHANTOM_OOP_REF, oop(src), load_addr); JRT_END -JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_phantom_narrow(oopDesc * src, narrowOop* load_addr)) +JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_phantom_narrow(oopDesc* src, narrowOop* load_addr)) return (oopDesc*) ShenandoahBarrierSet::barrier_set()->load_reference_barrier(ON_PHANTOM_OOP_REF, oop(src), load_addr); JRT_END + +JRT_LEAF(void, ShenandoahRuntime::clone_barrier(oopDesc* src)) + oop s = oop(src); + shenandoah_assert_correct(nullptr, s); + ShenandoahBarrierSet::barrier_set()->clone_barrier(s); +JRT_END diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp index e187e4360b16b..4ad8fc997ea76 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp @@ -33,10 +33,10 @@ class oopDesc; class ShenandoahRuntime : public AllStatic { public: - static void arraycopy_barrier_oop_entry(oop* src, oop* dst, size_t length); - static void arraycopy_barrier_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length); + static void arraycopy_barrier_oop(oop* src, oop* dst, size_t length); + static void arraycopy_barrier_narrow_oop(narrowOop* src, narrowOop* dst, size_t length); - static void write_ref_field_pre_entry(oopDesc* orig, JavaThread* thread); + static void write_ref_field_pre(oopDesc* orig, JavaThread* thread); static oopDesc* load_reference_barrier_strong(oopDesc* src, oop* load_addr); static oopDesc* load_reference_barrier_strong_narrow(oopDesc* src, narrowOop* load_addr); @@ -47,7 +47,7 @@ class ShenandoahRuntime : public AllStatic { static oopDesc* load_reference_barrier_phantom(oopDesc* src, oop* load_addr); static oopDesc* load_reference_barrier_phantom_narrow(oopDesc* src, narrowOop* 
load_addr); - static void shenandoah_clone_barrier(oopDesc* src); + static void clone_barrier(oopDesc* src); }; #endif // SHARE_GC_SHENANDOAH_SHENANDOAHRUNTIME_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp index 05cd8ef66b9d1..9a30b1fed8724 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp @@ -133,8 +133,7 @@ void ShenandoahSTWMark::mark() { ShenandoahCodeRoots::disarm_nmethods(); assert(task_queues()->is_empty(), "Should be empty"); - TASKQUEUE_STATS_ONLY(task_queues()->print_taskqueue_stats()); - TASKQUEUE_STATS_ONLY(task_queues()->reset_taskqueue_stats()); + TASKQUEUE_STATS_ONLY(task_queues()->print_and_reset_taskqueue_stats("")); } void ShenandoahSTWMark::mark_roots(uint worker_id) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp index c3e8108752fed..127e6324fb01e 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp" ShenandoahSimpleBitMap::ShenandoahSimpleBitMap(size_t num_bits) : _num_bits(num_bits), @@ -43,8 +43,8 @@ size_t ShenandoahSimpleBitMap::count_leading_ones(idx_t start_idx) const { assert((start_idx >= 0) && (start_idx < _num_bits), "precondition"); size_t array_idx = start_idx >> LogBitsPerWord; uintx element_bits = _bitmap[array_idx]; - uintx bit_number = start_idx & right_n_bits(LogBitsPerWord); - uintx mask = ~right_n_bits(bit_number); + uintx bit_number = start_idx & (BitsPerWord - 1); + uintx mask = ~tail_mask(bit_number); size_t counted_ones = 0; while ((element_bits & mask) == mask) { // All bits numbered >= bit_number are set @@ -54,7 +54,7 @@ size_t ShenandoahSimpleBitMap::count_leading_ones(idx_t start_idx) const { // Strength reduction: array_idx = (start_idx >> LogBitsPerWord) array_idx++; element_bits = _bitmap[array_idx]; - // Constant folding: bit_number = start_idx & right_n_bits(LogBitsPerWord); + // Constant folding: bit_number = start_idx & (BitsPerWord - 1); bit_number = 0; // Constant folding: mask = ~right_n_bits(bit_number); mask = ~0; @@ -70,9 +70,9 @@ size_t ShenandoahSimpleBitMap::count_trailing_ones(idx_t last_idx) const { assert((last_idx >= 0) && (last_idx < _num_bits), "precondition"); size_t array_idx = last_idx >> LogBitsPerWord; uintx element_bits = _bitmap[array_idx]; - uintx bit_number = last_idx & right_n_bits(LogBitsPerWord); + uintx bit_number = last_idx & (BitsPerWord - 1); // All ones from bit 0 to the_bit - uintx mask = right_n_bits(bit_number + 1); + uintx mask = tail_mask(bit_number + 1); size_t counted_ones = 0; while ((element_bits & mask) == mask) { // All bits numbered <= bit_number are set @@ -81,7 +81,7 @@ size_t ShenandoahSimpleBitMap::count_trailing_ones(idx_t last_idx) const { // Dead code: do not need to compute: last_idx -= found_ones; array_idx--; element_bits = _bitmap[array_idx]; - // Constant folding: bit_number = last_idx & right_n_bits(LogBitsPerWord); + // Constant folding: bit_number = last_idx & (BitsPerWord - 1); bit_number = BitsPerWord - 1; // Constant folding: mask = right_n_bits(bit_number + 1); mask = ~0; @@ -99,7 +99,7 @@ bool ShenandoahSimpleBitMap::is_forward_consecutive_ones(idx_t start_idx, idx_t start_idx, count); 
assert(start_idx + count <= (idx_t) _num_bits, "precondition"); size_t array_idx = start_idx >> LogBitsPerWord; - uintx bit_number = start_idx & right_n_bits(LogBitsPerWord); + uintx bit_number = start_idx & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; uintx bits_to_examine = BitsPerWord - bit_number; element_bits >>= bit_number; @@ -128,7 +128,7 @@ bool ShenandoahSimpleBitMap::is_backward_consecutive_ones(idx_t last_idx, idx_t assert((last_idx >= 0) && (last_idx < _num_bits), "precondition"); assert(last_idx - count >= -1, "precondition"); size_t array_idx = last_idx >> LogBitsPerWord; - uintx bit_number = last_idx & right_n_bits(LogBitsPerWord); + uintx bit_number = last_idx & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; uintx bits_to_examine = bit_number + 1; element_bits <<= (BitsPerWord - bits_to_examine); @@ -161,10 +161,10 @@ idx_t ShenandoahSimpleBitMap::find_first_consecutive_set_bits(idx_t beg, idx_t e return end; } uintx array_idx = beg >> LogBitsPerWord; - uintx bit_number = beg & right_n_bits(LogBitsPerWord); + uintx bit_number = beg & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; if (bit_number > 0) { - uintx mask_out = right_n_bits(bit_number); + uintx mask_out = tail_mask(bit_number); element_bits &= ~mask_out; } @@ -222,9 +222,9 @@ idx_t ShenandoahSimpleBitMap::find_first_consecutive_set_bits(idx_t beg, idx_t e } array_idx = beg >> LogBitsPerWord; element_bits = _bitmap[array_idx]; - bit_number = beg & right_n_bits(LogBitsPerWord); + bit_number = beg & (BitsPerWord - 1); if (bit_number > 0) { - size_t mask_out = right_n_bits(bit_number); + size_t mask_out = tail_mask(bit_number); element_bits &= ~mask_out; } } @@ -242,10 +242,10 @@ idx_t ShenandoahSimpleBitMap::find_last_consecutive_set_bits(const idx_t beg, id } size_t array_idx = end >> LogBitsPerWord; - uintx bit_number = end & right_n_bits(LogBitsPerWord); + uintx bit_number = end & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; if (bit_number < BitsPerWord - 1) { - uintx mask_in = right_n_bits(bit_number + 1); + uintx mask_in = tail_mask(bit_number + 1); element_bits &= mask_in; } @@ -280,10 +280,10 @@ idx_t ShenandoahSimpleBitMap::find_last_consecutive_set_bits(const idx_t beg, id return beg; } array_idx = end >> LogBitsPerWord; - bit_number = end & right_n_bits(LogBitsPerWord); + bit_number = end & (BitsPerWord - 1); element_bits = _bitmap[array_idx]; if (bit_number < BitsPerWord - 1){ - size_t mask_in = right_n_bits(bit_number + 1); + size_t mask_in = tail_mask(bit_number + 1); element_bits &= mask_in; } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp index c22e952700204..55d21b06e4bbd 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp @@ -50,7 +50,7 @@ typedef ssize_t idx_t; // ShenandoahSimpleBitMap resembles CHeapBitMap but adds missing support for find_first_consecutive_set_bits() and // find_last_consecutive_set_bits. An alternative refactoring of code would subclass CHeapBitMap, but this might // break abstraction rules, because efficient implementation requires assumptions about superclass internals that -// might be violatee through future software maintenance. +// might be violated through future software maintenance. 
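The recurring idiom in these bitmap hunks splits a global bit index into a word index, idx >> LogBitsPerWord, and an in-word bit number, idx & (BitsPerWord - 1); the mask form is equivalent to idx % BitsPerWord precisely because BitsPerWord is a power of two. A self-contained sketch of the forward scan that count_leading_ones() performs (an editor's illustration using uint64_t words rather than the HotSpot types):

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr unsigned LogBitsPerWord = 6;
constexpr unsigned BitsPerWord = 1u << LogBitsPerWord;  // 64

// Count the run of consecutive 1-bits starting at start_idx.
size_t count_ones_from(const std::vector<uint64_t>& bitmap, size_t start_idx) {
  size_t array_idx = start_idx >> LogBitsPerWord;        // which word
  unsigned bit_number = start_idx & (BitsPerWord - 1);   // which bit within it
  size_t count = 0;

  while (array_idx < bitmap.size()) {
    uint64_t element = bitmap[array_idx] >> bit_number;  // discard bits below start
    const unsigned remaining = BitsPerWord - bit_number;
    unsigned run = 0;
    while (run < remaining && (element & 1)) {
      element >>= 1;
      run++;
    }
    count += run;
    if (run < remaining) {
      return count;  // found a 0-bit inside this word
    }
    array_idx++;     // the entire tail of the word was 1s; continue into the next word
    bit_number = 0;  // subsequent words are scanned from bit 0
  }
  return count;
}

After the first, possibly partial, word, bit_number is pinned to 0, which is exactly the constant-folding that the patch's comments point out.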
class ShenandoahSimpleBitMap { const idx_t _num_bits; const size_t _num_words; @@ -80,11 +80,13 @@ class ShenandoahSimpleBitMap { bool is_forward_consecutive_ones(idx_t start_idx, idx_t count) const; bool is_backward_consecutive_ones(idx_t last_idx, idx_t count) const; + static inline uintx tail_mask(uintx bit_number); + public: inline idx_t aligned_index(idx_t idx) const { assert((idx >= 0) && (idx < _num_bits), "precondition"); - idx_t array_idx = idx & ~right_n_bits(LogBitsPerWord); + idx_t array_idx = idx & ~(BitsPerWord - 1); return array_idx; } @@ -107,7 +109,7 @@ class ShenandoahSimpleBitMap { inline void set_bit(idx_t idx) { assert((idx >= 0) && (idx < _num_bits), "precondition"); size_t array_idx = idx >> LogBitsPerWord; - uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx bit_number = idx & (BitsPerWord - 1); uintx the_bit = nth_bit(bit_number); _bitmap[array_idx] |= the_bit; } @@ -116,7 +118,7 @@ class ShenandoahSimpleBitMap { assert((idx >= 0) && (idx < _num_bits), "precondition"); assert(idx >= 0, "precondition"); size_t array_idx = idx >> LogBitsPerWord; - uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx bit_number = idx & (BitsPerWord - 1); uintx the_bit = nth_bit(bit_number); _bitmap[array_idx] &= ~the_bit; } @@ -125,9 +127,9 @@ class ShenandoahSimpleBitMap { assert((idx >= 0) && (idx < _num_bits), "precondition"); assert(idx >= 0, "precondition"); size_t array_idx = idx >> LogBitsPerWord; - uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx bit_number = idx & (BitsPerWord - 1); uintx the_bit = nth_bit(bit_number); - return (_bitmap[array_idx] & the_bit)? true: false; + return (_bitmap[array_idx] & the_bit) != 0; } // Return the index of the first set bit in the range [beg, size()), or size() if none found. 
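The tail_mask() helper declared here (and defined in shenandoahSimpleBitMap.inline.hpp below) produces a word whose low bit_number bits are set. Its branch for bit_number >= BitsPerWord is load-bearing: shifting a word by its full width is undefined behavior in C++, and callers such as count_trailing_ones() legitimately request tail_mask(bit_number + 1) with bit_number as large as BitsPerWord - 1. A small standalone sketch of the contract (uintx spelled as uint64_t; the patch returns -1 in the saturated case, which converts to the same all-ones value):

#include <cassert>
#include <cstdint>

using uintx = uint64_t;
constexpr unsigned BitsPerWord = 64;

// A word with the low bit_number bits set; saturates to all ones.
inline uintx tail_mask(uintx bit_number) {
  if (bit_number >= BitsPerWord) {
    return ~uintx(0);  // (1 << 64) - 1 would be undefined behavior, so branch instead
  }
  return (uintx(1) << bit_number) - 1;
}

int main() {
  assert(tail_mask(0) == 0);
  assert(tail_mask(3) == 0b111);
  assert(tail_mask(64) == ~uintx(0));

  // The two uses seen in the patch: clear the bits below a position, or
  // keep the bits at and below it.
  const uintx bits = 0xF0F0F0F0F0F0F0F0ull;
  assert((bits & ~tail_mask(4)) == bits);        // bits 0..3 were already zero
  assert((bits & tail_mask(7 + 1)) == 0xF0ull);  // keep bits 0..7
  return 0;
}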
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp index 3e602ed11e0c0..4582ab9a781dd 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp @@ -27,15 +27,22 @@ #include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +inline uintx ShenandoahSimpleBitMap::tail_mask(uintx bit_number) { + if (bit_number >= BitsPerWord) { + return -1; + } + return (uintx(1) << bit_number) - 1; +} + inline idx_t ShenandoahSimpleBitMap::find_first_set_bit(idx_t beg, idx_t end) const { assert((beg >= 0) && (beg < _num_bits), "precondition"); assert((end > beg) && (end <= _num_bits), "precondition"); do { size_t array_idx = beg >> LogBitsPerWord; - uintx bit_number = beg & right_n_bits(LogBitsPerWord); + uintx bit_number = beg & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; if (bit_number > 0) { - uintx mask_out = right_n_bits(bit_number); + uintx mask_out = tail_mask(bit_number); element_bits &= ~mask_out; } if (element_bits) { @@ -62,10 +69,10 @@ inline idx_t ShenandoahSimpleBitMap::find_last_set_bit(idx_t beg, idx_t end) con assert((beg >= -1) && (beg < end), "precondition"); do { idx_t array_idx = end >> LogBitsPerWord; - uintx bit_number = end & right_n_bits(LogBitsPerWord); + uint8_t bit_number = end & (BitsPerWord - 1); uintx element_bits = _bitmap[array_idx]; if (bit_number < BitsPerWord - 1){ - uintx mask_in = right_n_bits(bit_number + 1); + uintx mask_in = tail_mask(bit_number + 1); element_bits &= mask_in; } if (element_bits) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp b/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp index 3cddc0c6c0a83..eb185c197bd5d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp @@ -51,45 +51,6 @@ bool ShenandoahObjToScanQueueSet::is_empty() { return true; } -#if TASKQUEUE_STATS -void ShenandoahObjToScanQueueSet::print_taskqueue_stats_hdr(outputStream* const st) { - st->print_raw_cr("GC Task Stats"); - st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); - st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); -} - -void ShenandoahObjToScanQueueSet::print_taskqueue_stats() const { - if (!log_develop_is_enabled(Trace, gc, task, stats)) { - return; - } - Log(gc, task, stats) log; - ResourceMark rm; - LogStream ls(log.trace()); - outputStream* st = &ls; - print_taskqueue_stats_hdr(st); - - ShenandoahObjToScanQueueSet* queues = const_cast(this); - TaskQueueStats totals; - const uint n = size(); - for (uint i = 0; i < n; ++i) { - st->print(UINT32_FORMAT_W(3), i); - queues->queue(i)->stats.print(st); - st->cr(); - totals += queues->queue(i)->stats; - } - st->print("tot "); totals.print(st); st->cr(); - DEBUG_ONLY(totals.verify()); - -} - -void ShenandoahObjToScanQueueSet::reset_taskqueue_stats() { - const uint n = size(); - for (uint i = 0; i < n; ++i) { - queue(i)->stats.reset(); - } -} -#endif // TASKQUEUE_STATS - bool ShenandoahTerminatorTerminator::should_exit_termination() { return _heap->cancelled_gc(); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp b/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp index 2b160a2938794..10887ad8c19d6 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp @@ -355,12 +355,6 @@ class 
ShenandoahObjToScanQueueSet: public ParallelClaimableQueueSetsize()); - free_page(page); + free_page(page, false /* allow_defragment */); } -void ZHeap::free_page(ZPage* page) { +void ZHeap::free_page(ZPage* page, bool allow_defragment) { // Remove page table entry _page_table.remove(page); @@ -252,7 +253,7 @@ void ZHeap::free_page(ZPage* page) { } // Free page - _page_allocator.free_page(page); + _page_allocator.free_page(page, allow_defragment); } size_t ZHeap::free_empty_pages(const ZArray* pages) { diff --git a/src/hotspot/share/gc/z/zHeap.hpp b/src/hotspot/share/gc/z/zHeap.hpp index 18fa0d6349bea..7b75c63cf8ce0 100644 --- a/src/hotspot/share/gc/z/zHeap.hpp +++ b/src/hotspot/share/gc/z/zHeap.hpp @@ -104,7 +104,7 @@ class ZHeap { // Page allocation ZPage* alloc_page(ZPageType type, size_t size, ZAllocationFlags flags, ZPageAge age); void undo_alloc_page(ZPage* page); - void free_page(ZPage* page); + void free_page(ZPage* page, bool allow_defragment); size_t free_empty_pages(const ZArray* pages); // Object allocation diff --git a/src/hotspot/share/gc/z/zInitialize.cpp b/src/hotspot/share/gc/z/zInitialize.cpp index 52229bf283097..e37fc550bfe2a 100644 --- a/src/hotspot/share/gc/z/zInitialize.cpp +++ b/src/hotspot/share/gc/z/zInitialize.cpp @@ -22,6 +22,7 @@ */ #include "precompiled.hpp" +#include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zAddress.hpp" #include "gc/z/zBarrierSet.hpp" #include "gc/z/zCPU.hpp" @@ -38,9 +39,19 @@ #include "gc/z/zThreadLocalAllocBuffer.hpp" #include "gc/z/zTracer.hpp" #include "logging/log.hpp" +#include "nmt/memTag.hpp" #include "runtime/vm_version.hpp" +#include "utilities/formatBuffer.hpp" -ZInitialize::ZInitialize(ZBarrierSet* barrier_set) { +char ZInitialize::_error_message[ErrorMessageLength] = {}; +bool ZInitialize::_had_error = false; +bool ZInitialize::_finished = false; + +ZInitializer::ZInitializer(ZBarrierSet* barrier_set) { + ZInitialize::initialize(barrier_set); +} + +void ZInitialize::initialize(ZBarrierSet* barrier_set) { log_info(gc, init)("Initializing %s", ZName); log_info(gc, init)("Version: %s (%s)", VM_Version::vm_release(), @@ -62,3 +73,51 @@ ZInitialize::ZInitialize(ZBarrierSet* barrier_set) { pd_initialize(); } + +void ZInitialize::register_error(bool debug, const char *error_msg) { + guarantee(!_finished, "Only register errors during initialization"); + + if (!_had_error) { + strncpy(_error_message, error_msg, ErrorMessageLength - 1); + _had_error = true; + } + + if (debug) { + log_error_pd(gc)("%s", error_msg); + } else { + log_error_p(gc)("%s", error_msg); + } +} + +void ZInitialize::error(const char* msg_format, ...) { + va_list argp; + va_start(argp, msg_format); + const FormatBuffer error_msg(FormatBufferDummy(), msg_format, argp); + va_end(argp); + register_error(false /* debug */, error_msg); +} + +void ZInitialize::error_d(const char* msg_format, ...) 
{ + va_list argp; + va_start(argp, msg_format); + const FormatBuffer error_msg(FormatBufferDummy(), msg_format, argp); + va_end(argp); + register_error(true /* debug */, error_msg); +} + +bool ZInitialize::had_error() { + return _had_error; +} + +const char* ZInitialize::error_message() { + assert(had_error(), "Should have registered an error"); + if (had_error()) { + return _error_message; + } + return "Unknown error, check error GC logs"; +} + +void ZInitialize::finish() { + guarantee(!_finished, "Only finish initialization once"); + _finished = true; +} diff --git a/src/hotspot/share/gc/z/zInitialize.hpp b/src/hotspot/share/gc/z/zInitialize.hpp index 599b656623448..3c551b4c62260 100644 --- a/src/hotspot/share/gc/z/zInitialize.hpp +++ b/src/hotspot/share/gc/z/zInitialize.hpp @@ -24,16 +24,39 @@ #ifndef SHARE_GC_Z_ZINITIALIZE_HPP #define SHARE_GC_Z_ZINITIALIZE_HPP -#include "memory/allocation.hpp" +#include "memory/allStatic.hpp" +#include "utilities/compilerWarnings.hpp" + +#include class ZBarrierSet; -class ZInitialize { +class ZInitializer { +public: + ZInitializer(ZBarrierSet* barrier_set); +}; + +class ZInitialize : public AllStatic { private: - void pd_initialize(); + static constexpr size_t ErrorMessageLength = 256; + + static char _error_message[ErrorMessageLength]; + static bool _had_error; + static bool _finished; + + static void register_error(bool debug, const char *error_msg); + + static void pd_initialize(); public: - ZInitialize(ZBarrierSet* barrier_set); + static void error(const char* msg_format, ...) ATTRIBUTE_PRINTF(1, 2); + static void error_d(const char* msg_format, ...) ATTRIBUTE_PRINTF(1, 2); + + static bool had_error(); + static const char* error_message(); + + static void initialize(ZBarrierSet* barrier_set); + static void finish(); }; #endif // SHARE_GC_Z_ZINITIALIZE_HPP diff --git a/src/hotspot/share/gc/z/zMarkStackAllocator.cpp b/src/hotspot/share/gc/z/zMarkStackAllocator.cpp index a9e404a0f55c1..100036dc3fe53 100644 --- a/src/hotspot/share/gc/z/zMarkStackAllocator.cpp +++ b/src/hotspot/share/gc/z/zMarkStackAllocator.cpp @@ -22,8 +22,8 @@ */ #include "precompiled.hpp" -#include "gc/shared/gcLogPrecious.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/z/zInitialize.hpp" #include "gc/z/zLock.inline.hpp" #include "gc/z/zMarkStack.inline.hpp" #include "gc/z/zMarkStackAllocator.hpp" @@ -43,7 +43,7 @@ ZMarkStackSpace::ZMarkStackSpace() const size_t size = ZMarkStackSpaceLimit; const uintptr_t addr = (uintptr_t)os::reserve_memory(size, !ExecMem, mtGC); if (addr == 0) { - log_error_pd(gc, marking)("Failed to reserve address space for mark stacks"); + ZInitialize::error_d("Failed to reserve address space for mark stacks"); return; } diff --git a/src/hotspot/share/gc/z/zPageAllocator.cpp b/src/hotspot/share/gc/z/zPageAllocator.cpp index 01200f76b519e..010241294a701 100644 --- a/src/hotspot/share/gc/z/zPageAllocator.cpp +++ b/src/hotspot/share/gc/z/zPageAllocator.cpp @@ -275,7 +275,7 @@ bool ZPageAllocator::prime_cache(ZWorkers* workers, size_t size) { workers->run_all(&task); } - free_page(page); + free_page(page, false /* allow_defragment */); return true; } @@ -462,6 +462,38 @@ void ZPageAllocator::destroy_page(ZPage* page) { safe_destroy_page(page); } +bool ZPageAllocator::should_defragment(const ZPage* page) const { + // A small page can end up at a high address (second half of the address space) + // if we've split a larger page or we have a constrained address space. 
To help + // fight address space fragmentation we remap such pages to a lower address, if + // a lower address is available. + return page->type() == ZPageType::small && + page->start() >= to_zoffset(_virtual.reserved() / 2) && + page->start() > _virtual.lowest_available_address(); +} + +ZPage* ZPageAllocator::defragment_page(ZPage* page) { + // Harvest the physical memory (which is committed) + ZPhysicalMemory pmem; + ZPhysicalMemory& old_pmem = page->physical_memory(); + pmem.add_segments(old_pmem); + old_pmem.remove_segments(); + + _unmapper->unmap_and_destroy_page(page); + + // Allocate new virtual memory at a low address + const ZVirtualMemory vmem = _virtual.alloc(pmem.size(), true /* force_low_address */); + + // Create the new page and map it + ZPage* new_page = new ZPage(ZPageType::small, vmem, pmem); + map_page(new_page); + + // Update statistics + ZStatInc(ZCounterDefragment); + + return new_page; +} + bool ZPageAllocator::is_alloc_allowed(size_t size) const { const size_t available = _current_max_capacity - _used - _claimed; return available >= size; @@ -623,16 +655,6 @@ ZPage* ZPageAllocator::alloc_page_create(ZPageAllocation* allocation) { return new ZPage(allocation->type(), vmem, pmem); } -bool ZPageAllocator::should_defragment(const ZPage* page) const { - // A small page can end up at a high address (second half of the address space) - // if we've split a larger page or we have a constrained address space. To help - // fight address space fragmentation we remap such pages to a lower address, if - // a lower address is available. - return page->type() == ZPageType::small && - page->start() >= to_zoffset(_virtual.reserved() / 2) && - page->start() > _virtual.lowest_available_address(); -} - bool ZPageAllocator::is_alloc_satisfied(ZPageAllocation* allocation) const { // The allocation is immediately satisfied if the list of pages contains // exactly one page, with the type and size that was requested. 
However, @@ -652,12 +674,6 @@ bool ZPageAllocator::is_alloc_satisfied(ZPageAllocation* allocation) const { return false; } - if (should_defragment(page)) { - // Defragment address space - ZStatInc(ZCounterDefragment); - return false; - } - // Allocation immediately satisfied return true; } @@ -773,6 +789,18 @@ void ZPageAllocator::satisfy_stalled() { } } +ZPage* ZPageAllocator::prepare_to_recycle(ZPage* page, bool allow_defragment) { + // Make sure we have a page that is safe to recycle + ZPage* const to_recycle = _safe_recycle.register_and_clone_if_activated(page); + + // Defragment the page before recycle if allowed and needed + if (allow_defragment && should_defragment(to_recycle)) { + return defragment_page(to_recycle); + } + + return to_recycle; +} + void ZPageAllocator::recycle_page(ZPage* page) { // Set time when last used page->set_last_used(); @@ -781,9 +809,11 @@ void ZPageAllocator::recycle_page(ZPage* page) { _cache.free_page(page); } -void ZPageAllocator::free_page(ZPage* page) { +void ZPageAllocator::free_page(ZPage* page, bool allow_defragment) { const ZGenerationId generation_id = page->generation_id(); - ZPage* const to_recycle = _safe_recycle.register_and_clone_if_activated(page); + + // Prepare page for recycling before taking the lock + ZPage* const to_recycle = prepare_to_recycle(page, allow_defragment); ZLocker locker(&_lock); @@ -800,11 +830,12 @@ void ZPageAllocator::free_page(ZPage* page) { } void ZPageAllocator::free_pages(const ZArray* pages) { - ZArray to_recycle; + ZArray to_recycle_pages; size_t young_size = 0; size_t old_size = 0; + // Prepare pages for recycling before taking the lock ZArrayIterator pages_iter(pages); for (ZPage* page; pages_iter.next(&page);) { if (page->is_young()) { @@ -812,7 +843,12 @@ void ZPageAllocator::free_pages(const ZArray* pages) { } else { old_size += page->size(); } - to_recycle.push(_safe_recycle.register_and_clone_if_activated(page)); + + // Prepare to recycle + ZPage* const to_recycle = prepare_to_recycle(page, true /* allow_defragment */); + + // Register for recycling + to_recycle_pages.push(to_recycle); } ZLocker locker(&_lock); @@ -823,7 +859,7 @@ void ZPageAllocator::free_pages(const ZArray* pages) { decrease_used_generation(ZGenerationId::old, old_size); // Free pages - ZArrayIterator iter(&to_recycle); + ZArrayIterator iter(&to_recycle_pages); for (ZPage* page; iter.next(&page);) { recycle_page(page); } @@ -833,11 +869,16 @@ void ZPageAllocator::free_pages(const ZArray* pages) { } void ZPageAllocator::free_pages_alloc_failed(ZPageAllocation* allocation) { - ZArray to_recycle; + ZArray to_recycle_pages; + // Prepare pages for recycling before taking the lock ZListRemoveIterator allocation_pages_iter(allocation->pages()); for (ZPage* page; allocation_pages_iter.next(&page);) { - to_recycle.push(_safe_recycle.register_and_clone_if_activated(page)); + // Prepare to recycle + ZPage* const to_recycle = prepare_to_recycle(page, false /* allow_defragment */); + + // Register for recycling + to_recycle_pages.push(to_recycle); } ZLocker locker(&_lock); @@ -849,7 +890,7 @@ void ZPageAllocator::free_pages_alloc_failed(ZPageAllocation* allocation) { size_t freed = 0; // Free any allocated/flushed pages - ZArrayIterator iter(&to_recycle); + ZArrayIterator iter(&to_recycle_pages); for (ZPage* page; iter.next(&page);) { freed += page->size(); recycle_page(page); diff --git a/src/hotspot/share/gc/z/zPageAllocator.hpp b/src/hotspot/share/gc/z/zPageAllocator.hpp index 5d3d59a416344..7df83a10eaf5a 100644 --- 
a/src/hotspot/share/gc/z/zPageAllocator.hpp +++ b/src/hotspot/share/gc/z/zPageAllocator.hpp @@ -104,13 +104,15 @@ class ZPageAllocator { void destroy_page(ZPage* page); + bool should_defragment(const ZPage* page) const; + ZPage* defragment_page(ZPage* page); + bool is_alloc_allowed(size_t size) const; bool alloc_page_common_inner(ZPageType type, size_t size, ZList* pages); bool alloc_page_common(ZPageAllocation* allocation); bool alloc_page_stall(ZPageAllocation* allocation); bool alloc_page_or_stall(ZPageAllocation* allocation); - bool should_defragment(const ZPage* page) const; bool is_alloc_satisfied(ZPageAllocation* allocation) const; ZPage* alloc_page_create(ZPageAllocation* allocation); ZPage* alloc_page_finalize(ZPageAllocation* allocation); @@ -149,9 +151,10 @@ class ZPageAllocator { void reset_statistics(ZGenerationId id); ZPage* alloc_page(ZPageType type, size_t size, ZAllocationFlags flags, ZPageAge age); + ZPage* prepare_to_recycle(ZPage* page, bool allow_defragment); void recycle_page(ZPage* page); void safe_destroy_page(ZPage* page); - void free_page(ZPage* page); + void free_page(ZPage* page, bool allow_defragment); void free_pages(const ZArray* pages); void enable_safe_destroy() const; diff --git a/src/hotspot/share/gc/z/zRelocate.cpp b/src/hotspot/share/gc/z/zRelocate.cpp index 33304bcefd37f..7f69c0752bc5a 100644 --- a/src/hotspot/share/gc/z/zRelocate.cpp +++ b/src/hotspot/share/gc/z/zRelocate.cpp @@ -411,7 +411,7 @@ static void retire_target_page(ZGeneration* generation, ZPage* page) { // relocate the remaining objects, leaving the target page empty when // relocation completed. if (page->used() == 0) { - ZHeap::heap()->free_page(page); + ZHeap::heap()->free_page(page, true /* allow_defragment */); } } @@ -1012,7 +1012,7 @@ class ZRelocateWork : public StackObj { page->log_msg(" (relocate page done normal)"); // Free page - ZHeap::heap()->free_page(page); + ZHeap::heap()->free_page(page, true /* allow_defragment */); } } }; diff --git a/src/hotspot/share/gc/z/zVirtualMemory.cpp b/src/hotspot/share/gc/z/zVirtualMemory.cpp index 6b53b2ba7c82b..2160aa3894802 100644 --- a/src/hotspot/share/gc/z/zVirtualMemory.cpp +++ b/src/hotspot/share/gc/z/zVirtualMemory.cpp @@ -27,6 +27,7 @@ #include "gc/z/zAddress.inline.hpp" #include "gc/z/zAddressSpaceLimit.hpp" #include "gc/z/zGlobals.hpp" +#include "gc/z/zInitialize.hpp" #include "gc/z/zNMT.hpp" #include "gc/z/zVirtualMemory.inline.hpp" #include "utilities/align.hpp" @@ -44,7 +45,7 @@ ZVirtualMemoryManager::ZVirtualMemoryManager(size_t max_capacity) // Reserve address space if (!reserve(max_capacity)) { - log_error_pd(gc)("Failed to reserve enough address space for Java heap"); + ZInitialize::error_d("Failed to reserve enough address space for Java heap"); return; } diff --git a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp index 2fad5ba39ef5c..616ba29c62b33 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.cpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp @@ -74,7 +74,9 @@ void AbstractInterpreter::print() { tty->print_cr("avg codelet size = %6d bytes", _code->used_space() / _code->number_of_stubs()); tty->cr(); } + _should_print_instructions = PrintInterpreter; _code->print(); + _should_print_instructions = false; tty->print_cr("----------------------------------------------------------------------"); tty->cr(); } @@ -91,6 +93,8 @@ address AbstractInterpreter::_slow_signature_handler; address AbstractInterpreter::_entry_table 
[AbstractInterpreter::number_of_method_entries]; address AbstractInterpreter::_native_abi_to_tosca [AbstractInterpreter::number_of_result_handlers]; +bool AbstractInterpreter::_should_print_instructions = false; + //------------------------------------------------------------------------------------------------------------------------ // Generation of complete interpreter @@ -138,6 +142,7 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan case vmIntrinsics::_dsin: return java_lang_math_sin; case vmIntrinsics::_dcos: return java_lang_math_cos; case vmIntrinsics::_dtan: return java_lang_math_tan; + case vmIntrinsics::_dtanh: return java_lang_math_tanh; case vmIntrinsics::_dabs: return java_lang_math_abs; case vmIntrinsics::_dlog: return java_lang_math_log; case vmIntrinsics::_dlog10: return java_lang_math_log10; @@ -198,6 +203,7 @@ vmIntrinsics::ID AbstractInterpreter::method_intrinsic(MethodKind kind) { case java_lang_math_sin : return vmIntrinsics::_dsin; case java_lang_math_cos : return vmIntrinsics::_dcos; case java_lang_math_tan : return vmIntrinsics::_dtan; + case java_lang_math_tanh : return vmIntrinsics::_dtanh; case java_lang_math_abs : return vmIntrinsics::_dabs; case java_lang_math_log : return vmIntrinsics::_dlog; case java_lang_math_log10 : return vmIntrinsics::_dlog10; @@ -309,6 +315,7 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { case java_lang_math_sin : tty->print("java_lang_math_sin" ); break; case java_lang_math_cos : tty->print("java_lang_math_cos" ); break; case java_lang_math_tan : tty->print("java_lang_math_tan" ); break; + case java_lang_math_tanh : tty->print("java_lang_math_tanh" ); break; case java_lang_math_abs : tty->print("java_lang_math_abs" ); break; case java_lang_math_log : tty->print("java_lang_math_log" ); break; case java_lang_math_log10 : tty->print("java_lang_math_log10" ); break; diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp index e487b152b76ea..55fb58021a0d4 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.hpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -72,6 +72,7 @@ class AbstractInterpreter: AllStatic { java_lang_math_sin, // implementation of java.lang.Math.sin (x) java_lang_math_cos, // implementation of java.lang.Math.cos (x) java_lang_math_tan, // implementation of java.lang.Math.tan (x) + java_lang_math_tanh, // implementation of java.lang.Math.tanh (x) java_lang_math_abs, // implementation of java.lang.Math.abs (x) java_lang_math_sqrt, // implementation of java.lang.Math.sqrt (x) java_lang_math_sqrt_strict, // implementation of java.lang.StrictMath.sqrt(x) @@ -125,6 +126,8 @@ class AbstractInterpreter: AllStatic { static address _rethrow_exception_entry; // rethrows an activation in previous frame + static bool _should_print_instructions; // only with PrintInterpreter and when printing all InterpreterCodelet + friend class AbstractInterpreterGenerator; friend class InterpreterMacroAssembler; @@ -132,6 +135,7 @@ class AbstractInterpreter: AllStatic { // Initialization/debugging static void initialize(); static StubQueue* code() { return _code; } + static bool should_print_instructions() { return _should_print_instructions; } // Method activation @@ -151,6 +155,7 @@ class AbstractInterpreter: AllStatic { case vmIntrinsics::_dsin : // fall thru case vmIntrinsics::_dcos : // fall thru case vmIntrinsics::_dtan : // fall thru + case vmIntrinsics::_dtanh : // fall thru case vmIntrinsics::_dabs : // fall thru case vmIntrinsics::_dsqrt : // fall thru case vmIntrinsics::_dsqrt_strict : // fall thru diff --git a/src/hotspot/share/interpreter/bytecodeTracer.cpp b/src/hotspot/share/interpreter/bytecodeTracer.cpp index e5a3e9c16f4a1..cdb53b62f8c40 100644 --- a/src/hotspot/share/interpreter/bytecodeTracer.cpp +++ b/src/hotspot/share/interpreter/bytecodeTracer.cpp @@ -105,7 +105,7 @@ class BytecodePrinter { // the incoming method. We could lose a line of trace output. // This is acceptable in a debug-only feature. st->cr(); - st->print("[%ld] ", (long) Thread::current()->osthread()->thread_id()); + st->print("[" UINTX_FORMAT "] ", Thread::current()->osthread()->thread_id_for_printing()); method->print_name(st); st->cr(); _current_method = method(); @@ -128,7 +128,7 @@ class BytecodePrinter { code == Bytecodes::_return_register_finalizer || (code >= Bytecodes::_ireturn && code <= Bytecodes::_return)) { int bci = (int)(bcp - method->code_base()); - st->print("[%ld] ", (long) Thread::current()->osthread()->thread_id()); + st->print("[" UINTX_FORMAT "] ", Thread::current()->osthread()->thread_id_for_printing()); if (Verbose) { st->print("%8d %4d " INTPTR_FORMAT " " INTPTR_FORMAT " %s", BytecodeCounter::counter_value(), bci, tos, tos2, Bytecodes::name(code)); diff --git a/src/hotspot/share/interpreter/interpreter.cpp b/src/hotspot/share/interpreter/interpreter.cpp index 3c4ff4c1749e9..cba26f5aa6a6d 100644 --- a/src/hotspot/share/interpreter/interpreter.cpp +++ b/src/hotspot/share/interpreter/interpreter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -66,7 +66,7 @@ void InterpreterCodelet::verify() {} void InterpreterCodelet::print_on(outputStream* st) const { ttyLocker ttyl; - if (PrintInterpreter) { + if (AbstractInterpreter::should_print_instructions()) { st->cr(); st->print_cr("----------------------------------------------------------------------"); } @@ -76,7 +76,7 @@ void InterpreterCodelet::print_on(outputStream* st) const { st->print_cr("[" INTPTR_FORMAT ", " INTPTR_FORMAT "] %d bytes", p2i(code_begin()), p2i(code_end()), code_size()); - if (PrintInterpreter) { + if (AbstractInterpreter::should_print_instructions()) { st->cr(); Disassembler::decode(code_begin(), code_end(), st NOT_PRODUCT(COMMA &_asm_remarks)); } diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp index 9cd6f5ceffbe9..3f497c3360b7e 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -192,6 +192,7 @@ void TemplateInterpreterGenerator::generate_all() { method_entry(java_lang_math_sin ) method_entry(java_lang_math_cos ) method_entry(java_lang_math_tan ) + method_entry(java_lang_math_tanh ) method_entry(java_lang_math_abs ) method_entry(java_lang_math_sqrt ) method_entry(java_lang_math_sqrt_strict) @@ -457,6 +458,7 @@ address TemplateInterpreterGenerator::generate_intrinsic_entry(AbstractInterpret case Interpreter::java_lang_math_sin : // fall thru case Interpreter::java_lang_math_cos : // fall thru case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_tanh : // fall thru case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp index bcccff2fe82ec..b0afcb5279522 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -74,7 +74,7 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { void set_safepoints_for_all_bytes(); // Helpers for generate_and_dispatch - address generate_trace_code(TosState state) PRODUCT_RETURN0; + address generate_trace_code(TosState state) PRODUCT_RETURN_NULL; void count_bytecode() PRODUCT_RETURN; void histogram_bytecode(Template* t) PRODUCT_RETURN; void histogram_bytecode_pair(Template* t) PRODUCT_RETURN; diff --git a/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp b/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp index e08d9553c3e07..27ea1b9706719 100644 --- a/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -54,6 +54,7 @@ void ZeroInterpreterGenerator::generate_all() { method_entry(java_lang_math_sin ); method_entry(java_lang_math_cos ); method_entry(java_lang_math_tan ); + method_entry(java_lang_math_tanh ); method_entry(java_lang_math_abs ); method_entry(java_lang_math_sqrt ); method_entry(java_lang_math_sqrt_strict); @@ -95,6 +96,7 @@ address ZeroInterpreterGenerator::generate_method_entry( case Interpreter::java_lang_math_sin : // fall thru case Interpreter::java_lang_math_cos : // fall thru case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_tanh : // fall thru case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp index 279b871d8181a..a53eaa474f3f0 100644 --- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp +++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp @@ -484,6 +484,9 @@ static void do_primitives() { static void do_unloading_klass(Klass* klass) { assert(klass != nullptr, "invariant"); assert(_subsystem_callback != nullptr, "invariant"); + if (klass->is_instance_klass() && InstanceKlass::cast(klass)->is_scratch_class()) { + return; + } if (JfrKlassUnloading::on_unload(klass)) { _subsystem_callback->do_artifact(klass); } diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp index fa4b1c75c0573..0773de6ddbaa0 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp @@ -116,6 +116,7 @@ class CompilerToVM { static address dsin; static address dcos; static address dtan; + static address dtanh; static address dexp; static address dlog; static address dlog10; diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp index 26c88abec0f18..1612038008a32 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp @@ -135,6 +135,7 @@ int CompilerToVM::Data::sizeof_ZStoreBarrierEntry = sizeof(ZStoreBarrierEntry); address CompilerToVM::Data::dsin; address CompilerToVM::Data::dcos; address CompilerToVM::Data::dtan; +address 
CompilerToVM::Data::dtanh; address CompilerToVM::Data::dexp; address CompilerToVM::Data::dlog; address CompilerToVM::Data::dlog10; @@ -268,6 +269,19 @@ void CompilerToVM::Data::initialize(JVMCI_TRAPS) { SET_TRIGFUNC(dpow); #undef SET_TRIGFUNC + +#define SET_TRIGFUNC_OR_NULL(name) \ + if (StubRoutines::name() != nullptr) { \ + name = StubRoutines::name(); \ + } else { \ + name = nullptr; \ + } + + SET_TRIGFUNC_OR_NULL(dtanh); + +#undef SET_TRIGFUNC_OR_NULL + + } static jboolean is_c1_supported(vmIntrinsics::ID id){ diff --git a/src/hotspot/share/jvmci/jvmci_globals.cpp b/src/hotspot/share/jvmci/jvmci_globals.cpp index 86d8491b73303..2ae38044df0ff 100644 --- a/src/hotspot/share/jvmci/jvmci_globals.cpp +++ b/src/hotspot/share/jvmci/jvmci_globals.cpp @@ -26,6 +26,7 @@ #include "compiler/compilerDefinitions.hpp" #include "gc/shared/gcConfig.hpp" #include "jvm.h" +#include "jvmci/jvmci.hpp" #include "jvmci/jvmci_globals.hpp" #include "logging/log.hpp" #include "runtime/arguments.hpp" @@ -80,20 +81,25 @@ bool JVMCIGlobals::check_jvmci_flags_are_consistent() { CHECK_NOT_SET(LibJVMCICompilerThreadHidden, UseJVMCICompiler) if (UseJVMCICompiler) { - if (FLAG_IS_DEFAULT(UseJVMCINativeLibrary) && !UseJVMCINativeLibrary) { - char path[JVM_MAXPATHLEN]; - if (os::dll_locate_lib(path, sizeof(path), Arguments::get_dll_dir(), JVMCI_SHARED_LIBRARY_NAME)) { - // If a JVMCI native library is present, - // we enable UseJVMCINativeLibrary by default. - FLAG_SET_DEFAULT(UseJVMCINativeLibrary, true); - } - } if (!FLAG_IS_DEFAULT(EnableJVMCI) && !EnableJVMCI) { jio_fprintf(defaultStream::error_stream(), "Improperly specified VM option UseJVMCICompiler: EnableJVMCI cannot be disabled\n"); return false; } FLAG_SET_DEFAULT(EnableJVMCI, true); + } + + if (EnableJVMCI) { + if (FLAG_IS_DEFAULT(UseJVMCINativeLibrary) && !UseJVMCINativeLibrary) { + if (JVMCI::shared_library_exists()) { + // If a JVMCI native library is present, + // we enable UseJVMCINativeLibrary by default. + FLAG_SET_DEFAULT(UseJVMCINativeLibrary, true); + } + } + } + + if (UseJVMCICompiler) { if (BootstrapJVMCI && UseJVMCINativeLibrary) { jio_fprintf(defaultStream::error_stream(), "-XX:+BootstrapJVMCI is not compatible with -XX:+UseJVMCINativeLibrary\n"); return false; diff --git a/src/hotspot/share/jvmci/jvmci_globals.hpp b/src/hotspot/share/jvmci/jvmci_globals.hpp index 1f2c0c647ab1e..4da49b24e6ef9 100644 --- a/src/hotspot/share/jvmci/jvmci_globals.hpp +++ b/src/hotspot/share/jvmci/jvmci_globals.hpp @@ -45,7 +45,7 @@ class fileStream; constraint) \ \ product(bool, EnableJVMCI, false, EXPERIMENTAL, \ - "Enable JVMCI") \ + "Enable JVMCI. Defaults to true if UseJVMCICompiler is true.") \ \ product(bool, UseGraalJIT, false, EXPERIMENTAL, \ "Select the Graal JVMCI compiler. This is an alias for: " \ @@ -140,12 +140,14 @@ class fileStream; product(bool, UseJVMCINativeLibrary, false, EXPERIMENTAL, \ "Execute JVMCI Java code from a shared library (\"libjvmci\") " \ "instead of loading it from class files and executing it " \ - "on the HotSpot heap. Defaults to true if EnableJVMCIProduct is " \ - "true and a JVMCI native library is available.") \ + "on the HotSpot heap. Defaults to true if UseJVMCICompiler or " \ + "EnableJVMCI is true and a JVMCI native library is available.") \ \ - product(double, JVMCINativeLibraryThreadFraction, 0.33, EXPERIMENTAL, \ + product(double, JVMCINativeLibraryThreadFraction, 0.66, EXPERIMENTAL, \ "The fraction of compiler threads used by libjvmci. 
" \ - "The remaining compiler threads are used by C1.") \ + "The remaining compiler threads are used by C1. " \ + "Reducing this value could reduce the max RSS but " \ + "also increase the warmup time.") \ range(0.0, 1.0) \ \ product(ccstr, JVMCINativeLibraryErrorFile, nullptr, EXPERIMENTAL, \ diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index df77e8a2882ee..5452cca96b8c0 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -38,6 +38,7 @@ #include "runtime/continuationEntry.hpp" #include "runtime/deoptimization.hpp" #include "runtime/flags/jvmFlag.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/osThread.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -246,6 +247,7 @@ nonstatic_field(JavaThread, _lock_stack, LockStack) \ nonstatic_field(JavaThread, _om_cache, OMCache) \ nonstatic_field(JavaThread, _cont_entry, ContinuationEntry*) \ + nonstatic_field(JavaThread, _unlocked_inflated_monitor, ObjectMonitor*) \ JVMTI_ONLY(nonstatic_field(JavaThread, _is_in_VTMS_transition, bool)) \ JVMTI_ONLY(nonstatic_field(JavaThread, _is_in_tmp_VTMS_transition, bool)) \ JVMTI_ONLY(nonstatic_field(JavaThread, _is_disable_suspend, bool)) \ diff --git a/src/hotspot/share/logging/logSelection.cpp b/src/hotspot/share/logging/logSelection.cpp index aea5719b36d4f..1e7ba3a887848 100644 --- a/src/hotspot/share/logging/logSelection.cpp +++ b/src/hotspot/share/logging/logSelection.cpp @@ -33,11 +33,11 @@ const LogSelection LogSelection::Invalid; -LogSelection::LogSelection() : _ntags(0), _wildcard(false), _level(LogLevel::Invalid), _tag_sets_selected(0) { +LogSelection::LogSelection() : _ntags(0), _tags(), _wildcard(false), _level(LogLevel::Invalid), _tag_sets_selected(0) { } LogSelection::LogSelection(const LogTagType tags[LogTag::MaxTags], bool wildcard, LogLevelType level) - : _ntags(0), _wildcard(wildcard), _level(level), _tag_sets_selected(0) { + : _ntags(0), _tags(), _wildcard(wildcard), _level(level), _tag_sets_selected(0) { while (_ntags < LogTag::MaxTags && tags[_ntags] != LogTag::__NO_TAG) { _tags[_ntags] = tags[_ntags]; _ntags++; diff --git a/src/hotspot/share/nmt/mallocHeader.hpp b/src/hotspot/share/nmt/mallocHeader.hpp index c76e61fb4b5a2..6711c2b993e6f 100644 --- a/src/hotspot/share/nmt/mallocHeader.hpp +++ b/src/hotspot/share/nmt/mallocHeader.hpp @@ -127,6 +127,7 @@ class MallocHeader { inline MallocHeader(size_t size, MemTag mem_tag, uint32_t mst_marker); + inline static size_t malloc_overhead() { return sizeof(MallocHeader) + sizeof(uint16_t); } inline size_t size() const { return _size; } inline MemTag mem_tag() const { return _mem_tag; } inline uint32_t mst_marker() const { return _mst_marker; } diff --git a/src/hotspot/share/nmt/mallocTracker.hpp b/src/hotspot/share/nmt/mallocTracker.hpp index 39d120433ef02..de30f32373edf 100644 --- a/src/hotspot/share/nmt/mallocTracker.hpp +++ b/src/hotspot/share/nmt/mallocTracker.hpp @@ -166,7 +166,7 @@ class MallocMemorySnapshot { } inline size_t malloc_overhead() const { - return _all_mallocs.count() * sizeof(MallocHeader); + return _all_mallocs.count() * MallocHeader::malloc_overhead(); } // Total malloc invocation count @@ -269,7 +269,7 @@ class MallocTracker : AllStatic { // The overhead that is incurred by switching on NMT (we need, per malloc allocation, // space for header and 16-bit footer) - static const size_t overhead_per_malloc = sizeof(MallocHeader) + sizeof(uint16_t); + static inline size_t 
overhead_per_malloc() { return MallocHeader::malloc_overhead(); } // Parameter name convention: // memblock : the beginning address for user data diff --git a/src/hotspot/share/nmt/memTracker.hpp b/src/hotspot/share/nmt/memTracker.hpp index 31b1e66b8a6b3..6ba1db2e7ffe6 100644 --- a/src/hotspot/share/nmt/memTracker.hpp +++ b/src/hotspot/share/nmt/memTracker.hpp @@ -72,7 +72,7 @@ class MemTracker : AllStatic { // Per-malloc overhead incurred by NMT, depending on the current NMT level static size_t overhead_per_malloc() { - return enabled() ? MallocTracker::overhead_per_malloc : 0; + return enabled() ? MallocTracker::overhead_per_malloc() : 0; } static inline void* record_malloc(void* mem_base, size_t size, MemTag mem_tag, diff --git a/src/hotspot/share/oops/compressedOops.cpp b/src/hotspot/share/oops/compressedOops.cpp index 98a4438383a79..ec41dd8521918 100644 --- a/src/hotspot/share/oops/compressedOops.cpp +++ b/src/hotspot/share/oops/compressedOops.cpp @@ -61,7 +61,7 @@ void CompressedOops::initialize(const ReservedHeapSpace& heap_space) { } if ((uint64_t)heap_space.end() <= OopEncodingHeapMax) { // Did reserve heap below 32Gb. Can use base == 0; - set_base(0); + set_base(nullptr); } else { set_base((address)heap_space.compressed_oop_base()); } @@ -115,7 +115,7 @@ CompressedOops::Mode CompressedOops::mode() { return DisjointBaseNarrowOop; } - if (base() != 0) { + if (base() != nullptr) { return HeapBasedNarrowOop; } @@ -166,7 +166,7 @@ void CompressedOops::print_mode(outputStream* st) { st->print(", Compressed Oops mode: %s", mode_to_string(mode())); - if (base() != 0) { + if (base() != nullptr) { st->print(": " PTR_FORMAT, p2i(base())); } diff --git a/src/hotspot/share/oops/constantPool.hpp b/src/hotspot/share/oops/constantPool.hpp index 7a17c62ddaf99..bcc9a08dd6ca0 100644 --- a/src/hotspot/share/oops/constantPool.hpp +++ b/src/hotspot/share/oops/constantPool.hpp @@ -37,6 +37,7 @@ #include "utilities/align.hpp" #include "utilities/bytes.hpp" #include "utilities/constantTag.hpp" +#include "utilities/macros.hpp" #include "utilities/resourceHash.hpp" // A ConstantPool is an array containing class constants as described in the @@ -781,7 +782,7 @@ class ConstantPool : public Metadata { int pre_resolve_shared_klasses(TRAPS); // Debugging - const char* printable_name_at(int cp_index) PRODUCT_RETURN0; + const char* printable_name_at(int cp_index) PRODUCT_RETURN_NULL; private: diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index fd198f54fc957..6b6d35ee026de 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -2721,6 +2721,13 @@ static void clear_all_breakpoints(Method* m) { #endif void InstanceKlass::unload_class(InstanceKlass* ik) { + + if (ik->is_scratch_class()) { + assert(ik->dependencies().is_empty(), "dependencies should be empty for scratch classes"); + return; + } + assert(ik->is_loaded(), "class should be loaded " PTR_FORMAT, p2i(ik)); + // Release dependencies. 
 ik->dependencies().remove_all_dependents();
@@ -4096,7 +4103,7 @@ void InstanceKlass::set_init_state(ClassState state) {
   assert(good_state || state == allocated, "illegal state transition");
 #endif
   assert(_init_thread == nullptr, "should be cleared before state change");
-  _init_state = state;
+  Atomic::release_store(&_init_state, state);
 }
 
 #if INCLUDE_JVMTI
diff --git a/src/hotspot/share/oops/instanceKlass.hpp b/src/hotspot/share/oops/instanceKlass.hpp
index eaffa0250d133..45d65f273c866 100644
--- a/src/hotspot/share/oops/instanceKlass.hpp
+++ b/src/hotspot/share/oops/instanceKlass.hpp
@@ -507,14 +507,14 @@ class InstanceKlass: public Klass {
 
 public:
   // initialization state
-  bool is_loaded() const { return _init_state >= loaded; }
-  bool is_linked() const { return _init_state >= linked; }
-  bool is_initialized() const { return _init_state == fully_initialized; }
-  bool is_not_initialized() const { return _init_state < being_initialized; }
-  bool is_being_initialized() const { return _init_state == being_initialized; }
-  bool is_in_error_state() const { return _init_state == initialization_error; }
+  bool is_loaded() const { return init_state() >= loaded; }
+  bool is_linked() const { return init_state() >= linked; }
+  bool is_initialized() const { return init_state() == fully_initialized; }
+  bool is_not_initialized() const { return init_state() < being_initialized; }
+  bool is_being_initialized() const { return init_state() == being_initialized; }
+  bool is_in_error_state() const { return init_state() == initialization_error; }
   bool is_reentrant_initialization(Thread *thread) { return thread == _init_thread; }
-  ClassState init_state() const { return _init_state; }
+  ClassState init_state() const { return Atomic::load_acquire(&_init_state); }
   const char* init_state_name() const;
   bool is_rewritten() const { return _misc_flags.rewritten(); }
 
diff --git a/src/hotspot/share/opto/addnode.cpp b/src/hotspot/share/opto/addnode.cpp
index 9a7d93dc469ba..802af20adae12 100644
--- a/src/hotspot/share/opto/addnode.cpp
+++ b/src/hotspot/share/opto/addnode.cpp
@@ -395,9 +395,159 @@ Node* AddNode::IdealIL(PhaseGVN* phase, bool can_reshape, BasicType bt) {
     }
   }
 
+  // Convert a + a + ... + a into a*n
+  Node* serial_additions = convert_serial_additions(phase, bt);
+  if (serial_additions != nullptr) {
+    return serial_additions;
+  }
+
   return AddNode::Ideal(phase, can_reshape);
 }
 
+// Try to convert a series of additions into a single multiplication. Also convert `(a * CON) + a` to `(CON + 1) * a` as
+// a side effect. On success, a new MulNode is returned.
+Node* AddNode::convert_serial_additions(PhaseGVN* phase, BasicType bt) {
+  // We need to make sure that the current AddNode is not part of a MulNode that has already been optimized to a
+  // power-of-2 addition (e.g., 3 * a => (a << 1) + a). Without this check, GVN would keep trying to optimize the same
+  // node and can't progress. For example, 3 * a => (a << 1) + a => 3 * a => (a << 1) + a => ...
+  if (find_power_of_two_addition_pattern(this, bt, nullptr) != nullptr) {
+    return nullptr;
+  }
+
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  jlong multiplier;
+
+  // While multiplications can potentially be optimized into power-of-2 subtractions (e.g., a * 7 => (a << 3) - a),
+  // (x - y) + y => x is already handled by the Identity() methods. So, we don't need to check for that pattern here.
+ if (find_simple_addition_pattern(in1, bt, &multiplier) == in2 + || find_simple_lshift_pattern(in1, bt, &multiplier) == in2 + || find_simple_multiplication_pattern(in1, bt, &multiplier) == in2 + || find_power_of_two_addition_pattern(in1, bt, &multiplier) == in2) { + multiplier++; // +1 for the in2 term + + Node* con = (bt == T_INT) + ? (Node*) phase->intcon((jint) multiplier) // intentional type narrowing to allow overflow at max_jint + : (Node*) phase->longcon(multiplier); + return MulNode::make(con, in2, bt); + } + + return nullptr; +} + +// Try to match `a + a`. On success, return `a` and set `2` as `multiplier`. +// The method matches `n` for pattern: AddNode(a, a). +Node* AddNode::find_simple_addition_pattern(Node* n, BasicType bt, jlong* multiplier) { + if (n->Opcode() == Op_Add(bt) && n->in(1) == n->in(2)) { + *multiplier = 2; + return n->in(1); + } + + return nullptr; +} + +// Try to match `a << CON`. On success, return `a` and set `1 << CON` as `multiplier`. +// Match `n` for pattern: LShiftNode(a, CON). +// Note that the power-of-2 multiplication optimization could potentially convert a MulNode to this pattern. +Node* AddNode::find_simple_lshift_pattern(Node* n, BasicType bt, jlong* multiplier) { + // Note that power-of-2 multiplication optimization could potentially convert a MulNode to this pattern + if (n->Opcode() == Op_LShift(bt) && n->in(2)->is_Con()) { + Node* con = n->in(2); + if (con->is_top()) { + return nullptr; + } + + *multiplier = ((jlong) 1 << con->get_int()); + return n->in(1); + } + + return nullptr; +} + +// Try to match `CON * a`. On success, return `a` and set `CON` as `multiplier`. +// Match `n` for patterns: +// - MulNode(CON, a) +// - MulNode(a, CON) +Node* AddNode::find_simple_multiplication_pattern(Node* n, BasicType bt, jlong* multiplier) { + // This optimization technically only produces MulNode(CON, a), but we might as well match MulNode(a, CON), too. + if (n->Opcode() == Op_Mul(bt) && (n->in(1)->is_Con() || n->in(2)->is_Con())) { + Node* con = n->in(1); + Node* base = n->in(2); + + // swap ConNode to lhs for easier matching + if (!con->is_Con()) { + swap(con, base); + } + + if (con->is_top()) { + return nullptr; + } + + *multiplier = con->get_integer_as_long(bt); + return base; + } + + return nullptr; +} + +// Try to match `(a << CON1) + (a << CON2)`. On success, return `a` and set `(1 << CON1) + (1 << CON2)` as `multiplier`. +// Match `n` for patterns: +// - AddNode(LShiftNode(a, CON), LShiftNode(a, CON)/a) +// - AddNode(LShiftNode(a, CON)/a, LShiftNode(a, CON)) +// given that lhs is different from rhs. +// Note that one of the terms of the addition could simply be `a` (i.e., a << 0). Calling this function with `multiplier` +// being null is safe. +Node* AddNode::find_power_of_two_addition_pattern(Node* n, BasicType bt, jlong* multiplier) { + if (n->Opcode() == Op_Add(bt) && n->in(1) != n->in(2)) { + Node* lhs = n->in(1); + Node* rhs = n->in(2); + + // swap LShiftNode to lhs for easier matching + if (lhs->Opcode() != Op_LShift(bt)) { + swap(lhs, rhs); + } + + // AddNode(LShiftNode(a, CON), *)? + if (lhs->Opcode() != Op_LShift(bt) || !lhs->in(2)->is_Con()) { + return nullptr; + } + + jlong lhs_multiplier = 0; + if (multiplier != nullptr) { + Node* con = lhs->in(2); + if (con->is_top()) { + return nullptr; + } + + lhs_multiplier = (jlong) 1 << con->get_int(); + } + + // AddNode(LShiftNode(a, CON), a)?
+ if (lhs->in(1) == rhs) { + if (multiplier != nullptr) { + *multiplier = lhs_multiplier + 1; + } + + return rhs; + } + + // AddNode(LShiftNode(a, CON), LShiftNode(a, CON2))? + if (rhs->Opcode() == Op_LShift(bt) && lhs->in(1) == rhs->in(1) && rhs->in(2)->is_Con()) { + if (multiplier != nullptr) { + Node* con = rhs->in(2); + if (con->is_top()) { + return nullptr; + } + + *multiplier = lhs_multiplier + ((jlong) 1 << con->get_int()); + } + + return lhs->in(1); + } + return nullptr; + } + return nullptr; +} Node* AddINode::Ideal(PhaseGVN* phase, bool can_reshape) { Node* in1 = in(1); diff --git a/src/hotspot/share/opto/addnode.hpp b/src/hotspot/share/opto/addnode.hpp index 8879606954a52..8afbb440572bf 100644 --- a/src/hotspot/share/opto/addnode.hpp +++ b/src/hotspot/share/opto/addnode.hpp @@ -42,6 +42,13 @@ typedef const Pair ConstAddOperands; // by virtual functions. class AddNode : public Node { virtual uint hash() const; + + Node* convert_serial_additions(PhaseGVN* phase, BasicType bt); + static Node* find_simple_addition_pattern(Node* n, BasicType bt, jlong* multiplier); + static Node* find_simple_lshift_pattern(Node* n, BasicType bt, jlong* multiplier); + static Node* find_simple_multiplication_pattern(Node* n, BasicType bt, jlong* multiplier); + static Node* find_power_of_two_addition_pattern(Node* n, BasicType bt, jlong* multiplier); + public: AddNode( Node *in1, Node *in2 ) : Node(nullptr,in1,in2) { init_class_id(Class_Add); diff --git a/src/hotspot/share/opto/block.cpp b/src/hotspot/share/opto/block.cpp index 1af085cd1282d..b39db528691de 100644 --- a/src/hotspot/share/opto/block.cpp +++ b/src/hotspot/share/opto/block.cpp @@ -398,7 +398,10 @@ PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher) Node *x = new GotoNode(nullptr); x->init_req(0, x); _goto = matcher.match_tree(x); - assert(_goto != nullptr, ""); + assert(_goto != nullptr || C->failure_is_artificial(), ""); + if (C->failing()) { + return; + } _goto->set_req(0,_goto); // Build the CFG in Reverse Post Order diff --git a/src/hotspot/share/opto/buildOopMap.cpp b/src/hotspot/share/opto/buildOopMap.cpp index 4591e87da2d14..b553cc6ea6949 100644 --- a/src/hotspot/share/opto/buildOopMap.cpp +++ b/src/hotspot/share/opto/buildOopMap.cpp @@ -235,6 +235,13 @@ OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, i Node *def = _defs[reg]; // Get reaching def assert( def, "since live better have reaching def" ); + if (def->is_MachTemp()) { + assert(!def->bottom_type()->isa_oop_ptr(), + "ADLC only assigns OOP types to MachTemp defs corresponding to xRegN operands"); + // Exclude MachTemp definitions even if they are typed as oops. + continue; + } + // Classify the reaching def as oop, derived, callee-save, dead, or other const Type *t = def->bottom_type(); if( t->isa_oop_ptr() ) { // Oop or derived? 
diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp index 5db7596e072dc..e778cfcde47e6 100644 --- a/src/hotspot/share/opto/c2_CodeStubs.hpp +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -105,7 +105,6 @@ class C2FastUnlockLightweightStub : public C2CodeStub { Register _thread; Label _slow_path; Label _push_and_slow_path; - Label _check_successor; Label _unlocked_continuation; public: C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(), @@ -114,26 +113,10 @@ class C2FastUnlockLightweightStub : public C2CodeStub { void emit(C2_MacroAssembler& masm); Label& slow_path() { return _slow_path; } Label& push_and_slow_path() { return _push_and_slow_path; } - Label& check_successor() { return _check_successor; } Label& unlocked_continuation() { return _unlocked_continuation; } Label& slow_path_continuation() { return continuation(); } }; -#ifdef _LP64 -class C2HandleAnonOMOwnerStub : public C2CodeStub { -private: - Register _monitor; - Register _tmp; -public: - C2HandleAnonOMOwnerStub(Register monitor, Register tmp = noreg) : C2CodeStub(), - _monitor(monitor), _tmp(tmp) {} - Register monitor() { return _monitor; } - Register tmp() { return _tmp; } - int max_size() const; - void emit(C2_MacroAssembler& masm); -}; -#endif - //-----------------------------C2GeneralStub----------------------------------- // A generalized stub that can be used to implement an arbitrary stub in a // type-safe manner. An example: diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 42de77acca931..c14162ddf6eed 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -70,6 +70,14 @@ develop(bool, StressMethodHandleLinkerInlining, false, \ "Stress inlining through method handle linkers") \ \ + develop(bool, StressBailout, false, \ + "Perform bailouts randomly at C2 failing() checks") \ + \ + develop(uint, StressBailoutMean, 100000, \ + "The expected number of failing() checks made until " \ + "a random bailout.") \ + range(1, max_juint) \ + \ develop(intx, OptoPrologueNops, 0, \ "Insert this many extra nop instructions " \ "in the prologue of every nmethod") \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 2f087858efd48..151c320cadde6 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -610,6 +610,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + case vmIntrinsics::_dtanh: case vmIntrinsics::_dabs: case vmIntrinsics::_fabs: case vmIntrinsics::_iabs: @@ -810,6 +811,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_VectorFromBitsCoerced: case vmIntrinsics::_VectorShuffleIota: case vmIntrinsics::_VectorShuffleToVector: + case vmIntrinsics::_VectorWrapShuffleIndexes: case vmIntrinsics::_VectorLoadOp: case vmIntrinsics::_VectorLoadMaskedOp: case vmIntrinsics::_VectorStoreOp: @@ -820,6 +822,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_VectorTest: case vmIntrinsics::_VectorBlend: case vmIntrinsics::_VectorRearrange: + case vmIntrinsics::_VectorSelectFrom: case vmIntrinsics::_VectorCompare: case vmIntrinsics::_VectorBroadcastInt: case vmIntrinsics::_VectorConvert: diff --git a/src/hotspot/share/opto/callnode.cpp b/src/hotspot/share/opto/callnode.cpp index d715e6533432e..e800b3c736bf2 
100644 --- a/src/hotspot/share/opto/callnode.cpp +++ b/src/hotspot/share/opto/callnode.cpp @@ -755,7 +755,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) { if (Opcode() == Op_CallLeafVector) { // If the return is in vector, compute appropriate regmask taking into account the whole range - if(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ) { + if(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ) { if(OptoReg::is_valid(regs.second())) { for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) { rm.Insert(r); diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp index 220b916436ea8..be0aadacbc2b9 100644 --- a/src/hotspot/share/opto/chaitin.cpp +++ b/src/hotspot/share/opto/chaitin.cpp @@ -479,6 +479,9 @@ void PhaseChaitin::Register_Allocate() { } uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere + if (C->failing()) { + return; + } _lrg_map.set_max_lrg_id(new_max_lrg_id); // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor) // or we failed to split @@ -551,6 +554,9 @@ void PhaseChaitin::Register_Allocate() { return; } uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere + if (C->failing()) { + return; + } _lrg_map.set_max_lrg_id(new_max_lrg_id); // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor) C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split"); diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 5abc398cb8ce2..fa0d39057cb12 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -720,7 +720,7 @@ Compile::Compile( ciEnv* ci_env, ciMethod* target, int osr_bci, } if (StressLCM || StressGCM || StressIGVN || StressCCP || - StressIncrementalInlining || StressMacroExpansion || StressUnstableIfTraps) { + StressIncrementalInlining || StressMacroExpansion || StressUnstableIfTraps || StressBailout) { initialize_stress_seed(directive); } @@ -798,7 +798,7 @@ Compile::Compile( ciEnv* ci_env, ciMethod* target, int osr_bci, assert(failure_reason() != nullptr, "expect reason for parse failure"); stringStream ss; ss.print("method parse failed: %s", failure_reason()); - record_method_not_compilable(ss.as_string()); + record_method_not_compilable(ss.as_string() DEBUG_ONLY(COMMA true)); return; } GraphKit kit(jvms); @@ -973,7 +973,7 @@ Compile::Compile( ciEnv* ci_env, _types = new (comp_arena()) Type_Array(comp_arena()); _node_hash = new (comp_arena()) NodeHash(comp_arena(), 255); - if (StressLCM || StressGCM) { + if (StressLCM || StressGCM || StressBailout) { initialize_stress_seed(directive); } @@ -1018,6 +1018,7 @@ void Compile::Init(bool aliasing) { #ifdef ASSERT _phase_optimize_finished = false; + _phase_verify_ideal_loop = false; _exception_backedge = false; _type_verify = nullptr; #endif @@ -1108,7 +1109,7 @@ void Compile::Init(bool aliasing) { #ifdef ASSERT // Verify that the current StartNode is valid. void Compile::verify_start(StartNode* s) const { - assert(failing() || s == start(), "should be StartNode"); + assert(failing_internal() || s == start(), "should be StartNode"); } #endif @@ -1118,7 +1119,7 @@ void Compile::verify_start(StartNode* s) const { * the ideal graph. */ StartNode* Compile::start() const { - assert (!failing(), "Must not have pending failure. 
Reason is: %s", failure_reason()); + assert (!failing_internal() || C->failure_is_artificial(), "Must not have pending failure. Reason is: %s", failure_reason()); for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) { Node* start = root()->fast_out(i); if (start->is_Start()) { @@ -1465,12 +1466,18 @@ const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const { } else { ciInstanceKlass *canonical_holder = ik->get_canonical_holder(offset); assert(offset < canonical_holder->layout_helper_size_in_bytes(), ""); - if (!ik->equals(canonical_holder) || tj->offset() != offset) { - if( is_known_inst ) { - tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, nullptr, offset, to->instance_id()); - } else { - tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, nullptr, offset); - } + assert(tj->offset() == offset, "no change to offset expected"); + bool xk = to->klass_is_exact(); + int instance_id = to->instance_id(); + + // If the input type's class is the holder: if exact, the type only includes interfaces implemented by the holder + // but if not exact, it may include extra interfaces: build new type from the holder class to make sure only + // its interfaces are included. + if (xk && ik->equals(canonical_holder)) { + assert(tj == TypeInstPtr::make(to->ptr(), canonical_holder, is_known_inst, nullptr, offset, instance_id), "exact type should be canonical type"); + } else { + assert(xk || !is_known_inst, "Known instance should be exact type"); + tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, is_known_inst, nullptr, offset, instance_id); } } } @@ -2114,7 +2121,7 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) { igvn_worklist()->ensure_empty(); // should be done with igvn while (inline_incrementally_one()) { - assert(!failing(), "inconsistent"); + assert(!failing_internal() || failure_is_artificial(), "inconsistent"); } if (failing()) return; @@ -2157,7 +2164,7 @@ void Compile::process_late_inline_calls_no_inline(PhaseIterGVN& igvn) { igvn_worklist()->ensure_empty(); // should be done with igvn while (inline_incrementally_one()) { - assert(!failing(), "inconsistent"); + assert(!failing_internal() || failure_is_artificial(), "inconsistent"); } if (failing()) return; @@ -2944,6 +2951,9 @@ void Compile::Code_Gen() { // Build a proper-looking CFG PhaseCFG cfg(node_arena(), root(), matcher); + if (failing()) { + return; + } _cfg = &cfg; { TracePhase tp("scheduler", &timers[_t_scheduler]); @@ -4329,7 +4339,7 @@ void Compile::verify_graph_edges(bool no_dead_code) { // to backtrack and retry without subsuming loads. Other than this backtracking // behavior, the Compile's failure reason is quietly copied up to the ciEnv // by the logic in C2Compiler. 
-void Compile::record_failure(const char* reason) { +void Compile::record_failure(const char* reason DEBUG_ONLY(COMMA bool allow_multiple_failures)) { if (log() != nullptr) { log()->elem("failure reason='%s' phase='compile'", reason); } @@ -4339,6 +4349,8 @@ void Compile::record_failure(const char* reason) { if (CaptureBailoutInformation) { _first_failure_details = new CompilationFailureInfo(reason); } + } else { + assert(!StressBailout || allow_multiple_failures, "should have handled previous failure."); } if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { @@ -4366,7 +4378,9 @@ Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator) } Compile::TracePhase::~TracePhase() { - if (_compile->failing()) return; + if (_compile->failing_internal()) { + return; // timing code, not stressing bailouts. + } #ifdef ASSERT if (PrintIdealNodeCount) { tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'", @@ -5057,6 +5071,22 @@ bool Compile::randomized_select(int count) { return (random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count); } +#ifdef ASSERT +// Failures are geometrically distributed with probability 1/StressBailoutMean. +bool Compile::fail_randomly() { + if ((random() % StressBailoutMean) != 0) { + return false; + } + record_failure("StressBailout"); + return true; +} + +bool Compile::failure_is_artificial() { + assert(failing_internal(), "should be failing"); + return C->failure_reason_is("StressBailout"); +} +#endif + CloneMap& Compile::clone_map() { return _clone_map; } void Compile::set_clone_map(Dict* d) { _clone_map._dict = d; } @@ -5144,7 +5174,7 @@ void Compile::sort_macro_nodes() { } void Compile::print_method(CompilerPhaseType cpt, int level, Node* n) { - if (failing()) { return; } + if (failing_internal()) { return; } // failing_internal to not stress bailouts from printing code. EventCompilerPhase event(UNTIMED); if (event.should_commit()) { CompilerEvent::PhaseEvent::post(event, C->_latest_stage_start_counter, cpt, C->_compile_id, level); diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 13ad980434b79..1ccfedd0d460f 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -391,6 +391,8 @@ class Compile : public Phase { DEBUG_ONLY(Unique_Node_List* _modified_nodes;) // List of nodes which inputs were modified DEBUG_ONLY(bool _phase_optimize_finished;) // Used for live node verification while creating new nodes + DEBUG_ONLY(bool _phase_verify_ideal_loop;) // Are we in PhaseIdealLoop verification? + // Arenas for new-space and old-space nodes. // Swapped between using _node_arena. // The lifetime of the old-space nodes is during xform. 
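The comment on `fail_randomly()` above says failures are geometrically distributed with probability `1/StressBailoutMean`, i.e. a compilation performs about `StressBailoutMean` `failing()` checks on average before the artificial bailout fires. A quick way to convince yourself is to simulate the same `random() % mean` test; this is a sketch, with `std::mt19937` standing in for C2's per-compilation RNG and a smaller mean than the default to keep the run fast:

```cpp
#include <cstdio>
#include <random>

int main() {
  const unsigned mean = 1000;   // stand-in for StressBailoutMean (default 100000)
  const int trials = 10000;
  std::mt19937 rng(42);
  long long total_checks = 0;
  for (int t = 0; t < trials; t++) {
    long long checks = 1;
    // Same test as Compile::fail_randomly(): bail out when random() % mean == 0.
    while ((rng() % mean) != 0) {
      checks++;
    }
    total_checks += checks;
  }
  std::printf("average failing() checks until bailout: %lld (expected ~%u)\n",
              total_checks / trials, mean);
  return 0;
}
```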
@@ -786,6 +788,12 @@ class Compile : public Phase { void set_post_loop_opts_phase() { _post_loop_opts_phase = true; } void reset_post_loop_opts_phase() { _post_loop_opts_phase = false; } +#ifdef ASSERT + bool phase_verify_ideal_loop() const { return _phase_verify_ideal_loop; } + void set_phase_verify_ideal_loop() { _phase_verify_ideal_loop = true; } + void reset_phase_verify_ideal_loop() { _phase_verify_ideal_loop = false; } +#endif + bool allow_macro_nodes() { return _allow_macro_nodes; } void reset_allow_macro_nodes() { _allow_macro_nodes = false; } @@ -815,7 +823,7 @@ class Compile : public Phase { ciEnv* env() const { return _env; } CompileLog* log() const { return _log; } - bool failing() const { + bool failing_internal() const { return _env->failing() || _failure_reason.get() != nullptr; } @@ -827,6 +835,27 @@ class Compile : public Phase { const CompilationFailureInfo* first_failure_details() const { return _first_failure_details; } + bool failing() { + if (failing_internal()) { + return true; + } +#ifdef ASSERT + // Disable stress code for PhaseIdealLoop verification (would have cascading effects). + if (phase_verify_ideal_loop()) { + return false; + } + if (StressBailout) { + return fail_randomly(); + } +#endif + return false; + } + +#ifdef ASSERT + bool fail_randomly(); + bool failure_is_artificial(); +#endif + bool failure_reason_is(const char* r) const { return (r == _failure_reason.get()) || (r != nullptr && @@ -834,11 +863,11 @@ class Compile : public Phase { strcmp(r, _failure_reason.get()) == 0); } - void record_failure(const char* reason); - void record_method_not_compilable(const char* reason) { + void record_failure(const char* reason DEBUG_ONLY(COMMA bool allow_multiple_failures = false)); + void record_method_not_compilable(const char* reason DEBUG_ONLY(COMMA bool allow_multiple_failures = false)) { env()->record_method_not_compilable(reason); // Record failure reason. - record_failure(reason); + record_failure(reason DEBUG_ONLY(COMMA allow_multiple_failures)); } bool check_node_count(uint margin, const char* reason) { if (oom()) { diff --git a/src/hotspot/share/opto/gcm.cpp b/src/hotspot/share/opto/gcm.cpp index 4646e1bb9c701..9ba571b926d0d 100644 --- a/src/hotspot/share/opto/gcm.cpp +++ b/src/hotspot/share/opto/gcm.cpp @@ -746,6 +746,21 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) { // The anti-dependence constraints apply only to the fringe of this tree. Node* initial_mem = load->in(MemNode::Memory); + + // We don't optimize the memory graph for pinned loads, so we may need to raise the + // root of our search tree through the corresponding slices of MergeMem nodes to + // get to the node that really creates the memory state for this slice. + if (load_alias_idx >= Compile::AliasIdxRaw) { + while (initial_mem->is_MergeMem()) { + MergeMemNode* mm = initial_mem->as_MergeMem(); + Node* p = mm->memory_at(load_alias_idx); + if (p != mm->base_memory()) { + initial_mem = p; + } else { + break; + } + } + } worklist_def_use_mem_states.push(nullptr, initial_mem); while (worklist_def_use_mem_states.is_nonempty()) { // Examine a nearby store to see if it might interfere with our load. 
@@ -1512,8 +1527,8 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) { C->record_failure(C2Compiler::retry_no_subsuming_loads()); } else { // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) - assert(false, "graph should be schedulable"); - C->record_method_not_compilable("late schedule failed: incorrect graph"); + assert(C->failure_is_artificial(), "graph should be schedulable"); + C->record_method_not_compilable("late schedule failed: incorrect graph" DEBUG_ONLY(COMMA true)); } return; } @@ -1693,8 +1708,8 @@ void PhaseCFG::global_code_motion() { Block* block = get_block(i); if (!schedule_local(block, ready_cnt, visited, recalc_pressure_nodes)) { if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { - assert(false, "local schedule failed"); - C->record_method_not_compilable("local schedule failed"); + assert(C->failure_is_artificial(), "local schedule failed"); + C->record_method_not_compilable("local schedule failed" DEBUG_ONLY(COMMA true)); } _regalloc = nullptr; return; diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp index 3bc5b9a8b2a7d..27120c5ea1e73 100644 --- a/src/hotspot/share/opto/graphKit.cpp +++ b/src/hotspot/share/opto/graphKit.cpp @@ -340,7 +340,9 @@ static inline void add_one_req(Node* dstphi, Node* src) { // having a control input of its exception map, rather than null. Such // regions do not appear except in this function, and in use_exception_state. void GraphKit::combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map) { - if (failing()) return; // dying anyway... + if (failing_internal()) { + return; // dying anyway... + } JVMState* ex_jvms = ex_map->_jvms; assert(ex_jvms->same_calls_as(phi_map->_jvms), "consistent call chains"); assert(ex_jvms->stkoff() == phi_map->_jvms->stkoff(), "matching locals"); @@ -446,7 +448,7 @@ void GraphKit::combine_exception_states(SafePointNode* ex_map, SafePointNode* ph //--------------------------use_exception_state-------------------------------- Node* GraphKit::use_exception_state(SafePointNode* phi_map) { - if (failing()) { stop(); return top(); } + if (failing_internal()) { stop(); return top(); } Node* region = phi_map->control(); Node* hidden_merge_mark = root(); assert(phi_map->jvms()->map() == phi_map, "sanity: 1-1 relation"); @@ -1556,6 +1558,7 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, bool mismatched, bool unsafe, uint8_t barrier_data) { + assert(adr_idx == C->get_alias_index(_gvn.type(adr)->isa_ptr()), "slice of address and input slice don't match"); assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); const TypePtr* adr_type = nullptr; // debug-mode-only argument debug_only(adr_type = C->get_adr_type(adr_idx)); @@ -1585,6 +1588,7 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt, bool unsafe, int barrier_data) { assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); + assert(adr_idx == C->get_alias_index(_gvn.type(adr)->isa_ptr()), "slice of address and input slice don't match"); const TypePtr* adr_type = nullptr; debug_only(adr_type = C->get_adr_type(adr_idx)); Node *mem = memory(adr_idx); @@ -2056,7 +2060,9 @@ Node* GraphKit::uncommon_trap(int trap_request, ciKlass* klass, const char* comment, bool must_throw, bool keep_exact_action) { - if (failing()) stop(); + if (failing_internal()) { + stop(); + } if (stopped()) return nullptr; // trap reachable? // Note: If ProfileTraps is true, and if a deopt. 
actually @@ -3008,7 +3014,7 @@ void GraphKit::guard_klass_being_initialized(Node* klass) { Node* adr = basic_plus_adr(top(), klass, init_state_off); Node* init_state = LoadNode::make(_gvn, nullptr, immutable_memory(), adr, adr->bottom_type()->is_ptr(), TypeInt::BYTE, - T_BYTE, MemNode::unordered); + T_BYTE, MemNode::acquire); init_state = _gvn.transform(init_state); Node* being_initialized_state = makecon(TypeInt::make(InstanceKlass::being_initialized)); diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp index e7f17c72a1b99..421ce933ed1f5 100644 --- a/src/hotspot/share/opto/graphKit.hpp +++ b/src/hotspot/share/opto/graphKit.hpp @@ -82,7 +82,7 @@ class GraphKit : public Phase { #ifdef ASSERT ~GraphKit() { - assert(failing() || !has_exceptions(), + assert(failing_internal() || !has_exceptions(), "unless compilation failed, user must call transfer_exceptions_into_jvms"); } #endif @@ -182,6 +182,7 @@ class GraphKit : public Phase { // Tell if the compilation is failing. bool failing() const { return C->failing(); } + bool failing_internal() const { return C->failing_internal(); } // Set _map to null, signalling a stop to further bytecode execution. // Preserve the map intact for future use, and return it back to the caller. diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp index 9db94748ca27c..87be6a76eb202 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -161,6 +161,14 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo Node *m = val->out(i); if( !m->is_Mach() ) continue; MachNode *mach = m->as_Mach(); + if (mach->barrier_data() != 0) { + // Using memory accesses with barriers to perform implicit null checks is + // not supported. These operations might expand into multiple assembly + // instructions during code emission, including new memory accesses (e.g. + // in G1's pre-barrier), which would invalidate the implicit null + // exception table. + continue; + } was_store = false; int iop = mach->ideal_Opcode(); switch( iop ) { @@ -1196,7 +1204,7 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray& ready_cnt, Vecto // to the Compile object, and the C2Compiler will see it and retry. 
C->record_failure(C2Compiler::retry_no_subsuming_loads()); } else { - assert(false, "graph should be schedulable"); + assert(C->failure_is_artificial(), "graph should be schedulable"); } // assert( phi_cnt == end_idx(), "did not schedule all" ); return false; diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index c95a450272989..4ab4eea6f8f68 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -254,6 +254,7 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: + case vmIntrinsics::_dtanh: case vmIntrinsics::_dabs: case vmIntrinsics::_fabs: case vmIntrinsics::_iabs: @@ -716,6 +717,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { return inline_vector_mask_operation(); case vmIntrinsics::_VectorShuffleToVector: return inline_vector_shuffle_to_vector(); + case vmIntrinsics::_VectorWrapShuffleIndexes: + return inline_vector_wrap_shuffle_indexes(); case vmIntrinsics::_VectorLoadOp: return inline_vector_mem_operation(/*is_store=*/false); case vmIntrinsics::_VectorLoadMaskedOp: @@ -736,6 +739,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { return inline_vector_blend(); case vmIntrinsics::_VectorRearrange: return inline_vector_rearrange(); + case vmIntrinsics::_VectorSelectFrom: + return inline_vector_select_from(); case vmIntrinsics::_VectorCompare: return inline_vector_compare(); case vmIntrinsics::_VectorBroadcastInt: @@ -1879,6 +1884,9 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { return StubRoutines::dtan() != nullptr ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") : runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN"); + case vmIntrinsics::_dtanh: + return StubRoutines::dtanh() != nullptr ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtanh(), "dtanh") : false; case vmIntrinsics::_dexp: return StubRoutines::dexp() != nullptr ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(), "dexp") : @@ -2044,7 +2052,7 @@ LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type) if (base_type == nullptr) { // Unknown type. return Type::AnyPtr; - } else if (base_type == TypePtr::NULL_PTR) { + } else if (_gvn.type(base->uncast()) == TypePtr::NULL_PTR) { // Since this is a null+long form, we have to switch to a rawptr. base = _gvn.transform(new CastX2PNode(offset)); offset = MakeConX(0); @@ -2362,8 +2370,9 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c SafePointNode* old_map = clone_map(); Node* adr = make_unsafe_address(base, offset, type, kind == Relaxed); + assert(!stopped(), "Inlining of unsafe access failed: address construction stopped unexpectedly"); - if (_gvn.type(base)->isa_ptr() == TypePtr::NULL_PTR) { + if (_gvn.type(base->uncast())->isa_ptr() == TypePtr::NULL_PTR) { if (type != T_OBJECT) { decorators |= IN_NATIVE; // off-heap primitive access } else { @@ -2895,7 +2904,7 @@ bool LibraryCallKit::inline_unsafe_allocate() { Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset())); // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler // can generate code to load it as unsigned byte. 
- Node* inst = make_load(nullptr, insp, TypeInt::UBYTE, T_BOOLEAN, MemNode::unordered); + Node* inst = make_load(nullptr, insp, TypeInt::UBYTE, T_BOOLEAN, MemNode::acquire); Node* bits = intcon(InstanceKlass::fully_initialized); test = _gvn.transform(new SubINode(inst, bits)); // The 'test' is non-zero if we need to take a slow path. @@ -2950,11 +2959,10 @@ bool LibraryCallKit::inline_native_notify_jvmti_funcs(address funcAddr, const ch Node* thread = ideal.thread(); Node* jt_addr = basic_plus_adr(thread, in_bytes(JavaThread::is_in_VTMS_transition_offset())); Node* vt_addr = basic_plus_adr(vt_oop, java_lang_Thread::is_in_VTMS_transition_offset()); - const TypePtr *addr_type = _gvn.type(addr)->isa_ptr(); sync_kit(ideal); - access_store_at(nullptr, jt_addr, addr_type, hide, _gvn.type(hide), T_BOOLEAN, IN_NATIVE | MO_UNORDERED); - access_store_at(nullptr, vt_addr, addr_type, hide, _gvn.type(hide), T_BOOLEAN, IN_NATIVE | MO_UNORDERED); + access_store_at(nullptr, jt_addr, _gvn.type(jt_addr)->is_ptr(), hide, _gvn.type(hide), T_BOOLEAN, IN_NATIVE | MO_UNORDERED); + access_store_at(nullptr, vt_addr, _gvn.type(vt_addr)->is_ptr(), hide, _gvn.type(hide), T_BOOLEAN, IN_NATIVE | MO_UNORDERED); ideal.sync_kit(this); } ideal.end_if(); @@ -3316,7 +3324,9 @@ bool LibraryCallKit::inline_native_getEventWriter() { // Load the raw epoch value from the threadObj. Node* threadObj_epoch_offset = basic_plus_adr(threadObj, java_lang_Thread::jfr_epoch_offset()); - Node* threadObj_epoch_raw = access_load_at(threadObj, threadObj_epoch_offset, TypeRawPtr::BOTTOM, TypeInt::CHAR, T_CHAR, + Node* threadObj_epoch_raw = access_load_at(threadObj, threadObj_epoch_offset, + _gvn.type(threadObj_epoch_offset)->isa_ptr(), + TypeInt::CHAR, T_CHAR, IN_HEAP | MO_UNORDERED | C2_MISMATCHED | C2_CONTROL_DEPENDENT_LOAD); // Mask off the excluded information from the epoch. @@ -3335,7 +3345,8 @@ bool LibraryCallKit::inline_native_getEventWriter() { // Load the raw epoch value from the vthread. Node* vthread_epoch_offset = basic_plus_adr(vthread, java_lang_Thread::jfr_epoch_offset()); - Node* vthread_epoch_raw = access_load_at(vthread, vthread_epoch_offset, TypeRawPtr::BOTTOM, TypeInt::CHAR, T_CHAR, + Node* vthread_epoch_raw = access_load_at(vthread, vthread_epoch_offset, _gvn.type(vthread_epoch_offset)->is_ptr(), + TypeInt::CHAR, T_CHAR, IN_HEAP | MO_UNORDERED | C2_MISMATCHED | C2_CONTROL_DEPENDENT_LOAD); // Mask off the excluded information from the epoch. @@ -3581,7 +3592,7 @@ void LibraryCallKit::extend_setCurrentThread(Node* jt, Node* thread) { // Load the raw epoch value from the vthread. Node* epoch_offset = basic_plus_adr(thread, java_lang_Thread::jfr_epoch_offset()); - Node* epoch_raw = access_load_at(thread, epoch_offset, TypeRawPtr::BOTTOM, TypeInt::CHAR, T_CHAR, + Node* epoch_raw = access_load_at(thread, epoch_offset, _gvn.type(epoch_offset)->is_ptr(), TypeInt::CHAR, T_CHAR, IN_HEAP | MO_UNORDERED | C2_MISMATCHED | C2_CONTROL_DEPENDENT_LOAD); // Mask off the excluded information from the epoch. 
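The `unordered` → `acquire` loads above pair with the `Atomic::release_store` added to `InstanceKlass::set_init_state` earlier in this patch: the initializing thread publishes `fully_initialized` only after the class state it guards has been written, and readers acquire-load the state before relying on it. A minimal sketch of the same publication idiom using `std::atomic` rather than HotSpot's `Atomic` API (field names illustrative):

```cpp
#include <atomic>
#include <cassert>
#include <thread>

struct Klass {
  int vtable_entry = 0;             // stands in for the class data being guarded
  std::atomic<int> init_state{0};   // 0 = allocated, 2 = fully_initialized
};

int main() {
  Klass k;
  std::thread initializer([&] {
    k.vtable_entry = 42;                              // initialize first...
    k.init_state.store(2, std::memory_order_release); // ...then publish
  });
  std::thread user([&] {
    while (k.init_state.load(std::memory_order_acquire) != 2) { /* spin */ }
    assert(k.vtable_entry == 42);   // guaranteed by the release/acquire pairing
  });
  initializer.join();
  user.join();
  return 0;
}
```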
diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index dd74734802f65..4a85304517479 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -353,6 +353,7 @@ class LibraryCallKit : public GraphKit { bool inline_vector_nary_operation(int n); bool inline_vector_frombits_coerced(); bool inline_vector_shuffle_to_vector(); + bool inline_vector_wrap_shuffle_indexes(); bool inline_vector_shuffle_iota(); Node* partially_wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt); bool inline_vector_mask_operation(); @@ -363,6 +364,7 @@ class LibraryCallKit : public GraphKit { bool inline_vector_test(); bool inline_vector_blend(); bool inline_vector_rearrange(); + bool inline_vector_select_from(); bool inline_vector_compare(); bool inline_vector_broadcast_int(); bool inline_vector_convert(); @@ -372,7 +374,7 @@ class LibraryCallKit : public GraphKit { bool inline_index_vector(); bool inline_index_partially_in_upper_range(); - Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2); + Node* gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2); enum VectorMaskUseType { VecMaskUseLoad = 1 << 0, diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 59662ad53fe07..0bed38e5fb068 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -1464,7 +1464,8 @@ IfTrueNode* PhaseIdealLoop::create_initialized_assertion_predicate(IfNode* templ Node* new_stride, Node* control) { assert(assertion_predicate_has_loop_opaque_node(template_assertion_predicate), "must find OpaqueLoop* nodes for Template Assertion Predicate"); - InitializedAssertionPredicate initialized_assertion_predicate(template_assertion_predicate, new_init, new_stride, this); + InitializedAssertionPredicateCreator initialized_assertion_predicate(template_assertion_predicate, new_init, + new_stride, this); IfTrueNode* success_proj = initialized_assertion_predicate.create(control); assert(!assertion_predicate_has_loop_opaque_node(success_proj->in(0)->as_If()), "Initialized Assertion Predicates do not have OpaqueLoop* nodes in the bool expression anymore"); diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp index 3128e23d79c49..6cb50b3dee2b5 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -692,14 +692,24 @@ SafePointNode* PhaseIdealLoop::find_safepoint(Node* back_control, Node* x, Ideal // We can only use that safepoint if there's no side effect between the backedge and the safepoint. - // mm is used for book keeping + // mm is the memory state at the safepoint (when it's a MergeMem) + // no_side_effect_since_safepoint() goes over the memory state at the backedge. It resets the mm input for each + // component of the memory state it encounters so it points to the base memory. Once no_side_effect_since_safepoint() + // is done, if no side effect after the safepoint was found, mm should transform to the base memory: the states at + // the backedge and safepoint are the same so all components of the memory state at the safepoint should have been + // reset. 
MergeMemNode* mm = nullptr; #ifdef ASSERT if (mem->is_MergeMem()) { mm = mem->clone()->as_MergeMem(); _igvn._worklist.push(mm); for (MergeMemStream mms(mem->as_MergeMem()); mms.next_non_empty(); ) { - if (mms.alias_idx() != Compile::AliasIdxBot && loop != get_loop(ctrl_or_self(mms.memory()))) { + // Loop invariant memory state won't be reset by no_side_effect_since_safepoint(). Do it here. + // Escape Analysis can add state to mm that it doesn't add to the backedge memory Phis, breaking verification + // code that relies on mm. Clear that extra state here. + if (mms.alias_idx() != Compile::AliasIdxBot && + (loop != get_loop(ctrl_or_self(mms.memory())) || + (mms.adr_type()->isa_oop_ptr() && mms.adr_type()->is_known_instance()))) { mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory()); } } @@ -1918,12 +1928,28 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_ // Since stride > 0 and limit_correction <= stride + 1, we can restate this with no over- or underflow into: // max_int - canonicalized_correction - limit_correction >= limit // Since canonicalized_correction and limit_correction are both constants, we can replace them with a new constant: - // final_correction = canonicalized_correction + limit_correction + // (v) final_correction = canonicalized_correction + limit_correction + // // which gives us: // // Final predicate condition: // max_int - final_correction >= limit // + // However, we need to be careful that (v) does not over- or underflow. + // We know that: + // canonicalized_correction = stride - 1 + // and + // limit_correction <= stride + 1 + // and thus + // canonicalized_correction + limit_correction <= 2 * stride + // To prevent an over- or underflow of (v), we must ensure that + // 2 * stride <= max_int + // which can safely be checked without over- or underflow with + // (vi) stride != min_int AND abs(stride) <= max_int / 2 + // + // We could try to further optimize the cases where (vi) does not hold but given that such large strides are + // very uncommon and the loop would only run for a very few iterations anyway, we simply bail out if (vi) fails. + // // (2) Loop Limit Check Predicate for (ii): // Using (ii): init < limit // @@ -1954,6 +1980,10 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_ // there is no overflow of the iv phi after the first iteration. In this case, we don't need to check (ii) // again and can skip the predicate. + // Check (vi) and bail out if the stride is too big. + if (stride_con == min_signed_integer(iv_bt) || (ABS(stride_con) > max_signed_integer(iv_bt) / 2)) { + return false; + } // Accounting for (LE3) and (LE4) where we use pre-incremented phis in the loop exit check. const jlong limit_correction_for_pre_iv_exit_check = (phi_incr != nullptr) ? stride_con : 0; @@ -4319,13 +4349,21 @@ void PhaseIdealLoop::mark_loop_associated_parse_predicates_useful() { } } +// This visitor marks all visited Parse Predicates useful. 
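Returning to the stride check: condition (vi) above is exactly what keeps `canonicalized_correction + limit_correction <= 2 * stride` representable. A small sketch over widened 64-bit arithmetic, so the check itself cannot overflow; note that `abs(min_int)` is the very overflow (vi) sidesteps:

```cpp
#include <cassert>
#include <cstdint>

// Condition (vi): stride != min_int AND abs(stride) <= max_int / 2,
// computed on int64_t so neither abs() nor 2 * stride can overflow here.
bool passes_vi(int64_t stride) {
  int64_t abs_stride = stride < 0 ? -stride : stride;
  return stride != INT32_MIN && abs_stride <= INT32_MAX / 2;
}

int main() {
  const int64_t samples[] = {1, -1, INT32_MAX / 2, INT32_MAX / 2 + 1,
                             INT32_MAX, INT32_MIN};
  for (int64_t stride : samples) {
    if (passes_vi(stride)) {
      int64_t twice = 2 * stride;   // the bound on final_correction
      assert(INT32_MIN <= twice && twice <= INT32_MAX);
    }
  }
  return 0;
}
```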
+class ParsePredicateUsefulMarker : public PredicateVisitor { + public: + using PredicateVisitor::visit; + + void visit(const ParsePredicate& parse_predicate) override { + parse_predicate.head()->mark_useful(); + } +}; + void PhaseIdealLoop::mark_useful_parse_predicates_for_loop(IdealLoopTree* loop) { Node* entry = loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl); - const Predicates predicates(entry); - ParsePredicateIterator iterator(predicates); - while (iterator.has_next()) { - iterator.next()->mark_useful(); - } + const PredicateIterator predicate_iterator(entry); + ParsePredicateUsefulMarker useful_marker; + predicate_iterator.for_each(useful_marker); } void PhaseIdealLoop::add_useless_parse_predicates_to_igvn_worklist() { @@ -4915,7 +4953,9 @@ void PhaseIdealLoop::verify() const { bool success = true; PhaseIdealLoop phase_verify(_igvn, this); - if (C->failing()) return; + if (C->failing_internal()) { + return; + } // Verify ctrl and idom of every node. success &= verify_idom_and_nodes(C->root(), &phase_verify); @@ -6267,6 +6307,43 @@ void PhaseIdealLoop::build_loop_late_post(Node *n) { build_loop_late_post_work(n, true); } +// Class to visit all predicates in a predicate chain to find out which are dominated by a given node. Keeps track of +// the entry to the earliest predicate that is still dominated by the given dominator. This class is used when trying to +// legally skip all predicates when figuring out the latest placement such that a node does not interfere with Loop +// Predication or creating a Loop Limit Check Predicate later. +class DominatedPredicates : public UnifiedPredicateVisitor { + Node* const _dominator; + Node* _earliest_dominated_predicate_entry; + bool _should_continue; + PhaseIdealLoop* const _phase; + + public: + DominatedPredicates(Node* dominator, Node* start_node, PhaseIdealLoop* phase) + : _dominator(dominator), + _earliest_dominated_predicate_entry(start_node), + _should_continue(true), + _phase(phase) {} + NONCOPYABLE(DominatedPredicates); + + bool should_continue() const override { + return _should_continue; + } + + // Returns the entry to the earliest predicate that is still dominated by the given dominator (all could be dominated). + Node* earliest_dominated_predicate_entry() const { + return _earliest_dominated_predicate_entry; + } + + void visit_predicate(const Predicate& predicate) override { + Node* entry = predicate.entry(); + if (_phase->is_strict_dominator(entry, _dominator)) { + _should_continue = false; + } else { + _earliest_dominated_predicate_entry = entry; + } + } +}; + void PhaseIdealLoop::build_loop_late_post_work(Node *n, bool pinned) { if (n->req() == 2 && (n->Opcode() == Op_ConvI2L || n->Opcode() == Op_CastII) && !C->major_progress() && !_verify_only) { @@ -6378,14 +6455,10 @@ void PhaseIdealLoop::build_loop_late_post_work(Node *n, bool pinned) { // Move the node above predicates as far up as possible so a // following pass of Loop Predication doesn't hoist a predicate // that depends on it above that node. 
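The rewritten hoisting code just below drives this class through `PredicateIterator::for_each`, which consults `should_continue()` between visits. A generic, self-contained sketch of that visitor-with-early-stop contract (illustrative names, not the real predicate API):

```cpp
#include <cassert>
#include <vector>

// Visitor whose should_continue() lets the iterator stop early while the
// visitor accumulates its answer, mirroring DominatedPredicates above.
struct Visitor {
  virtual bool should_continue() const { return true; }
  virtual void visit(int element) = 0;
  virtual ~Visitor() = default;
};

struct Iterator {
  std::vector<int> elements;
  void for_each(Visitor& v) const {
    for (int e : elements) {
      if (!v.should_continue()) break;   // visitor asked to stop
      v.visit(e);
    }
  }
};

// Track the last element at or below a limit, stopping at the first one past
// it, the way DominatedPredicates tracks the earliest dominated entry.
struct BelowLimit : Visitor {
  int limit, last_seen = -1;
  bool stopped = false;
  explicit BelowLimit(int limit) : limit(limit) {}
  bool should_continue() const override { return !stopped; }
  void visit(int e) override {
    if (e > limit) { stopped = true; } else { last_seen = e; }
  }
};

int main() {
  Iterator it{{1, 3, 5, 7, 9}};
  BelowLimit v(5);
  it.for_each(v);
  assert(v.last_seen == 5);
  return 0;
}
```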
- PredicateEntryIterator predicate_iterator(least); - while (predicate_iterator.has_next()) { - Node* next_predicate_entry = predicate_iterator.next_entry(); - if (is_strict_dominator(next_predicate_entry, early)) { - break; - } - least = next_predicate_entry; - } + const PredicateIterator predicate_iterator(least); + DominatedPredicates dominated_predicates(early, least, this); + predicate_iterator.for_each(dominated_predicates); + least = dominated_predicates.earliest_dominated_predicate_entry(); } // Try not to place code on a loop entry projection // which can inhibit range check elimination. diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index 2d169a6459b38..3aa67bcb5cb8b 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1128,7 +1128,9 @@ class PhaseIdealLoop : public PhaseTransform { _verify_only(verify_me == nullptr), _mode(LoopOptsVerify), _nodes_required(UINT_MAX) { + DEBUG_ONLY(C->set_phase_verify_ideal_loop();) build_and_optimize(); + DEBUG_ONLY(C->reset_phase_verify_ideal_loop();) } #endif diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index bf773d43d3d39..2031b09ca9d18 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -194,6 +194,9 @@ void Matcher::match( ) { } // One-time initialization of some register masks. init_spill_mask( C->root()->in(1) ); + if (C->failing()) { + return; + } _return_addr_mask = return_addr(); #ifdef _LP64 // Pointers take 2 slots in 64-bit land @@ -287,10 +290,16 @@ void Matcher::match( ) { // preserve area, locks & pad2. OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first()); + if (C->failing()) { + return; + } if( OptoReg::is_valid(reg1)) _calling_convention_mask[i].Insert(reg1); OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second()); + if (C->failing()) { + return; + } if( OptoReg::is_valid(reg2)) _calling_convention_mask[i].Insert(reg2); @@ -386,7 +395,7 @@ void Matcher::match( ) { // Don't set control, it will confuse GCM since there are no uses. // The control will be set when this node is used first time // in find_base_for_derived(). - assert(_mach_null != nullptr, ""); + assert(_mach_null != nullptr || C->failure_is_artificial(), ""); // bailouts are handled below. C->set_root(xroot->is_Root() ? xroot->as_Root() : nullptr); @@ -404,7 +413,7 @@ void Matcher::match( ) { assert(C->failure_reason() != nullptr, "graph lost: reason unknown"); ss.print("graph lost: reason unknown"); } - C->record_method_not_compilable(ss.as_string()); + C->record_method_not_compilable(ss.as_string() DEBUG_ONLY(COMMA true)); } if (C->failing()) { // delete old; @@ -1439,10 +1448,16 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) { } // Grab first register, adjust stack slots and insert in mask. OptoReg::Name reg1 = warp_outgoing_stk_arg(first, begin_out_arg_area, out_arg_limit_per_call ); + if (C->failing()) { + return nullptr; + } if (OptoReg::is_valid(reg1)) rm->Insert( reg1 ); // Grab second register (if any), adjust stack slots and insert in mask. OptoReg::Name reg2 = warp_outgoing_stk_arg(second, begin_out_arg_area, out_arg_limit_per_call ); + if (C->failing()) { + return nullptr; + } if (OptoReg::is_valid(reg2)) rm->Insert( reg2 ); } // End of for all arguments @@ -1594,6 +1609,14 @@ static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool s // the same register. See find_shared_node. 
return false; } else { // Not a constant + if (!shared && Matcher::is_encode_and_store_pattern(n, m)) { + // Make it possible to match "encode and store" patterns with non-shared + // encode operations that are pinned to a control node (e.g. by CastPP + // node removal in final graph reshaping). The matched instruction cannot + // float above the encode's control node because it is pinned to the + // store's control node. + return false; + } // Stop recursion if they have different Controls. Node* m_control = m->in(0); // Control of load's memory can post-dominates load's control. @@ -2671,6 +2694,10 @@ bool Matcher::gen_narrow_oop_implicit_null_checks() { // Compute RegMask for an ideal register. const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) { + assert(!C->failing_internal() || C->failure_is_artificial(), "already failing."); + if (C->failing()) { + return nullptr; + } const Type* t = Type::mreg2type[ideal_reg]; if (t == nullptr) { assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg); @@ -2701,7 +2728,10 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) { default: ShouldNotReachHere(); } MachNode* mspill = match_tree(spill); - assert(mspill != nullptr, "matching failed: %d", ideal_reg); + assert(mspill != nullptr || C->failure_is_artificial(), "matching failed: %d", ideal_reg); + if (C->failing()) { + return nullptr; + } // Handle generic vector operand case if (Matcher::supports_generic_vector_operands && t->isa_vect()) { specialize_mach_node(mspill); @@ -2833,9 +2863,21 @@ bool Matcher::is_non_long_integral_vector(const Node* n) { return is_subword_type(bt) || bt == T_INT; } +bool Matcher::is_encode_and_store_pattern(const Node* n, const Node* m) { + if (n == nullptr || + m == nullptr || + n->Opcode() != Op_StoreN || + !m->is_EncodeP() || + n->as_Store()->barrier_data() == 0) { + return false; + } + assert(m == n->in(MemNode::ValueIn), "m should be input to n"); + return true; +} + #ifdef ASSERT bool Matcher::verify_after_postselect_cleanup() { - assert(!C->failing(), "sanity"); + assert(!C->failing_internal() || C->failure_is_artificial(), "sanity"); if (supports_generic_vector_operands) { Unique_Node_List useful; C->identify_useful_nodes(useful); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 84e48086f92d3..257628350881a 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -385,6 +385,8 @@ class Matcher : public PhaseTransform { return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare); } + static bool is_encode_and_store_pattern(const Node* n, const Node* m); + // These calls are all generated by the ADLC // Java-Java calling convention diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index eee14e5ba03f1..6613918826057 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -4644,6 +4644,11 @@ intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseGVN* phase, bool Node* mem = st->in(MemNode::Memory); if (!(mem->is_Proj() && mem->in(0) == this)) return FAIL; // must not be preceded by other stores + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if ((st->Opcode() == Op_StoreP || st->Opcode() == Op_StoreN) && + !bs->can_initialize_object(st)) { + return FAIL; + } Node* adr = st->in(MemNode::Address); intptr_t offset; AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset); diff --git 
a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp index 85d206749f6be..323ab3dba7d65 100644 --- a/src/hotspot/share/opto/memnode.hpp +++ b/src/hotspot/share/opto/memnode.hpp @@ -124,11 +124,7 @@ class MemNode : public Node { // Raw access function, to allow copying of adr_type efficiently in // product builds and retain the debug info for debug builds. const TypePtr *raw_adr_type() const { -#ifdef ASSERT - return _adr_type; -#else - return 0; -#endif + return DEBUG_ONLY(_adr_type) NOT_DEBUG(nullptr); } // Return the barrier data of n, if available, or 0 otherwise. diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index b3f251bb361ba..eda0f65d6bc1f 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -1715,7 +1715,7 @@ void PhaseOutput::fill_buffer(C2_MacroAssembler* masm, uint* blk_starts) { node_offsets[n->_idx] = masm->offset(); } #endif - assert(!C->failing(), "Should not reach here if failing."); + assert(!C->failing_internal() || C->failure_is_artificial(), "Should not reach here if failing."); // "Normal" instruction case DEBUG_ONLY(uint instr_offset = masm->offset()); @@ -2022,6 +2022,8 @@ void PhaseOutput::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_s // Handle implicit null exception table updates if (n->is_MachNullCheck()) { + assert(n->in(1)->as_Mach()->barrier_data() == 0, + "Implicit null checks on memory accesses with barriers are not yet supported"); uint block_num = block->non_connector_successor(0)->_pre_order; _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); continue; @@ -3391,7 +3393,7 @@ uint PhaseOutput::scratch_emit_size(const Node* n) { n->emit(&masm, C->regalloc()); // Emitting into the scratch buffer should not fail - assert (!C->failing(), "Must not have pending failure. Reason is: %s", C->failure_reason()); + assert(!C->failing_internal() || C->failure_is_artificial(), "Must not have pending failure. Reason is: %s", C->failure_reason()); if (is_branch) // Restore label. n->as_MachBranch()->label_set(saveL, save_bnum); diff --git a/src/hotspot/share/opto/parse.hpp b/src/hotspot/share/opto/parse.hpp index 484c49367cc4d..039283bc863d1 100644 --- a/src/hotspot/share/opto/parse.hpp +++ b/src/hotspot/share/opto/parse.hpp @@ -426,7 +426,7 @@ class Parse : public GraphKit { void set_parse_bci(int bci); // Must this parse be aborted? - bool failing() { return C->failing(); } + bool failing() const { return C->failing_internal(); } // might have cascading effects, not stressing bailouts for now. 
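An aside on the `raw_adr_type()` cleanup above: `DEBUG_ONLY`/`NOT_DEBUG` come from `utilities/macros.hpp` and expand to their argument in one build flavor and to nothing in the other, so the `#ifdef` block collapses to one line. A simplified sketch of the idiom (the real macros key off HotSpot's `ASSERT` define):

```cpp
#include <cstdio>

// Simplified versions of HotSpot's conditional-compilation macros: with
// ASSERT defined, DEBUG_ONLY(x) keeps x and NOT_DEBUG(x) drops it; in a
// product build the roles are swapped.
#ifdef ASSERT
#define DEBUG_ONLY(code) code
#define NOT_DEBUG(code)
#else
#define DEBUG_ONLY(code)
#define NOT_DEBUG(code) code
#endif

struct MemNodeLike {
  DEBUG_ONLY(const char* _adr_type = "raw";)   // field exists only in debug builds
  const char* raw_adr_type() const {
    return DEBUG_ONLY(_adr_type) NOT_DEBUG(nullptr);   // the rewritten one-liner
  }
};

int main() {
  MemNodeLike n;
  std::printf("raw_adr_type: %s\n", n.raw_adr_type() ? n.raw_adr_type() : "(null)");
  return 0;
}
```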
Block* rpo_at(int rpo) { assert(0 <= rpo && rpo < _block_count, "oob"); diff --git a/src/hotspot/share/opto/predicates.cpp b/src/hotspot/share/opto/predicates.cpp index 3887e8a5f6cdf..18eea3a10bcc6 100644 --- a/src/hotspot/share/opto/predicates.cpp +++ b/src/hotspot/share/opto/predicates.cpp @@ -73,14 +73,6 @@ ParsePredicateNode* ParsePredicate::init_parse_predicate(Node* parse_predicate_p return nullptr; } -bool ParsePredicate::is_predicate(Node* maybe_success_proj) { - if (!maybe_success_proj->is_IfProj()) { - return false; - } - IfNode* if_node = maybe_success_proj->in(0)->as_If(); - return if_node->is_ParsePredicate(); -} - Deoptimization::DeoptReason RegularPredicateWithUCT::uncommon_trap_reason(IfProjNode* if_proj) { CallStaticJavaNode* uct_call = if_proj->is_uncommon_trap_if_pattern(); if (uct_call == nullptr) { @@ -90,27 +82,31 @@ Deoptimization::DeoptReason RegularPredicateWithUCT::uncommon_trap_reason(IfProj } bool RegularPredicateWithUCT::is_predicate(Node* maybe_success_proj) { - if (may_be_predicate_if(maybe_success_proj)) { - IfProjNode* success_proj = maybe_success_proj->as_IfProj(); - const Deoptimization::DeoptReason deopt_reason = uncommon_trap_reason(success_proj); - return (deopt_reason == Deoptimization::Reason_loop_limit_check || - deopt_reason == Deoptimization::Reason_predicate || - deopt_reason == Deoptimization::Reason_profile_predicate); + if (RegularPredicate::may_be_predicate_if(maybe_success_proj)) { + return has_valid_uncommon_trap(maybe_success_proj); } else { return false; } } -bool RegularPredicateWithUCT::is_predicate(Node* node, Deoptimization::DeoptReason deopt_reason) { - if (may_be_predicate_if(node)) { +bool RegularPredicateWithUCT::has_valid_uncommon_trap(const Node* success_proj) { + assert(RegularPredicate::may_be_predicate_if(success_proj), "must have been checked before"); + const Deoptimization::DeoptReason deopt_reason = uncommon_trap_reason(success_proj->as_IfProj()); + return (deopt_reason == Deoptimization::Reason_loop_limit_check || + deopt_reason == Deoptimization::Reason_predicate || + deopt_reason == Deoptimization::Reason_profile_predicate); +} + +bool RegularPredicateWithUCT::is_predicate(const Node* node, Deoptimization::DeoptReason deopt_reason) { + if (RegularPredicate::may_be_predicate_if(node)) { return deopt_reason == uncommon_trap_reason(node->as_IfProj()); } else { return false; } } -// A Runtime Predicate must have an If or a RangeCheck node, while the If should not be a zero trip guard check. -bool RegularPredicateWithUCT::may_be_predicate_if(Node* node) { +// A Regular Predicate must have an If or a RangeCheck node, while the If should not be a zero trip guard check. +bool RegularPredicate::may_be_predicate_if(const Node* node) { if (node->is_IfProj()) { const IfNode* if_node = node->in(0)->as_If(); const int opcode_if = if_node->Opcode(); @@ -122,39 +118,43 @@ bool RegularPredicateWithUCT::may_be_predicate_if(Node* node) { return false; } -bool RuntimePredicate::is_success_proj(Node* node, Deoptimization::DeoptReason deopt_reason) { +// Runtime Predicates always have an UCT since they could normally fail at runtime. In this case we execute the trap +// on the failing path. 
+bool RuntimePredicate::is_predicate(Node* node) { + return RegularPredicateWithUCT::is_predicate(node); +} + +bool RuntimePredicate::is_predicate(Node* node, Deoptimization::DeoptReason deopt_reason) { return RegularPredicateWithUCT::is_predicate(node, deopt_reason); } -ParsePredicateIterator::ParsePredicateIterator(const Predicates& predicates) : _current_index(0) { - const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block(); - if (loop_limit_check_predicate_block->has_parse_predicate()) { - _parse_predicates.push(loop_limit_check_predicate_block->parse_predicate()); - } - if (UseProfiledLoopPredicate) { - const PredicateBlock* profiled_loop_predicate_block = predicates.profiled_loop_predicate_block(); - if (profiled_loop_predicate_block->has_parse_predicate()) { - _parse_predicates.push(profiled_loop_predicate_block->parse_predicate()); - } +// A Template Assertion Predicate has an If/RangeCheckNode and either an UCT or a halt node depending on where it +// was created. +bool TemplateAssertionPredicate::is_predicate(Node* node) { + if (!RegularPredicate::may_be_predicate_if(node)) { + return false; } - if (UseLoopPredicate) { - const PredicateBlock* loop_predicate_block = predicates.loop_predicate_block(); - if (loop_predicate_block->has_parse_predicate()) { - _parse_predicates.push(loop_predicate_block->parse_predicate()); - } + IfNode* if_node = node->in(0)->as_If(); + if (if_node->in(1)->is_Opaque4()) { + return RegularPredicateWithUCT::has_valid_uncommon_trap(node) || AssertionPredicateWithHalt::has_halt(node); } + return false; } -ParsePredicateNode* ParsePredicateIterator::next() { - assert(has_next(), "always check has_next() first"); - return _parse_predicates.at(_current_index++); +// Initialized Assertion Predicates always have the dedicated opaque node and a halt node. +bool InitializedAssertionPredicate::is_predicate(Node* node) { + if (!AssertionPredicateWithHalt::is_predicate(node)) { + return false; + } + IfNode* if_node = node->in(0)->as_If(); + return if_node->in(1)->is_OpaqueInitializedAssertionPredicate(); } #ifdef ASSERT // Check that the block has at most one Parse Predicate and that we only find Regular Predicate nodes (i.e. IfProj, // If, or RangeCheck nodes). -void PredicateBlock::verify_block() { - Node* next = _parse_predicate.entry(); // Skip unique Parse Predicate of this block if present +void RegularPredicateBlock::verify_block(Node* tail) { + Node* next = tail; while (next != _entry) { assert(!next->is_ParsePredicate(), "can only have one Parse Predicate in a block"); const int opcode = next->Opcode(); @@ -166,17 +166,6 @@ void PredicateBlock::verify_block() { } #endif // ASSERT -// Walk over all Regular Predicates of this block (if any) and return the first node not belonging to the block -// anymore (i.e. entry to the first Regular Predicate in this block if any or `regular_predicate_proj` otherwise). -Node* PredicateBlock::skip_regular_predicates(Node* regular_predicate_proj, Deoptimization::DeoptReason deopt_reason) { - Node* entry = regular_predicate_proj; - while (RuntimePredicate::is_success_proj(entry, deopt_reason)) { - assert(entry->in(0)->as_If(), "must be If node"); - entry = entry->in(0)->in(0); - } - return entry; -} - // This strategy clones the OpaqueLoopInit and OpaqueLoopStride nodes. 
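Taken together, the `is_predicate()` overloads above classify a candidate projection by two features: which opaque node feeds the `If`'s bool input, and whether the failing path ends in an uncommon trap or a `Halt` node. A compact restatement of that decision table as a sketch (illustrative enums; the deopt-reason filtering inside `has_valid_uncommon_trap` is omitted):

```cpp
#include <cstdio>

enum class BoolInput { Plain, Opaque4, OpaqueInitializedAssertionPredicate };
enum class FailPath { UncommonTrap, Halt, Neither };

// Mirrors the classification in predicates.cpp, simplified.
const char* classify(BoolInput in, FailPath fail) {
  if (in == BoolInput::Plain && fail == FailPath::UncommonTrap)
    return "Runtime Predicate";              // RuntimePredicate::is_predicate
  if (in == BoolInput::Opaque4 &&
      (fail == FailPath::UncommonTrap || fail == FailPath::Halt))
    return "Template Assertion Predicate";   // TemplateAssertionPredicate::is_predicate
  if (in == BoolInput::OpaqueInitializedAssertionPredicate && fail == FailPath::Halt)
    return "Initialized Assertion Predicate"; // InitializedAssertionPredicate::is_predicate
  return "not a predicate";
}

int main() {
  std::printf("%s\n", classify(BoolInput::Opaque4, FailPath::Halt));
  return 0;
}
```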
class CloneStrategy : public TransformStrategyForOpaqueLoopNodes { PhaseIdealLoop* const _phase; @@ -381,8 +370,8 @@ bool TemplateAssertionExpressionNode::is_template_assertion_predicate(Node* node return node->is_If() && node->in(1)->is_Opaque4(); } -InitializedAssertionPredicate::InitializedAssertionPredicate(IfNode* template_assertion_predicate, Node* new_init, - Node* new_stride, PhaseIdealLoop* phase) +InitializedAssertionPredicateCreator::InitializedAssertionPredicateCreator(IfNode* template_assertion_predicate, Node* new_init, + Node* new_stride, PhaseIdealLoop* phase) : _template_assertion_predicate(template_assertion_predicate), _new_init(new_init), _new_stride(new_stride), @@ -408,7 +397,7 @@ InitializedAssertionPredicate::InitializedAssertionPredicate(IfNode* template_as // success fail path new success new Halt // proj (Halt or UCT) proj // -IfTrueNode* InitializedAssertionPredicate::create(Node* control) { +IfTrueNode* InitializedAssertionPredicateCreator::create(Node* control) { IdealLoopTree* loop = _phase->get_loop(control); OpaqueInitializedAssertionPredicateNode* assertion_expression = create_assertion_expression(control); IfNode* if_node = create_if_node(control, assertion_expression, loop); @@ -417,7 +406,7 @@ IfTrueNode* InitializedAssertionPredicate::create(Node* control) { } // Create a new Assertion Expression to be used as bool input for the Initialized Assertion Predicate IfNode. -OpaqueInitializedAssertionPredicateNode* InitializedAssertionPredicate::create_assertion_expression(Node* control) { +OpaqueInitializedAssertionPredicateNode* InitializedAssertionPredicateCreator::create_assertion_expression(Node* control) { Opaque4Node* template_opaque = _template_assertion_predicate->in(1)->as_Opaque4(); TemplateAssertionExpression template_assertion_expression(template_opaque); Opaque4Node* tmp_opaque = template_assertion_expression.clone_and_replace_init_and_stride(_new_init, _new_stride, @@ -428,9 +417,9 @@ OpaqueInitializedAssertionPredicateNode* InitializedAssertionPredicate::create_a return assertion_expression; } -IfNode* InitializedAssertionPredicate::create_if_node(Node* control, - OpaqueInitializedAssertionPredicateNode* assertion_expression, - IdealLoopTree* loop) { +IfNode* InitializedAssertionPredicateCreator::create_if_node(Node* control, + OpaqueInitializedAssertionPredicateNode* assertion_expression, + IdealLoopTree* loop) { const int if_opcode = _template_assertion_predicate->Opcode(); NOT_PRODUCT(const AssertionPredicateType assertion_predicate_type = _template_assertion_predicate->assertion_predicate_type();) IfNode* if_node = if_opcode == Op_If ? 
@@ -440,19 +429,19 @@ IfNode* InitializedAssertionPredicate::create_if_node(Node* control, return if_node; } -IfTrueNode* InitializedAssertionPredicate::create_success_path(IfNode* if_node, IdealLoopTree* loop) { +IfTrueNode* InitializedAssertionPredicateCreator::create_success_path(IfNode* if_node, IdealLoopTree* loop) { IfTrueNode* success_proj = new IfTrueNode(if_node); _phase->register_control(success_proj, loop, if_node); return success_proj; } -void InitializedAssertionPredicate::create_fail_path(IfNode* if_node, IdealLoopTree* loop) { +void InitializedAssertionPredicateCreator::create_fail_path(IfNode* if_node, IdealLoopTree* loop) { IfFalseNode* fail_proj = new IfFalseNode(if_node); _phase->register_control(fail_proj, loop, if_node); create_halt_node(fail_proj, loop); } -void InitializedAssertionPredicate::create_halt_node(IfFalseNode* fail_proj, IdealLoopTree* loop) { +void InitializedAssertionPredicateCreator::create_halt_node(IfFalseNode* fail_proj, IdealLoopTree* loop) { StartNode* start_node = _phase->C->start(); Node* frame = new ParmNode(start_node, TypeFunc::FramePtr); _phase->register_new_node(frame, start_node); @@ -461,17 +450,45 @@ void InitializedAssertionPredicate::create_halt_node(IfFalseNode* fail_proj, Ide _phase->register_control(halt, loop, fail_proj); } -// Is current node pointed to by iterator a predicate? -bool PredicateEntryIterator::has_next() const { - return ParsePredicate::is_predicate(_current) || - RegularPredicateWithUCT::is_predicate(_current) || - AssertionPredicateWithHalt::is_predicate(_current); +#ifndef PRODUCT +void PredicateBlock::dump() const { + dump(""); +} + +void PredicateBlock::dump(const char* prefix) const { + if (is_non_empty()) { + PredicatePrinter printer(prefix); + PredicateBlockIterator iterator(_tail, _deopt_reason); + iterator.for_each(printer); + } else { + tty->print_cr("%s- <empty>", prefix); + } +} + +// Dumps all predicates from the loop to the earliest predicate in a pretty format. +void Predicates::dump() const { + if (has_any()) { + Node* loop_head = _tail->unique_ctrl_out(); + tty->print_cr("%d %s:", loop_head->_idx, loop_head->Name()); + tty->print_cr("- Loop Limit Check Predicate Block:"); + _loop_limit_check_predicate_block.dump(" "); + tty->print_cr("- Profiled Loop Predicate Block:"); + _profiled_loop_predicate_block.dump(" "); + tty->print_cr("- Loop Predicate Block:"); + _loop_predicate_block.dump(" "); + tty->cr(); + } else { + tty->print_cr("<no predicates>"); + } +} + +void Predicates::dump_at(Node* node) { + Predicates predicates(node); + predicates.dump(); } -// Skip the current predicate pointed to by iterator by returning the input into the predicate. This could possibly be -// a non-predicate node. -Node* PredicateEntryIterator::next_entry() { - assert(has_next(), "current must be predicate"); - _current = _current->in(0)->in(0); - return _current; +// Debug method to dump all predicates that are found above 'loop_node'.
+void Predicates::dump_for_loop(LoopNode* loop_node) { + dump_at(loop_node->skip_strip_mined()->in(LoopNode::EntryControl)); } +#endif // NOT PRODUCT diff --git a/src/hotspot/share/opto/predicates.hpp b/src/hotspot/share/opto/predicates.hpp index 96f5c438b802f..b38b888cc3dba 100644 --- a/src/hotspot/share/opto/predicates.hpp +++ b/src/hotspot/share/opto/predicates.hpp @@ -30,6 +30,11 @@ #include "opto/opaquenode.hpp" class IdealLoopTree; +class InitializedAssertionPredicate; +class ParsePredicate; +class PredicateVisitor; +class RuntimePredicate; +class TemplateAssertionPredicate; /* * There are different kinds of predicates throughout the code. We differentiate between the following predicates: @@ -152,7 +157,8 @@ class IdealLoopTree; * together. * - Loop Limit Check Groups the Loop Limit Check Predicate (if created) and the Loop Limit * Predicate Block: Check Parse Predicate (if not removed, yet) together. - * + * - Regular Predicate Block: A block that only contains the Regular Predicates of a Predicate Block without the + * Parse Predicate. * * Initially, before applying any loop-splitting optimizations, we find the following structure after Loop Predication * (predicates inside square brackets [] do not need to exist if there are no checks to hoist): @@ -205,6 +211,41 @@ enum class AssertionPredicateType { }; #endif // NOT PRODUCT +// Interface to represent a C2 predicate. A predicate is always represented by two CFG nodes: +// - An If node (head) +// - An IfProj node representing the success projection of the If node (tail). +class Predicate : public StackObj { + public: + // Return the unique entry CFG node into the predicate. + virtual Node* entry() const = 0; + + // Return the head node of the predicate which is either: + // - A ParsePredicateNode if the predicate is a Parse Predicate + // - An IfNode or RangeCheckNode, otherwise. + virtual IfNode* head() const = 0; + + // Return the tail node of the predicate. Runtime Predicates can either have a true or false projection as success + // projection while Parse Predicates and Assertion Predicates always have a true projection as success projection. + virtual IfProjNode* tail() const = 0; +}; + +// Generic predicate visitor that does nothing. Subclass this visitor to add customized actions for each predicate. +// The visit methods of this visitor are called from the predicate iterator classes which walk the predicate chain. +// Use the UnifiedPredicateVisitor if the type of the predicate does not matter. +class PredicateVisitor : StackObj { + public: + virtual void visit(const ParsePredicate& parse_predicate) {} + virtual void visit(const RuntimePredicate& runtime_predicate) {} + virtual void visit(const TemplateAssertionPredicate& template_assertion_predicate) {} + virtual void visit(const InitializedAssertionPredicate& initialized_assertion_predicate) {} + + // This method can be overridden to stop the predicate iterators from visiting more predicates further up in the + // predicate chain. + virtual bool should_continue() const { + return true; + } +}; + // Class to represent Assertion Predicates with a HaltNode instead of an UCT (i.e. either an Initialized Assertion // Predicate or a Template Assertion Predicate created after the initial one at Loop Predication). class AssertionPredicatesWithHalt : public StackObj { @@ -228,9 +269,15 @@ class AssertionPredicatesWithHalt : public StackObj { // Note that all other Regular Predicates have an UCT node.
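The Predicate interface and PredicateVisitor above form a classic visitor over the predicate chain: the iterator classes walk the chain and dispatch a typed visit() call per predicate, and should_continue() lets a visitor abort the walk early. A minimal standalone sketch of the pattern, with simplified stand-in types rather than HotSpot's classes:

#include <iostream>
#include <vector>

// Simplified stand-ins (not HotSpot types): two predicate kinds and a visitor.
struct ParsePredicate {};
struct RuntimePredicate {};

class PredicateVisitor {
 public:
  virtual void visit(const ParsePredicate&) {}
  virtual void visit(const RuntimePredicate&) {}
  virtual bool should_continue() const { return true; }
  virtual ~PredicateVisitor() {}
};

enum class Kind { Parse, Runtime };

// Walks a predicate chain and dispatches a typed visit per element, stopping
// early once the visitor reports should_continue() == false.
void for_each(const std::vector<Kind>& chain, PredicateVisitor& visitor) {
  for (Kind kind : chain) {
    if (!visitor.should_continue()) {
      return;
    }
    if (kind == Kind::Parse) {
      visitor.visit(ParsePredicate());
    } else {
      visitor.visit(RuntimePredicate());
    }
  }
}

// A custom visitor: counts Runtime Predicates, but stops after the first two,
// mirroring how a visitor would be plugged into PredicateIterator::for_each().
class RuntimePredicateCounter : public PredicateVisitor {
 public:
  int count = 0;
  void visit(const RuntimePredicate&) override { count++; }
  bool should_continue() const override { return count < 2; }
};

int main() {
  RuntimePredicateCounter counter;
  for_each({Kind::Runtime, Kind::Parse, Kind::Runtime, Kind::Runtime}, counter);
  std::cout << counter.count << "\n"; // prints 2: the walk stopped early
  return 0;
}

The same mechanism also gives skip-only traversal for free: RegularPredicateBlockIterator::skip_all() simply runs the walk with the do-nothing base visitor.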
class AssertionPredicateWithHalt : public StackObj { static bool has_assertion_predicate_opaque(const Node* predicate_proj); - static bool has_halt(const Node* success_proj); public: static bool is_predicate(const Node* maybe_success_proj); + static bool has_halt(const Node* success_proj); +}; + +// Utility class representing a Regular Predicate which is either a Runtime Predicate or an Assertion Predicate. +class RegularPredicate : public StackObj { + public: + static bool may_be_predicate_if(const Node* node); }; // Class to represent a single Regular Predicate with an UCT. This could either be: @@ -239,15 +286,15 @@ class AssertionPredicateWithHalt : public StackObj { // Note that all other Regular Predicates have a Halt node. class RegularPredicateWithUCT : public StackObj { static Deoptimization::DeoptReason uncommon_trap_reason(IfProjNode* if_proj); - static bool may_be_predicate_if(Node* node); public: static bool is_predicate(Node* maybe_success_proj); - static bool is_predicate(Node* node, Deoptimization::DeoptReason deopt_reason); + static bool is_predicate(const Node* node, Deoptimization::DeoptReason deopt_reason); + static bool has_valid_uncommon_trap(const Node* success_proj); }; // Class to represent a Parse Predicate. -class ParsePredicate : public StackObj { +class ParsePredicate : public Predicate { ParsePredicateSuccessProj* _success_proj; ParsePredicateNode* _parse_predicate_node; Node* _entry; @@ -267,7 +314,7 @@ class ParsePredicate : public StackObj { // Returns the control input node into this Parse Predicate if it is valid. Otherwise, it returns the passed node // into the constructor of this class. - Node* entry() const { + Node* entry() const override { return _entry; } @@ -277,23 +324,102 @@ class ParsePredicate : public StackObj { return _parse_predicate_node != nullptr; } - ParsePredicateNode* node() const { + ParsePredicateNode* head() const override { assert(is_valid(), "must be valid"); return _parse_predicate_node; } - ParsePredicateSuccessProj* success_proj() const { + ParsePredicateSuccessProj* tail() const override { assert(is_valid(), "must be valid"); return _success_proj; } +}; + +// Class to represent a Runtime Predicate which always has an associated UCT on the failing path. +class RuntimePredicate : public Predicate { + IfProjNode* _success_proj; + IfNode* _if_node; + + public: + explicit RuntimePredicate(IfProjNode* success_proj) + : _success_proj(success_proj), + _if_node(success_proj->in(0)->as_If()) { + assert(is_predicate(success_proj), "must be valid"); + } + NONCOPYABLE(RuntimePredicate); + private: static bool is_predicate(Node* maybe_success_proj); + + public: + Node* entry() const override { + return _if_node->in(0); + } + + IfNode* head() const override { + return _if_node; + } + + IfProjNode* tail() const override { + return _success_proj; + } + + static bool is_predicate(Node* node, Deoptimization::DeoptReason deopt_reason); }; -// Utility class for queries on Runtime Predicates. -class RuntimePredicate : public StackObj { +// Class to represent a Template Assertion Predicate. 
+class TemplateAssertionPredicate : public Predicate { + IfTrueNode* _success_proj; + IfNode* _if_node; + public: - static bool is_success_proj(Node* node, Deoptimization::DeoptReason deopt_reason); + explicit TemplateAssertionPredicate(IfTrueNode* success_proj) + : _success_proj(success_proj), + _if_node(success_proj->in(0)->as_If()) { + assert(is_predicate(success_proj), "must be valid"); + } + + Node* entry() const override { + return _if_node->in(0); + } + + IfNode* head() const override { + return _if_node; + } + + IfTrueNode* tail() const override { + return _success_proj; + } + + static bool is_predicate(Node* node); +}; + +// Class to represent an Initialized Assertion Predicate which always has a halt node on the failing path. +// This predicate should never fail at runtime by design. +class InitializedAssertionPredicate : public Predicate { + IfTrueNode* _success_proj; + IfNode* _if_node; + + public: + explicit InitializedAssertionPredicate(IfTrueNode* success_proj) + : _success_proj(success_proj), + _if_node(success_proj->in(0)->as_If()) { + assert(is_predicate(success_proj), "must be valid"); + } + + Node* entry() const override { + return _if_node->in(0); + } + + IfNode* head() const override { + return _if_node; + } + + IfTrueNode* tail() const override { + return _success_proj; + } + + static bool is_predicate(Node* node); }; // Interface to transform OpaqueLoopInit and OpaqueLoopStride nodes of a Template Assertion Expression. @@ -395,16 +521,16 @@ class TemplateAssertionExpressionNode : public StackObj { }; // This class creates a new Initialized Assertion Predicate. -class InitializedAssertionPredicate : public StackObj { +class InitializedAssertionPredicateCreator : public StackObj { IfNode* const _template_assertion_predicate; Node* const _new_init; Node* const _new_stride; PhaseIdealLoop* const _phase; public: - InitializedAssertionPredicate(IfNode* template_assertion_predicate, Node* new_init, Node* new_stride, - PhaseIdealLoop* phase); - NONCOPYABLE(InitializedAssertionPredicate); + InitializedAssertionPredicateCreator(IfNode* template_assertion_predicate, Node* new_init, Node* new_stride, + PhaseIdealLoop* phase); + NONCOPYABLE(InitializedAssertionPredicateCreator); IfTrueNode* create(Node* control); @@ -416,23 +542,208 @@ class InitializedAssertionPredicate : public StackObj { IfTrueNode* create_success_path(IfNode* if_node, IdealLoopTree* loop); }; +// This class iterates through all predicates of a Regular Predicate Block and applies the given visitor to each. +class RegularPredicateBlockIterator : public StackObj { + Node* const _start_node; + const Deoptimization::DeoptReason _deopt_reason; + + public: + RegularPredicateBlockIterator(Node* start_node, Deoptimization::DeoptReason deopt_reason) + : _start_node(start_node), + _deopt_reason(deopt_reason) {} + NONCOPYABLE(RegularPredicateBlockIterator); + + // Skip all predicates by just following the inputs. We do not call any user provided visitor. + Node* skip_all() const { + PredicateVisitor do_nothing; // No real visits, just do nothing. + return for_each(do_nothing); + } + + // Walk over all predicates of this block (if any) and apply the given 'predicate_visitor' to each predicate. + // Returns the entry to the earliest predicate. 
+ Node* for_each(PredicateVisitor& predicate_visitor) const { + Node* current = _start_node; + while (predicate_visitor.should_continue()) { + if (TemplateAssertionPredicate::is_predicate(current)) { + TemplateAssertionPredicate template_assertion_predicate(current->as_IfTrue()); + predicate_visitor.visit(template_assertion_predicate); + current = template_assertion_predicate.entry(); + } else if (RuntimePredicate::is_predicate(current, _deopt_reason)) { + RuntimePredicate runtime_predicate(current->as_IfProj()); + predicate_visitor.visit(runtime_predicate); + current = runtime_predicate.entry(); + } else if (InitializedAssertionPredicate::is_predicate(current)) { + InitializedAssertionPredicate initialized_assertion_predicate(current->as_IfTrue()); + predicate_visitor.visit(initialized_assertion_predicate); + current = initialized_assertion_predicate.entry(); + } else { + // Either a Parse Predicate or not a Regular Predicate. In both cases, the node does not belong to this block. + break; + } + } + return current; + } +}; + +// This class iterates through all predicates of a Predicate Block and applies the given visitor to each. +class PredicateBlockIterator : public StackObj { + Node* const _start_node; + const ParsePredicate _parse_predicate; // Could be missing. + const RegularPredicateBlockIterator _regular_predicate_block_iterator; + + public: + PredicateBlockIterator(Node* start_node, Deoptimization::DeoptReason deopt_reason) + : _start_node(start_node), + _parse_predicate(start_node, deopt_reason), + _regular_predicate_block_iterator(_parse_predicate.entry(), deopt_reason) {} + + // Walk over all predicates of this block (if any) and apply the given 'predicate_visitor' to each predicate. + // Returns the entry to the earliest predicate. + Node* for_each(PredicateVisitor& predicate_visitor) const { + if (!predicate_visitor.should_continue()) { + return _start_node; + } + if (_parse_predicate.is_valid()) { + predicate_visitor.visit(_parse_predicate); + } + return _regular_predicate_block_iterator.for_each(predicate_visitor); + } +}; + +// Class to walk over all predicates starting at a node, which usually is the loop entry node, and following the inputs. +// At each predicate, a PredicateVisitor is applied which the user can implement freely. +class PredicateIterator : public StackObj { + Node* _start_node; + + public: + explicit PredicateIterator(Node* start_node) + : _start_node(start_node) {} + NONCOPYABLE(PredicateIterator); + + // Apply the 'predicate_visitor' for each predicate found in the predicate chain started at the provided node. + // Returns the entry to the earliest predicate. + Node* for_each(PredicateVisitor& predicate_visitor) const { + Node* current = _start_node; + PredicateBlockIterator loop_limit_check_predicate_iterator(current, Deoptimization::Reason_loop_limit_check); + current = loop_limit_check_predicate_iterator.for_each(predicate_visitor); + PredicateBlockIterator profiled_loop_predicate_iterator(current, Deoptimization::Reason_profile_predicate); + current = profiled_loop_predicate_iterator.for_each(predicate_visitor); + PredicateBlockIterator loop_predicate_iterator(current, Deoptimization::Reason_predicate); + return loop_predicate_iterator.for_each(predicate_visitor); + } +}; + +// Unified PredicateVisitor which only provides a single visit method for a generic Predicate. This visitor can be used +// when it does not matter what kind of predicate is visited. 
Note that we override all normal visit methods from +// PredicateVisitor by calling the unified method. These visit methods are marked final such that they cannot be +// overridden by implementors of this class. +class UnifiedPredicateVisitor : public PredicateVisitor { + public: + virtual void visit(const TemplateAssertionPredicate& template_assertion_predicate) override final { + visit_predicate(template_assertion_predicate); + } + + virtual void visit(const ParsePredicate& parse_predicate) override final { + visit_predicate(parse_predicate); + } + + virtual void visit(const RuntimePredicate& runtime_predicate) override final { + visit_predicate(runtime_predicate); + } + + virtual void visit(const InitializedAssertionPredicate& initialized_assertion_predicate) override final { + visit_predicate(initialized_assertion_predicate); + } + + virtual void visit_predicate(const Predicate& predicate) = 0; +}; + +// A block of Regular Predicates inside a Predicate Block without its Parse Predicate. +class RegularPredicateBlock : public StackObj { + const Deoptimization::DeoptReason _deopt_reason; + Node* const _entry; + + public: + RegularPredicateBlock(Node* tail, Deoptimization::DeoptReason deopt_reason) + : _deopt_reason(deopt_reason), + _entry(skip_all(tail)) { + DEBUG_ONLY(verify_block(tail);) + } + NONCOPYABLE(RegularPredicateBlock); + + private: + // Walk over all Regular Predicates of this block (if any) and return the first node not belonging to the block + // anymore (i.e. entry to the first Regular Predicate in this block if any or `tail` otherwise). + Node* skip_all(Node* tail) const { + RegularPredicateBlockIterator iterator(tail, _deopt_reason); + return iterator.skip_all(); + } + + DEBUG_ONLY(void verify_block(Node* tail);) + + public: + Node* entry() const { + return _entry; + } +}; + +#ifndef PRODUCT +// Visitor class to print all the visited predicates. Used by the Predicates class which does the printing starting +// at the loop node and then following the inputs to the earliest predicate. +class PredicatePrinter : public PredicateVisitor { + const char* _prefix; // Prefix added to each dumped string. + + public: + explicit PredicatePrinter(const char* prefix) : _prefix(prefix) {} + NONCOPYABLE(PredicatePrinter); + + void visit(const ParsePredicate& parse_predicate) override { + print_predicate_node("Parse Predicate", parse_predicate); + } + + void visit(const RuntimePredicate& runtime_predicate) override { + print_predicate_node("Runtime Predicate", runtime_predicate); + } + + void visit(const TemplateAssertionPredicate& template_assertion_predicate) override { + print_predicate_node("Template Assertion Predicate", template_assertion_predicate); + } + + void visit(const InitializedAssertionPredicate& initialized_assertion_predicate) override { + print_predicate_node("Initialized Assertion Predicate", initialized_assertion_predicate); + } + + private: + void print_predicate_node(const char* predicate_name, const Predicate& predicate) const { + tty->print_cr("%s- %s: %d %s", _prefix, predicate_name, predicate.head()->_idx, predicate.head()->Name()); + } +}; +#endif // NOT PRODUCT // This class represents a Predicate Block (i.e. either a Loop Predicate Block, a Profiled Loop Predicate Block, // or a Loop Limit Check Predicate Block). It contains zero or more Regular Predicates followed by a Parse Predicate // which, however, does not need to exist (we could already have decided to remove Parse Predicates for this loop). 
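The UnifiedPredicateVisitor above uses a funneling trick worth noting: every typed visit() override is final and forwards to a single pure virtual hook, so subclasses can only customize the unified method and cannot accidentally re-split the cases. A standalone sketch of the trick (simplified types, and the hook here takes no argument, unlike the real visit_predicate(const Predicate&)):

#include <iostream>

struct ParsePredicate {};
struct RuntimePredicate {};

class PredicateVisitor {
 public:
  virtual void visit(const ParsePredicate&) {}
  virtual void visit(const RuntimePredicate&) {}
  virtual ~PredicateVisitor() {}
};

// All typed overloads are final and funnel into one pure virtual hook.
class UnifiedVisitor : public PredicateVisitor {
 public:
  void visit(const ParsePredicate&) final { visit_predicate(); }
  void visit(const RuntimePredicate&) final { visit_predicate(); }
  virtual void visit_predicate() = 0;
};

class CountingVisitor : public UnifiedVisitor {
 public:
  int count = 0;
  void visit_predicate() override { count++; }
};

int main() {
  CountingVisitor v;
  v.visit(ParsePredicate());
  v.visit(RuntimePredicate());
  std::cout << v.count << "\n"; // 2
  return 0;
}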
class PredicateBlock : public StackObj { - ParsePredicate _parse_predicate; // Could be missing. - Node* _entry; - - static Node* skip_regular_predicates(Node* regular_predicate_proj, Deoptimization::DeoptReason deopt_reason); - DEBUG_ONLY(void verify_block();) + const ParsePredicate _parse_predicate; // Could be missing. + const RegularPredicateBlock _regular_predicate_block; + Node* const _entry; +#ifndef PRODUCT + // Used for dumping. + Node* const _tail; + const Deoptimization::DeoptReason _deopt_reason; +#endif // NOT PRODUCT public: - PredicateBlock(Node* predicate_proj, Deoptimization::DeoptReason deopt_reason) - : _parse_predicate(predicate_proj, deopt_reason), - _entry(skip_regular_predicates(_parse_predicate.entry(), deopt_reason)) { - DEBUG_ONLY(verify_block();) - } + PredicateBlock(Node* tail, Deoptimization::DeoptReason deopt_reason) + : _parse_predicate(tail, deopt_reason), + _regular_predicate_block(_parse_predicate.entry(), deopt_reason), + _entry(_regular_predicate_block.entry()) +#ifndef PRODUCT + , _tail(tail) + , _deopt_reason(deopt_reason) +#endif // NOT PRODUCT + {} + NONCOPYABLE(PredicateBlock); // Returns the control input node into this Regular Predicate block. This is either: // - The control input to the first If node in the block representing a Runtime Predicate if there is at least one @@ -453,11 +764,11 @@ class PredicateBlock : public StackObj { } ParsePredicateNode* parse_predicate() const { - return _parse_predicate.node(); + return _parse_predicate.head(); } ParsePredicateSuccessProj* parse_predicate_success_proj() const { - return _parse_predicate.success_proj(); + return _parse_predicate.tail(); } bool has_runtime_predicates() const { @@ -471,25 +782,31 @@ class PredicateBlock : public StackObj { Node* skip_parse_predicate() const { return _parse_predicate.entry(); } + +#ifndef PRODUCT + void dump() const; + void dump(const char* prefix) const; +#endif // NOT PRODUCT }; // This class takes a loop entry node and finds all the available predicates for the loop. class Predicates : public StackObj { - Node* _loop_entry; - PredicateBlock _loop_limit_check_predicate_block; - PredicateBlock _profiled_loop_predicate_block; - PredicateBlock _loop_predicate_block; - Node* _entry; + Node* const _tail; + const PredicateBlock _loop_limit_check_predicate_block; + const PredicateBlock _profiled_loop_predicate_block; + const PredicateBlock _loop_predicate_block; + Node* const _entry; public: - Predicates(Node* loop_entry) - : _loop_entry(loop_entry), + explicit Predicates(Node* loop_entry) + : _tail(loop_entry), _loop_limit_check_predicate_block(loop_entry, Deoptimization::Reason_loop_limit_check), _profiled_loop_predicate_block(_loop_limit_check_predicate_block.entry(), Deoptimization::Reason_profile_predicate), _loop_predicate_block(_profiled_loop_predicate_block.entry(), Deoptimization::Reason_predicate), _entry(_loop_predicate_block.entry()) {} + NONCOPYABLE(Predicates); // Returns the control input the first predicate if there are any predicates. If there are no predicates, the same // node initially passed to the constructor is returned. @@ -510,35 +827,17 @@ class Predicates : public StackObj { } bool has_any() const { - return _entry != _loop_entry; + return _entry != _tail; } -}; - -// This class iterates over the Parse Predicates of a loop. 
-class ParsePredicateIterator : public StackObj { - GrowableArray<ParsePredicateNode*> _parse_predicates; - int _current_index; - - public: - ParsePredicateIterator(const Predicates& predicates); - bool has_next() const { - return _current_index < _parse_predicates.length(); - } - - ParsePredicateNode* next(); +#ifndef PRODUCT + /* + * Debug printing functions. + */ + void dump() const; + static void dump_at(Node* node); + static void dump_for_loop(LoopNode* loop_node); +#endif // NOT PRODUCT }; -// Special predicate iterator that can be used to walk through predicate entries, regardless of whether the predicate -// belongs to the same loop or not (i.e. leftovers from already folded nodes). The iterator returns the next entry -// to a predicate. -class PredicateEntryIterator : public StackObj { - Node* _current; - - public: - explicit PredicateEntryIterator(Node* start) : _current(start) {}; - - bool has_next() const; - Node* next_entry(); -}; #endif // SHARE_OPTO_PREDICATES_HPP diff --git a/src/hotspot/share/opto/reg_split.cpp b/src/hotspot/share/opto/reg_split.cpp index 9f89c683b34b5..6d948aff011cf 100644 --- a/src/hotspot/share/opto/reg_split.cpp +++ b/src/hotspot/share/opto/reg_split.cpp @@ -306,8 +306,8 @@ static Node* clone_node(Node* def, Block *b, Compile* C) { C->record_failure(C2Compiler::retry_no_subsuming_loads()); } else { // Bailout without retry - assert(false, "RA Split failed: attempt to clone node with anti_dependence"); - C->record_method_not_compilable("RA Split failed: attempt to clone node with anti_dependence"); + assert(C->failure_is_artificial(), "RA Split failed: attempt to clone node with anti_dependence"); + C->record_method_not_compilable("RA Split failed: attempt to clone node with anti_dependence" DEBUG_ONLY(COMMA true)); } return nullptr; } diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 967b4a815d09e..73f852c0f047d 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -3260,23 +3260,28 @@ void TypeRawPtr::dump2( Dict &d, uint depth, outputStream *st ) const { // Convenience common pre-built type. const TypeOopPtr *TypeOopPtr::BOTTOM; -TypeInterfaces::TypeInterfaces() - : Type(Interfaces), _list(Compile::current()->type_arena(), 0, 0, nullptr), +TypeInterfaces::TypeInterfaces(ciInstanceKlass** interfaces_base, int nb_interfaces) + : Type(Interfaces), _interfaces(interfaces_base, nb_interfaces), _hash(0), _exact_klass(nullptr) { - DEBUG_ONLY(_initialized = true); -} - -TypeInterfaces::TypeInterfaces(GrowableArray<ciInstanceKlass*>* interfaces) - : Type(Interfaces), _list(Compile::current()->type_arena(), interfaces->length(), 0, nullptr), - _hash(0), _exact_klass(nullptr) { - for (int i = 0; i < interfaces->length(); i++) { - add(interfaces->at(i)); - } + _interfaces.sort(compare); initialize(); } const TypeInterfaces* TypeInterfaces::make(GrowableArray<ciInstanceKlass*>* interfaces) { - TypeInterfaces* result = (interfaces == nullptr) ? new TypeInterfaces() : new TypeInterfaces(interfaces); + // hashcons() can only delete the last thing that was allocated: to + // make sure all memory for the newly created TypeInterfaces can be + // freed if an identical one exists, allocate space for the array of + // interfaces right after the TypeInterfaces object so that they + // form a contiguous piece of memory. + int nb_interfaces = interfaces == nullptr ?
0 : interfaces->length(); + size_t total_size = sizeof(TypeInterfaces) + nb_interfaces * sizeof(ciInstanceKlass*); + + void* allocated_mem = operator new(total_size); + ciInstanceKlass** interfaces_base = (ciInstanceKlass**)((char*)allocated_mem + sizeof(TypeInterfaces)); + for (int i = 0; i < nb_interfaces; ++i) { + interfaces_base[i] = interfaces->at(i); + } + TypeInterfaces* result = ::new (allocated_mem) TypeInterfaces(interfaces_base, nb_interfaces); return (const TypeInterfaces*)result->hashcons(); } @@ -3295,20 +3300,18 @@ int TypeInterfaces::compare(ciInstanceKlass* const& k1, ciInstanceKlass* const& return 0; } -void TypeInterfaces::add(ciInstanceKlass* interface) { - assert(interface->is_interface(), "for interfaces only"); - _list.insert_sorted(interface); - verify(); +int TypeInterfaces::compare(ciInstanceKlass** k1, ciInstanceKlass** k2) { + return compare(*k1, *k2); } bool TypeInterfaces::eq(const Type* t) const { const TypeInterfaces* other = (const TypeInterfaces*)t; - if (_list.length() != other->_list.length()) { + if (_interfaces.length() != other->_interfaces.length()) { return false; } - for (int i = 0; i < _list.length(); i++) { - ciKlass* k1 = _list.at(i); - ciKlass* k2 = other->_list.at(i); + for (int i = 0; i < _interfaces.length(); i++) { + ciKlass* k1 = _interfaces.at(i); + ciKlass* k2 = other->_interfaces.at(i); if (!k1->equals(k2)) { return false; } @@ -3319,12 +3322,12 @@ bool TypeInterfaces::eq(const Type* t) const { bool TypeInterfaces::eq(ciInstanceKlass* k) const { assert(k->is_loaded(), "should be loaded"); GrowableArray<ciInstanceKlass*>* interfaces = k->transitive_interfaces(); - if (_list.length() != interfaces->length()) { return false; } + if (_interfaces.length() != interfaces->length()) { return false; } for (int i = 0; i < interfaces->length(); i++) { bool found = false; - _list.find_sorted(interfaces->at(i), found); + _interfaces.find_sorted(interfaces->at(i), found); if (!found) { return false; } @@ -3344,8 +3347,8 @@ const Type* TypeInterfaces::xdual() const { void TypeInterfaces::compute_hash() { uint hash = 0; - for (int i = 0; i < _list.length(); i++) { - ciKlass* k = _list.at(i); + for (int i = 0; i < _interfaces.length(); i++) { + ciKlass* k = _interfaces.at(i); hash += k->hash(); } _hash = hash; @@ -3356,13 +3359,13 @@ static int compare_interfaces(ciInstanceKlass** k1, ciInstanceKlass** k2) { } void TypeInterfaces::dump(outputStream* st) const { - if (_list.length() == 0) { + if (_interfaces.length() == 0) { return; } ResourceMark rm; st->print(" ("); GrowableArray<ciInstanceKlass*> interfaces; - interfaces.appendAll(&_list); + interfaces.appendAll(&_interfaces); // Sort the interfaces so they are listed in the same order from one run to the other of the same compilation interfaces.sort(compare_interfaces); for (int i = 0; i < interfaces.length(); i++) { @@ -3377,9 +3380,9 @@ #ifdef ASSERT void TypeInterfaces::verify() const { - for (int i = 1; i < _list.length(); i++) { - ciInstanceKlass* k1 = _list.at(i-1); - ciInstanceKlass* k2 = _list.at(i); + for (int i = 1; i < _interfaces.length(); i++) { + ciInstanceKlass* k1 = _interfaces.at(i-1); + ciInstanceKlass* k2 = _interfaces.at(i); assert(compare(k2, k1) > 0, "should be ordered"); assert(k1 != k2, "no duplicate"); } @@ -3390,23 +3393,23 @@ const TypeInterfaces* TypeInterfaces::union_with(const TypeInterfaces* other) co GrowableArray<ciInstanceKlass*> result_list; int i = 0; int j = 0; - while (i < _list.length() || j < other->_list.length()) { - while (i < _list.length() && - (j >= other->_list.length()
|| - compare(_list.at(i), other->_list.at(j)) < 0)) { - result_list.push(_list.at(i)); + while (i < _interfaces.length() || j < other->_interfaces.length()) { + while (i < _interfaces.length() && + (j >= other->_interfaces.length() || + compare(_interfaces.at(i), other->_interfaces.at(j)) < 0)) { + result_list.push(_interfaces.at(i)); i++; } - while (j < other->_list.length() && - (i >= _list.length() || - compare(other->_list.at(j), _list.at(i)) < 0)) { - result_list.push(other->_list.at(j)); + while (j < other->_interfaces.length() && + (i >= _interfaces.length() || + compare(other->_interfaces.at(j), _interfaces.at(i)) < 0)) { + result_list.push(other->_interfaces.at(j)); j++; } - if (i < _list.length() && - j < other->_list.length() && - _list.at(i) == other->_list.at(j)) { - result_list.push(_list.at(i)); + if (i < _interfaces.length() && + j < other->_interfaces.length() && + _interfaces.at(i) == other->_interfaces.at(j)) { + result_list.push(_interfaces.at(i)); i++; j++; } @@ -3414,14 +3417,14 @@ const TypeInterfaces* TypeInterfaces::union_with(const TypeInterfaces* other) co const TypeInterfaces* result = TypeInterfaces::make(&result_list); #ifdef ASSERT result->verify(); - for (int i = 0; i < _list.length(); i++) { - assert(result->_list.contains(_list.at(i)), "missing"); + for (int i = 0; i < _interfaces.length(); i++) { + assert(result->_interfaces.contains(_interfaces.at(i)), "missing"); } - for (int i = 0; i < other->_list.length(); i++) { - assert(result->_list.contains(other->_list.at(i)), "missing"); + for (int i = 0; i < other->_interfaces.length(); i++) { + assert(result->_interfaces.contains(other->_interfaces.at(i)), "missing"); } - for (int i = 0; i < result->_list.length(); i++) { - assert(_list.contains(result->_list.at(i)) || other->_list.contains(result->_list.at(i)), "missing"); + for (int i = 0; i < result->_interfaces.length(); i++) { + assert(_interfaces.contains(result->_interfaces.at(i)) || other->_interfaces.contains(result->_interfaces.at(i)), "missing"); } #endif return result; @@ -3431,21 +3434,21 @@ const TypeInterfaces* TypeInterfaces::intersection_with(const TypeInterfaces* ot GrowableArray<ciInstanceKlass*> result_list; int i = 0; int j = 0; - while (i < _list.length() || j < other->_list.length()) { - while (i < _list.length() && - (j >= other->_list.length() || - compare(_list.at(i), other->_list.at(j)) < 0)) { + while (i < _interfaces.length() || j < other->_interfaces.length()) { + while (i < _interfaces.length() && + (j >= other->_interfaces.length() || + compare(_interfaces.at(i), other->_interfaces.at(j)) < 0)) { i++; } - while (j < other->_list.length() && - (i >= _list.length() || - compare(other->_list.at(j), _list.at(i)) < 0)) { + while (j < other->_interfaces.length() && + (i >= _interfaces.length() || + compare(other->_interfaces.at(j), _interfaces.at(i)) < 0)) { j++; } - if (i < _list.length() && - j < other->_list.length() && - _list.at(i) == other->_list.at(j)) { - result_list.push(_list.at(i)); + if (i < _interfaces.length() && + j < other->_interfaces.length() && + _interfaces.at(i) == other->_interfaces.at(j)) { + result_list.push(_interfaces.at(i)); i++; j++; } @@ -3453,14 +3456,14 @@ const TypeInterfaces* TypeInterfaces::intersection_with(const TypeInterfaces* ot const TypeInterfaces* result = TypeInterfaces::make(&result_list); #ifdef ASSERT result->verify(); - for (int i = 0; i < _list.length(); i++)
{ + assert(!other->_interfaces.contains(_interfaces.at(i)) || result->_interfaces.contains(_interfaces.at(i)), "missing"); } - for (int i = 0; i < other->_list.length(); i++) { - assert(!_list.contains(other->_list.at(i)) || result->_list.contains(other->_list.at(i)), "missing"); + for (int i = 0; i < other->_interfaces.length(); i++) { + assert(!_interfaces.contains(other->_interfaces.at(i)) || result->_interfaces.contains(other->_interfaces.at(i)), "missing"); } - for (int i = 0; i < result->_list.length(); i++) { - assert(_list.contains(result->_list.at(i)) && other->_list.contains(result->_list.at(i)), "missing"); + for (int i = 0; i < result->_interfaces.length(); i++) { + assert(_interfaces.contains(result->_interfaces.at(i)) && other->_interfaces.contains(result->_interfaces.at(i)), "missing"); } #endif return result; @@ -3473,13 +3476,13 @@ ciInstanceKlass* TypeInterfaces::exact_klass() const { } void TypeInterfaces::compute_exact_klass() { - if (_list.length() == 0) { + if (_interfaces.length() == 0) { _exact_klass = nullptr; return; } ciInstanceKlass* res = nullptr; - for (int i = 0; i < _list.length(); i++) { - ciInstanceKlass* interface = _list.at(i); + for (int i = 0; i < _interfaces.length(); i++) { + ciInstanceKlass* interface = _interfaces.at(i); if (eq(interface)) { assert(res == nullptr, ""); res = interface; @@ -3490,8 +3493,8 @@ void TypeInterfaces::compute_exact_klass() { #ifdef ASSERT void TypeInterfaces::verify_is_loaded() const { - for (int i = 0; i < _list.length(); i++) { - ciKlass* interface = _list.at(i); + for (int i = 0; i < _interfaces.length(); i++) { + ciKlass* interface = _interfaces.at(i); assert(interface->is_loaded(), "Interface not loaded"); } } diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp index b9883d5139136..902155e975d16 100644 --- a/src/hotspot/share/opto/type.hpp +++ b/src/hotspot/share/opto/type.hpp @@ -877,19 +877,18 @@ class TypeVectMask : public TypeVect { // Set of implemented interfaces. Referenced from TypeOopPtr and TypeKlassPtr. 
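The TypeInterfaces::make() change earlier in this file relies on the object and its trailing interface array forming one allocation, so that hashcons() can free both as the single most recent allocation when an identical type already exists. A standalone sketch of that pattern (plain operator new plus placement new; simplified types, not HotSpot's arena allocation):

#include <cassert>
#include <cstddef>
#include <new>

// A table object whose element array lives in the same allocation, directly
// after the object itself.
struct Table {
  int* elems;
  int  len;
  Table(int* e, int n) : elems(e), len(n) {}
};

Table* make_table(const int* src, int n) {
  size_t total_size = sizeof(Table) + n * sizeof(int);
  void* allocated_mem = operator new(total_size);
  // The array starts right after the object; one delete frees both.
  int* elems = reinterpret_cast<int*>(static_cast<char*>(allocated_mem) + sizeof(Table));
  for (int i = 0; i < n; i++) {
    elems[i] = src[i];
  }
  return ::new (allocated_mem) Table(elems, n); // placement new into the block
}

int main() {
  int src[3] = {3, 1, 2};
  Table* t = make_table(src, 3);
  assert(t->len == 3 && t->elems[2] == 2);
  t->~Table();
  operator delete(t);
  return 0;
}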
class TypeInterfaces : public Type { private: - GrowableArray<ciInstanceKlass*> _list; + GrowableArrayFromArray<ciInstanceKlass*> _interfaces; uint _hash; ciInstanceKlass* _exact_klass; DEBUG_ONLY(bool _initialized;) void initialize(); - void add(ciInstanceKlass* interface); void verify() const NOT_DEBUG_RETURN; void compute_hash(); void compute_exact_klass(); - TypeInterfaces(); - TypeInterfaces(GrowableArray<ciInstanceKlass*>* interfaces); + + TypeInterfaces(ciInstanceKlass** interfaces_base, int nb_interfaces); NONCOPYABLE(TypeInterfaces); public: @@ -904,12 +903,13 @@ class TypeInterfaces : public Type { bool contains(const TypeInterfaces* other) const { return intersection_with(other)->eq(other); } - bool empty() const { return _list.length() == 0; } + bool empty() const { return _interfaces.length() == 0; } ciInstanceKlass* exact_klass() const; void verify_is_loaded() const NOT_DEBUG_RETURN; static int compare(ciInstanceKlass* const& k1, ciInstanceKlass* const& k2); + static int compare(ciInstanceKlass** k1, ciInstanceKlass** k2); const Type* xmeet(const Type* t) const; diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index cfcd903e79d95..8eb26c6c519f3 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -468,11 +468,11 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { Node* operation = nullptr; if (opc == Op_CallLeafVector) { assert(UseVectorStubs, "sanity"); - operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2); + operation = gen_call_to_vector_math(opr->get_con(), elem_bt, num_elem, opd1, opd2); if (operation == nullptr) { - log_if_needed(" ** svml call failed for %s_%s_%d", - (elem_bt == T_FLOAT)?"float":"double", - VectorSupport::svmlname[opr->get_con() - VectorSupport::VECTOR_OP_SVML_START], + log_if_needed(" ** Vector math call failed for %s_%s_%d", - (elem_bt == T_FLOAT) ?
"float" : "double", + VectorSupport::mathname[opr->get_con() - VectorSupport::VECTOR_OP_MATH_START], num_elem * type2aelembytes(elem_bt)); return false; } @@ -757,6 +757,64 @@ bool LibraryCallKit::inline_vector_shuffle_to_vector() { return true; } +// public static +// <E, SH extends VectorShuffle<E>> +// SH wrapShuffleIndexes(Class<E> eClass, Class<? extends SH> shClass, SH sh, int length, +// ShuffleWrapIndexesOperation<SH> defaultImpl) +bool LibraryCallKit::inline_vector_wrap_shuffle_indexes() { + const TypeInstPtr* elem_klass = gvn().type(argument(0))->isa_instptr(); + const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr(); + Node* shuffle = argument(2); + const TypeInt* vlen = gvn().type(argument(3))->isa_int(); + + if (elem_klass == nullptr || shuffle_klass == nullptr || shuffle->is_top() || vlen == nullptr || + !vlen->is_con() || shuffle_klass->const_oop() == nullptr) { + // not enough info for intrinsification + return false; + } + + if (!is_klass_initialized(shuffle_klass)) { + log_if_needed(" ** klass argument not initialized"); + return false; + } + + int num_elem = vlen->get_con(); + if ((num_elem < 4) || !is_power_of_2(num_elem)) { + log_if_needed(" ** vlen < 4 or not power of two=%d", num_elem); + return false; + } + + // Shuffles use byte array based backing storage + BasicType shuffle_bt = T_BYTE; + if (!arch_supports_vector(Op_AndV, num_elem, shuffle_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_Replicate, num_elem, shuffle_bt, VecMaskNotUsed)) { + log_if_needed(" ** not supported: op=wrapShuffleIndexes vlen=%d etype=%s", + num_elem, type2name(shuffle_bt)); + return false; + } + + ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); + const TypeInstPtr* shuffle_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, sbox_klass); + + // Unbox shuffle with true flag to indicate it's a load shuffle to vector + // shuffle is a byte array + Node* shuffle_vec = unbox_vector(shuffle, shuffle_box_type, shuffle_bt, num_elem, true); + + const TypeVect* vt = TypeVect::make(shuffle_bt, num_elem); + const Type* shuffle_type_bt = Type::get_const_basic_type(shuffle_bt); + Node* mod_mask = gvn().makecon(TypeInt::make(num_elem-1)); + Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, shuffle_type_bt)); + // Wrap the indices greater than lane count. + Node* res = gvn().transform(VectorNode::make(Op_AndV, shuffle_vec, bcast_mod_mask, vt)); + + // Wrap it up in VectorBox to keep object type information.
+ res = box_vector(res, shuffle_box_type, shuffle_bt, num_elem); + set_result(res); + C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(shuffle_bt)))); + return true; +} + // public static // , @@ -2013,12 +2071,12 @@ bool LibraryCallKit::inline_vector_rearrange() { return true; } -static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) { +static address get_vector_math_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) { address addr = nullptr; assert(UseVectorStubs, "sanity"); assert(name_ptr != nullptr, "unexpected"); - assert((vop >= VectorSupport::VECTOR_OP_SVML_START) && (vop <= VectorSupport::VECTOR_OP_SVML_END), "unexpected"); - int op = vop - VectorSupport::VECTOR_OP_SVML_START; + assert((vop >= VectorSupport::VECTOR_OP_MATH_START) && (vop <= VectorSupport::VECTOR_OP_MATH_END), "unexpected"); + int op = vop - VectorSupport::VECTOR_OP_MATH_START; switch(bits) { case 64: //fallthough case 128: //fallthough case 256: //fallthough case 512: if (bt == T_FLOAT) { - snprintf(name_ptr, name_len, "vector_%s_float%d", VectorSupport::svmlname[op], bits); + snprintf(name_ptr, name_len, "vector_%s_float_%dbits_fixed", VectorSupport::mathname[op], bits); addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op]; } else { assert(bt == T_DOUBLE, "must be FP type only"); - snprintf(name_ptr, name_len, "vector_%s_double%d", VectorSupport::svmlname[op], bits); + snprintf(name_ptr, name_len, "vector_%s_double_%dbits_fixed", VectorSupport::mathname[op], bits); addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op]; } break; default: - snprintf(name_ptr, name_len, "invalid"); - addr = nullptr; - Unimplemented(); + if (!Matcher::supports_scalable_vector() || !Matcher::vector_size_supported(bt, bits/type2aelembytes(bt)) ) { + snprintf(name_ptr, name_len, "invalid"); + addr = nullptr; + Unimplemented(); + } break; } + if (addr == nullptr && Matcher::supports_scalable_vector()) { + if (bt == T_FLOAT) { + snprintf(name_ptr, name_len, "vector_%s_float_%dbits_scalable", VectorSupport::mathname[op], bits); + addr = StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op]; + } else { + assert(bt == T_DOUBLE, "must be FP type only"); + snprintf(name_ptr, name_len, "vector_%s_double_%dbits_scalable", VectorSupport::mathname[op], bits); + addr = StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op]; + } + } + return addr; } -Node* LibraryCallKit::gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) { +// public static +// <V extends Vector<E>, +// M extends VectorMask<E>, +// E> +// V selectFromOp(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass, +// int length, V v1, V v2, M m, +// VectorSelectFromOp<V, M> defaultImpl) +bool LibraryCallKit::inline_vector_select_from() { + const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr(); + const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr(); + const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr(); + const TypeInt* vlen = gvn().type(argument(3))->isa_int(); + + if (vector_klass == nullptr || elem_klass == nullptr || vlen == nullptr || + vector_klass->const_oop() == nullptr || + elem_klass->const_oop() == nullptr || + !vlen->is_con()) { + log_if_needed(" ** missing constant: vclass=%s etype=%s vlen=%s", + NodeClassNames[argument(0)->Opcode()], + NodeClassNames[argument(2)->Opcode()], +
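Both wrapShuffleIndexes above and selectFrom below wrap out-of-range lane indices with a single AndV against a broadcast of (num_elem - 1); for a power-of-two lane count this mask is equivalent to a modulo for non-negative indices, which is why both intrinsics bail out unless num_elem is a power of two. A scalar sketch of the equivalence:

#include <cassert>

// Valid only for power-of-two num_elem and non-negative indices, which the
// intrinsics guarantee up front.
int wrap_index(int index, int num_elem) {
  return index & (num_elem - 1);
}

int main() {
  assert(wrap_index(9, 8) == 9 % 8);  // out-of-range index wraps to 1
  assert(wrap_index(7, 8) == 7);      // in-range indices are unchanged
  assert(wrap_index(16, 8) == 0);
  return 0;
}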
NodeClassNames[argument(3)->Opcode()]); + return false; // not enough info for intrinsification + } + if (!is_klass_initialized(vector_klass)) { + log_if_needed(" ** klass argument not initialized"); + return false; + } + ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type(); + if (!elem_type->is_primitive_type()) { + log_if_needed(" ** not a primitive bt=%d", elem_type->basic_type()); + return false; // should be primitive type + } + BasicType elem_bt = elem_type->basic_type(); + int num_elem = vlen->get_con(); + if (!is_power_of_2(num_elem)) { + log_if_needed(" ** vlen not power of two=%d", num_elem); + return false; + } + + int cast_vopc = VectorCastNode::opcode(-1, elem_bt); // from vector of type elem_bt + if (!arch_supports_vector(Op_VectorLoadShuffle, num_elem, elem_bt, VecMaskNotUsed)|| + !arch_supports_vector(Op_AndV, num_elem, T_BYTE, VecMaskNotUsed) || + !arch_supports_vector(Op_Replicate, num_elem, T_BYTE, VecMaskNotUsed) || + !arch_supports_vector(cast_vopc, num_elem, T_BYTE, VecMaskNotUsed)) { + log_if_needed(" ** not supported: arity=0 op=selectFrom vlen=%d etype=%s ismask=no", + num_elem, type2name(elem_bt)); + return false; // not supported + } + + bool is_masked_op = argument(6)->bottom_type() != TypePtr::NULL_PTR; + bool use_predicate = is_masked_op; + if (is_masked_op && + (mask_klass == nullptr || + mask_klass->const_oop() == nullptr || + !is_klass_initialized(mask_klass))) { + log_if_needed(" ** mask_klass argument not initialized"); + return false; // not supported + } + VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? (VecMaskUseLoad | VecMaskUsePred) : VecMaskNotUsed); + if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, checkFlags)) { + use_predicate = false; + if(!is_masked_op || + (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad) || + !arch_supports_vector(Op_Replicate, num_elem, elem_bt, VecMaskNotUsed))) { + log_if_needed(" ** not supported: op=selectFrom vlen=%d etype=%s is_masked_op=%d", + num_elem, type2name(elem_bt), is_masked_op); + return false; // not supported + } + } + ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass(); + const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass); + + // v1 is the index vector + Node* v1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem); + // v2 is the vector being rearranged + Node* v2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem); + + if (v1 == nullptr) { + log_if_needed(" ** unbox failed v1=%s", NodeClassNames[argument(4)->Opcode()]); + return false; // operand unboxing failed + } + + if (v2 == nullptr) { + log_if_needed(" ** unbox failed v2=%s", NodeClassNames[argument(5)->Opcode()]); + return false; // operand unboxing failed + } + + Node* mask = nullptr; + if (is_masked_op) { + ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); + const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); + mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem); + if (mask == nullptr) { + log_if_needed(" ** unbox failed mask=%s", NodeClassNames[argument(6)->Opcode()]); + return false; + } + } + + // cast index vector from elem_bt vector to byte vector + const Type * byte_bt = Type::get_const_basic_type(T_BYTE); + const TypeVect * byte_vt = TypeVect::make(byte_bt, num_elem); + Node* byte_shuffle = 
gvn().transform(VectorCastNode::make(cast_vopc, v1, T_BYTE, num_elem)); + + // wrap the byte vector lanes to (num_elem - 1) to form the shuffle vector where num_elem is vector length + // this is a simple AND operation as we come here only for power of two vector length + Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); + Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, byte_bt)); + byte_shuffle = gvn().transform(VectorNode::make(Op_AndV, byte_shuffle, bcast_mod, byte_vt)); + + // load the shuffle to use in rearrange + const TypeVect * shuffle_vt = TypeVect::make(elem_bt, num_elem); + Node* load_shuffle = gvn().transform(new VectorLoadShuffleNode(byte_shuffle, shuffle_vt)); + + // and finally rearrange + Node* rearrange = new VectorRearrangeNode(v2, load_shuffle); + if (is_masked_op) { + if (use_predicate) { + // masked rearrange is supported so use that directly + rearrange->add_req(mask); + rearrange->add_flag(Node::Flag_is_predicated_vector); + } else { + // masked rearrange is not supported so emulate using blend + const TypeVect* vt = v1->bottom_type()->is_vect(); + rearrange = gvn().transform(rearrange); + + // create a zero vector with each lane element set as zero + Node* zero = gvn().makecon(Type::get_zero_type(elem_bt)); + Node* zerovec = gvn().transform(VectorNode::scalar2vector(zero, num_elem, Type::get_const_basic_type(elem_bt))); + + // For each lane for which mask is set, blend in the rearranged lane into zero vector + rearrange = new VectorBlendNode(zerovec, rearrange, mask); + } + } + rearrange = gvn().transform(rearrange); + + // box the result + Node* box = box_vector(rearrange, vbox_type, elem_bt, num_elem); + set_result(box); + + C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); + return true; +} + +Node* LibraryCallKit::gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) { assert(UseVectorStubs, "sanity"); - assert(vector_api_op_id >= VectorSupport::VECTOR_OP_SVML_START && vector_api_op_id <= VectorSupport::VECTOR_OP_SVML_END, "need valid op id"); + assert(vector_api_op_id >= VectorSupport::VECTOR_OP_MATH_START && vector_api_op_id <= VectorSupport::VECTOR_OP_MATH_END, "need valid op id"); assert(opd1 != nullptr, "must not be null"); const TypeVect* vt = TypeVect::make(bt, num_elem); const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt); char name[100] = ""; - // Get address for svml method. - address addr = get_svml_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100); + // Get address for vector math method. + address addr = get_vector_math_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100); if (addr == nullptr) { return nullptr; diff --git a/src/hotspot/share/prims/jni.cpp b/src/hotspot/share/prims/jni.cpp index 1f115c783e6d7..a869e9821a0b2 100644 --- a/src/hotspot/share/prims/jni.cpp +++ b/src/hotspot/share/prims/jni.cpp @@ -444,9 +444,11 @@ JNI_ENTRY(jobject, jni_ToReflectedMethod(JNIEnv *env, jclass cls, jmethodID meth methodHandle m (THREAD, Method::resolve_jmethod_id(method_id)); assert(m->is_static() == (isStatic != 0), "jni_ToReflectedMethod access flags doesn't match"); oop reflection_method; - if (m->is_initializer()) { + if (m->is_object_initializer()) { reflection_method = Reflection::new_constructor(m, CHECK_NULL); } else { + // Note: Static initializers can theoretically be here, if JNI users manage + // to get their jmethodID.
Record them as plain methods. reflection_method = Reflection::new_method(m, false, CHECK_NULL); } ret = JNIHandles::make_local(THREAD, reflection_method); diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp index bf9874956bae2..5d5d8aa7df9fa 100644 --- a/src/hotspot/share/prims/jvm.cpp +++ b/src/hotspot/share/prims/jvm.cpp @@ -1826,14 +1826,6 @@ JVM_ENTRY(jobjectArray, JVM_GetRecordComponents(JNIEnv* env, jclass ofClass)) } JVM_END -static bool select_method(const methodHandle& method, bool want_constructor) { - if (want_constructor) { - return (method->is_initializer() && !method->is_static()); - } else { - return (!method->is_initializer() && !method->is_overpass()); - } -} - static jobjectArray get_class_declared_methods_helper( JNIEnv *env, jclass ofClass, jboolean publicOnly, @@ -1866,14 +1858,22 @@ static jobjectArray get_class_declared_methods_helper( GrowableArray<int>* idnums = new GrowableArray<int>(methods_length); int num_methods = 0; + // Select methods matching the criteria. for (int i = 0; i < methods_length; i++) { - methodHandle method(THREAD, methods->at(i)); - if (select_method(method, want_constructor)) { - if (!publicOnly || method->is_public()) { - idnums->push(method->method_idnum()); - ++num_methods; - } + Method* method = methods->at(i); + if (want_constructor && !method->is_object_initializer()) { + continue; + } + if (!want_constructor && + (method->is_object_initializer() || method->is_static_initializer() || + method->is_overpass())) { + continue; } + if (publicOnly && !method->is_public()) { + continue; + } + idnums->push(method->method_idnum()); + ++num_methods; } // Allocate result @@ -2175,10 +2175,11 @@ static jobject get_method_at_helper(const constantPoolHandle& cp, jint index, bo THROW_MSG_NULL(vmSymbols::java_lang_RuntimeException(), "Unable to look up method in target class"); } oop method; - if (!m->is_initializer() || m->is_static()) { - method = Reflection::new_method(m, true, CHECK_NULL); - } else { + if (m->is_object_initializer()) { method = Reflection::new_constructor(m, CHECK_NULL); + } else { + // new_method accepts <clinit> as Method here + method = Reflection::new_method(m, true, CHECK_NULL); } return JNIHandles::make_local(THREAD, method); } @@ -2947,9 +2948,10 @@ JVM_ENTRY(void, JVM_StartThread(JNIEnv* env, jobject jthread)) // We must release the Threads_lock before we can post a jvmti event // in Thread::start. { + ConditionalMutexLocker throttle_ml(ThreadsLockThrottle_lock, UseThreadsLockThrottleLock); // Ensure that the C++ Thread and OSThread structures aren't freed before // we operate.
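The jni.cpp and jvm.cpp changes above replace the ambiguous is_initializer() with the explicit is_object_initializer()/is_static_initializer() pair: only "<init>" ever surfaces as a java.lang.reflect.Constructor, while "<clinit>" must be treated as a plain method. A standalone sketch of the distinction (simplified stand-ins, not HotSpot's Method class):

#include <cassert>
#include <string>

// Simplified stand-in for the queries the patch introduces.
struct MockMethod {
  std::string name;
  bool is_static;
  bool is_object_initializer() const { return name == "<init>" && !is_static; }
  bool is_static_initializer() const { return name == "<clinit>" && is_static; }
};

int main() {
  MockMethod ctor{"<init>", false};
  MockMethod clinit{"<clinit>", true};
  assert(ctor.is_object_initializer() && !ctor.is_static_initializer());
  assert(clinit.is_static_initializer() && !clinit.is_object_initializer());
  // A static initializer is never a constructor: reflection surfaces it, if
  // at all, as a plain method.
  return 0;
}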
- MutexLocker mu(Threads_lock); + MutexLocker ml(Threads_lock); // Since JDK 5 the java.lang.Thread threadStatus is used to prevent // re-starting an already started thread, so we should usually find diff --git a/src/hotspot/share/prims/methodHandles.cpp b/src/hotspot/share/prims/methodHandles.cpp index 4f33055d6a3fe..498da559cf526 100644 --- a/src/hotspot/share/prims/methodHandles.cpp +++ b/src/hotspot/share/prims/methodHandles.cpp @@ -313,8 +313,9 @@ oop MethodHandles::init_method_MemberName(Handle mname, CallInfo& info) { case CallInfo::direct_call: vmindex = Method::nonvirtual_vtable_index; if (m->is_static()) { + assert(!m->is_static_initializer(), "Cannot be static initializer"); flags |= IS_METHOD | (JVM_REF_invokeStatic << REFERENCE_KIND_SHIFT); - } else if (m->is_initializer()) { + } else if (m->is_object_initializer()) { flags |= IS_CONSTRUCTOR | (JVM_REF_invokeSpecial << REFERENCE_KIND_SHIFT); } else { // "special" reflects that this is a direct call, not that it diff --git a/src/hotspot/share/prims/upcallLinker.cpp b/src/hotspot/share/prims/upcallLinker.cpp index b02746911a808..1abce57652a9f 100644 --- a/src/hotspot/share/prims/upcallLinker.cpp +++ b/src/hotspot/share/prims/upcallLinker.cpp @@ -22,7 +22,7 @@ */ #include "precompiled.hpp" -#include "classfile/javaClasses.hpp" +#include "classfile/javaClasses.inline.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" #include "compiler/compilationPolicy.hpp" @@ -73,7 +73,7 @@ JavaThread* UpcallLinker::maybe_attach_and_get_thread() { } // modelled after JavaCallWrapper::JavaCallWrapper -JavaThread* UpcallLinker::on_entry(UpcallStub::FrameData* context, jobject receiver) { +JavaThread* UpcallLinker::on_entry(UpcallStub::FrameData* context) { JavaThread* thread = maybe_attach_and_get_thread(); guarantee(thread->thread_state() == _thread_in_native, "wrong thread state for upcall"); context->thread = thread; @@ -108,8 +108,6 @@ JavaThread* UpcallLinker::on_entry(UpcallStub::FrameData* context, jobject recei debug_only(thread->inc_java_call_counter()); thread->set_active_handles(context->new_handles); // install new handle block and reset Java frame linkage - thread->set_vm_result(JNIHandles::resolve(receiver)); - return thread; } @@ -138,11 +136,10 @@ void UpcallLinker::on_exit(UpcallStub::FrameData* context) { } void UpcallLinker::handle_uncaught_exception(oop exception) { - ResourceMark rm; - // Based on CATCH macro tty->print_cr("Uncaught exception:"); - exception->print(); - ShouldNotReachHere(); + Handle exception_h(Thread::current(), exception); + java_lang_Throwable::print_stack_trace(exception_h, tty); + fatal("Unrecoverable uncaught exception encountered"); } JVM_ENTRY(jlong, UL_MakeUpcallStub(JNIEnv *env, jclass unused, jobject mh, jobject abi, jobject conv, @@ -150,36 +147,30 @@ JVM_ENTRY(jlong, UL_MakeUpcallStub(JNIEnv *env, jclass unused, jobject mh, jobje ResourceMark rm(THREAD); Handle mh_h(THREAD, JNIHandles::resolve(mh)); jobject mh_j = JNIHandles::make_global(mh_h); + oop type = java_lang_invoke_MethodHandle::type(mh_h()); - oop lform = java_lang_invoke_MethodHandle::form(mh_h()); - oop vmentry = java_lang_invoke_LambdaForm::vmentry(lform); - Method* entry = java_lang_invoke_MemberName::vmtarget(vmentry); - const methodHandle mh_entry(THREAD, entry); - - assert(entry->method_holder()->is_initialized(), "no clinit barrier"); - CompilationPolicy::compile_if_required(mh_entry, CHECK_0); - - assert(entry->is_static(), "static only"); // Fill in the signature array, for the 
calling-convention call. - const int total_out_args = entry->size_of_parameters(); - assert(total_out_args > 0, "receiver arg"); + const int total_out_args = java_lang_invoke_MethodType::ptype_slot_count(type) + 1; // +1 for receiver + bool create_new = true; + TempNewSymbol signature = java_lang_invoke_MethodType::as_signature(type, create_new); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_out_args); BasicType ret_type; { int i = 0; - SignatureStream ss(entry->signature()); + out_sig_bt[i++] = T_OBJECT; // receiver MH + SignatureStream ss(signature); for (; !ss.at_return_type(); ss.next()) { out_sig_bt[i++] = ss.type(); // Collect remaining bits of signature if (ss.type() == T_LONG || ss.type() == T_DOUBLE) out_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots } - assert(i == total_out_args, ""); + assert(i == total_out_args, "%d != %d", i, total_out_args); ret_type = ss.type(); } return (jlong) UpcallLinker::make_upcall_stub( - mh_j, entry, out_sig_bt, total_out_args, ret_type, + mh_j, signature, out_sig_bt, total_out_args, ret_type, abi, conv, needs_return_buffer, checked_cast<int>(ret_buf_size)); JVM_END diff --git a/src/hotspot/share/prims/upcallLinker.hpp b/src/hotspot/share/prims/upcallLinker.hpp index d80516b256678..765ed63fc5a56 100644 --- a/src/hotspot/share/prims/upcallLinker.hpp +++ b/src/hotspot/share/prims/upcallLinker.hpp @@ -34,10 +34,10 @@ class UpcallLinker { private: static JavaThread* maybe_attach_and_get_thread(); - static JavaThread* on_entry(UpcallStub::FrameData* context, jobject receiver); + static JavaThread* on_entry(UpcallStub::FrameData* context); static void on_exit(UpcallStub::FrameData* context); public: - static address make_upcall_stub(jobject mh, Method* entry, + static address make_upcall_stub(jobject mh, Symbol* signature, BasicType* out_sig_bt, int total_out_args, BasicType ret_type, jobject jabi, jobject jconv, diff --git a/src/hotspot/share/prims/vectorSupport.cpp b/src/hotspot/share/prims/vectorSupport.cpp index e0517c91e957d..65bc6c48fee7b 100644 --- a/src/hotspot/share/prims/vectorSupport.cpp +++ b/src/hotspot/share/prims/vectorSupport.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -43,7 +43,7 @@ #endif // COMPILER2 #ifdef COMPILER2 -const char* VectorSupport::svmlname[VectorSupport::NUM_SVML_OP] = { +const char* VectorSupport::mathname[VectorSupport::NUM_VECTOR_OP_MATH] = { "tan", "tanh", "sin", diff --git a/src/hotspot/share/prims/vectorSupport.hpp b/src/hotspot/share/prims/vectorSupport.hpp index 7302e0060648b..6f8e52e9ec0c6 100644 --- a/src/hotspot/share/prims/vectorSupport.hpp +++ b/src/hotspot/share/prims/vectorSupport.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -121,9 +121,9 @@ class VectorSupport : AllStatic { VECTOR_OP_EXPM1 = 117, VECTOR_OP_HYPOT = 118, - VECTOR_OP_SVML_START = VECTOR_OP_TAN, - VECTOR_OP_SVML_END = VECTOR_OP_HYPOT, - NUM_SVML_OP = VECTOR_OP_SVML_END - VECTOR_OP_SVML_START + 1 + VECTOR_OP_MATH_START = VECTOR_OP_TAN, + VECTOR_OP_MATH_END = VECTOR_OP_HYPOT, + NUM_VECTOR_OP_MATH = VECTOR_OP_MATH_END - VECTOR_OP_MATH_START + 1 }; enum { @@ -131,7 +131,8 @@ class VectorSupport : AllStatic { VEC_SIZE_128 = 1, VEC_SIZE_256 = 2, VEC_SIZE_512 = 3, - NUM_VEC_SIZES = 4 + VEC_SIZE_SCALABLE = 4, + NUM_VEC_SIZES = 5 }; enum { @@ -139,7 +140,7 @@ class VectorSupport : AllStatic { MODE_BITS_COERCED_LONG_TO_MASK = 1 }; - static const char* svmlname[VectorSupport::NUM_SVML_OP]; + static const char* mathname[VectorSupport::NUM_VECTOR_OP_MATH]; static int vop2ideal(jint vop, BasicType bt); diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp index ca440b69913fa..24f6156224d26 100644 --- a/src/hotspot/share/prims/whitebox.cpp +++ b/src/hotspot/share/prims/whitebox.cpp @@ -2159,7 +2159,8 @@ WB_ENTRY(jboolean, WB_IsJVMCISupportedByGC(JNIEnv* env)) WB_END WB_ENTRY(jboolean, WB_CanWriteJavaHeapArchive(JNIEnv* env)) - return HeapShared::can_write(); + return HeapShared::can_write() + && ArchiveHeapLoader::can_use(); // work-around JDK-8341371 WB_END diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 0d0b58412aefa..fe9641063b33e 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -336,6 +336,11 @@ bool Arguments::is_internal_module_property(const char* property) { return false; } +// Return true if the key matches the --module-path property name ("jdk.module.path"). +bool Arguments::is_module_path_property(const char* key) { + return (strcmp(key, MODULE_PROPERTY_PREFIX PATH) == 0); +} + // Process java launcher properties. void Arguments::process_sun_java_launcher_properties(JavaVMInitArgs* args) { // See if sun.java.launcher or sun.java.launcher.is_altjvm is defined. @@ -1817,17 +1822,6 @@ bool Arguments::check_vm_args_consistency() { } #endif -#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) && !defined(PPC64) && !defined(S390) - if (LockingMode == LM_LIGHTWEIGHT) { - FLAG_SET_CMDLINE(LockingMode, LM_LEGACY); - warning("New lightweight locking not supported on this platform"); - } - if (UseObjectMonitorTable) { - FLAG_SET_CMDLINE(UseObjectMonitorTable, false); - warning("UseObjectMonitorTable not supported on this platform"); - } -#endif - if (UseObjectMonitorTable && LockingMode != LM_LIGHTWEIGHT) { // ObjectMonitorTable requires lightweight locking. FLAG_SET_CMDLINE(UseObjectMonitorTable, false); diff --git a/src/hotspot/share/runtime/arguments.hpp b/src/hotspot/share/runtime/arguments.hpp index 8251db3d0d59a..e1bfc0438dc90 100644 --- a/src/hotspot/share/runtime/arguments.hpp +++ b/src/hotspot/share/runtime/arguments.hpp @@ -461,6 +461,7 @@ class Arguments : AllStatic { static int PropertyList_readable_count(SystemProperty* pl); static bool is_internal_module_property(const char* option); + static bool is_module_path_property(const char* key); // Miscellaneous System property value getter and setters. 
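Back in the UL_MakeUpcallStub hunk (upcallLinker.cpp above), the calling-convention array is now derived from the MethodType instead of a resolved Method*: a leading T_OBJECT slot for the receiver MethodHandle, then one tag per parameter, with longs and doubles followed by a T_VOID filler because they occupy two Java slots. A toy model of that construction, with descriptor parsing heavily simplified (arrays and most primitives are collapsed; this is not HotSpot's SignatureStream API):

```
#include <cassert>
#include <string>
#include <vector>

// 'Tag' stands in for HotSpot's BasicType.
enum Tag { T_OBJECT, T_INT, T_LONG, T_DOUBLE, T_VOID };

std::vector<Tag> out_sig(const std::string& desc) {   // e.g. "(IJLjava/lang/String;)V"
  std::vector<Tag> sig{T_OBJECT};                     // receiver MH comes first
  for (size_t i = desc.find('(') + 1; desc[i] != ')'; i++) {
    switch (desc[i]) {
      case 'J': sig.push_back(T_LONG);   sig.push_back(T_VOID); break;  // 2 slots
      case 'D': sig.push_back(T_DOUBLE); sig.push_back(T_VOID); break;  // 2 slots
      case 'L': sig.push_back(T_OBJECT); i = desc.find(';', i); break;  // reference
      default:  sig.push_back(T_INT);    break;       // other primitives, simplified
    }
  }
  return sig;
}

int main() {
  // 1 receiver + I + (J + filler) + String = 5, matching ptype_slot_count(type) + 1.
  assert(out_sig("(IJLjava/lang/String;)V").size() == 5);
}
```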
static void set_dll_dir(const char *value) { _sun_boot_library_path->set_value(value); } diff --git a/src/hotspot/share/runtime/basicLock.inline.hpp b/src/hotspot/share/runtime/basicLock.inline.hpp index 30abd575da46a..1090241c3e1f0 100644 --- a/src/hotspot/share/runtime/basicLock.inline.hpp +++ b/src/hotspot/share/runtime/basicLock.inline.hpp @@ -39,7 +39,7 @@ inline void BasicLock::set_displaced_header(markWord header) { inline ObjectMonitor* BasicLock::object_monitor_cache() const { assert(UseObjectMonitorTable, "must be"); -#if defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64) || defined(S390) +#if !defined(ZERO) && (defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64) || defined(S390)) return reinterpret_cast<ObjectMonitor*>(get_metadata()); #else // Other platforms do not make use of the cache yet, diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp index 7961e56598f25..2006f340450e5 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -447,7 +447,7 @@ bool Deoptimization::deoptimize_objects_internal(JavaThread* thread, GrowableArr RegisterMap map(chunk->at(0)->register_map()); bool deoptimized_objects = false; - bool const jvmci_enabled = JVMCI_ONLY(UseJVMCICompiler) NOT_JVMCI(false); + bool const jvmci_enabled = JVMCI_ONLY(EnableJVMCI) NOT_JVMCI(false); // Reallocate the non-escaping objects and restore their fields. if (jvmci_enabled COMPILER2_PRESENT(|| (DoEscapeAnalysis && EliminateAllocations) diff --git a/src/hotspot/share/runtime/escapeBarrier.hpp b/src/hotspot/share/runtime/escapeBarrier.hpp index df32deef98639..454e0b555e118 100644 --- a/src/hotspot/share/runtime/escapeBarrier.hpp +++ b/src/hotspot/share/runtime/escapeBarrier.hpp @@ -71,7 +71,7 @@ class EscapeBarrier : StackObj { // Revert ea based optimizations for given deoptee thread EscapeBarrier(bool barrier_active, JavaThread* calling_thread, JavaThread* deoptee_thread) : _calling_thread(calling_thread), _deoptee_thread(deoptee_thread), - _barrier_active(barrier_active && (JVMCI_ONLY(UseJVMCICompiler) NOT_JVMCI(false) + _barrier_active(barrier_active && (JVMCI_ONLY(EnableJVMCI) NOT_JVMCI(false) COMPILER2_PRESENT(|| DoEscapeAnalysis))) { if (_barrier_active) sync_and_suspend_one(); @@ -80,7 +80,7 @@ class EscapeBarrier : StackObj { // Revert ea based optimizations for all java threads EscapeBarrier(bool barrier_active, JavaThread* calling_thread) : _calling_thread(calling_thread), _deoptee_thread(nullptr), - _barrier_active(barrier_active && (JVMCI_ONLY(UseJVMCICompiler) NOT_JVMCI(false) + _barrier_active(barrier_active && (JVMCI_ONLY(EnableJVMCI) NOT_JVMCI(false) COMPILER2_PRESENT(|| DoEscapeAnalysis))) { if (_barrier_active) sync_and_suspend_all(); diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp index e193271eff658..17078a69ab991 100644 --- a/src/hotspot/share/runtime/frame.cpp +++ b/src/hotspot/share/runtime/frame.cpp @@ -719,6 +719,8 @@ void frame::print_on_error(outputStream* st, char* buf, int buflen, bool verbose st->print("v ~MethodHandlesAdapterBlob " PTR_FORMAT, p2i(pc())); } else if (_cb->is_uncommon_trap_stub()) { st->print("v ~UncommonTrapBlob " PTR_FORMAT, p2i(pc())); + } else if (_cb->is_upcall_stub()) { + st->print("v ~UpcallStub::%s " PTR_FORMAT, _cb->name(), p2i(pc())); } else { st->print("v blob " PTR_FORMAT, p2i(pc())); } @@ -1116,6 +1118,19 @@ void frame::oops_entry_do(OopClosure* f, const RegisterMap* map) const {
entry_frame_call_wrapper()->oops_do(f); } +void frame::oops_upcall_do(OopClosure* f, const RegisterMap* map) const { + assert(map != nullptr, "map must be set"); + if (map->include_argument_oops()) { + // Upcall stubs call a MethodHandle impl method of which only the receiver + // is ever an oop. + // Currently we should not be able to get here, since there are no + // safepoints in the one resolve stub we can get into (handle_wrong_method) + // Leave this here as a trap in case we ever do: + ShouldNotReachHere(); // not implemented + } + _cb->as_upcall_stub()->oops_do(f, *this); +} + bool frame::is_deoptimized_frame() const { assert(_deopt_state != unknown, "not answerable"); if (_deopt_state == is_deoptimized) { @@ -1147,7 +1162,7 @@ void frame::oops_do_internal(OopClosure* f, NMethodClosure* cf, } else if (is_entry_frame()) { oops_entry_do(f, map); } else if (is_upcall_stub_frame()) { - _cb->as_upcall_stub()->oops_do(f, *this); + oops_upcall_do(f, map); } else if (CodeCache::contains(pc())) { oops_nmethod_do(f, cf, df, derived_mode, map); } else { diff --git a/src/hotspot/share/runtime/frame.hpp b/src/hotspot/share/runtime/frame.hpp index 1c57e3de4daa6..50aafce3837ac 100644 --- a/src/hotspot/share/runtime/frame.hpp +++ b/src/hotspot/share/runtime/frame.hpp @@ -464,6 +464,7 @@ class frame { const RegisterMap* map, bool use_interpreter_oop_map_cache) const; void oops_entry_do(OopClosure* f, const RegisterMap* map) const; + void oops_upcall_do(OopClosure* f, const RegisterMap* map) const; void oops_nmethod_do(OopClosure* f, NMethodClosure* cf, DerivedOopClosure* df, DerivedPointerIterationMode derived_mode, const RegisterMap* map) const; diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 8dafc9a508d99..b568e76930476 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -289,9 +289,6 @@ const int ObjectAlignmentInBytes = 8; product(bool, UseInlineCaches, true, \ "Use Inline Caches for virtual calls ") \ \ - product(size_t, InlineCacheBufferSize, 10*K, EXPERIMENTAL, \ - "InlineCacheBuffer size") \ - \ product(bool, InlineArrayCopy, true, DIAGNOSTIC, \ "Inline arraycopy native that is known to be part of " \ "base library DLL") \ @@ -1994,6 +1991,10 @@ const int ObjectAlignmentInBytes = 8; \ product(bool, StressSecondarySupers, false, DIAGNOSTIC, \ "Use a terrible hash function in order to generate many collisions.") \ + \ + product(bool, UseThreadsLockThrottleLock, true, DIAGNOSTIC, \ + "Use an extra lock during Thread start and exit to alleviate " \ + "contention on Threads_lock.") \ // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index 3528fc5b1bced..14528f6d908fc 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -487,6 +487,7 @@ JavaThread::JavaThread(MemTag mem_tag) : _cont_fastpath_thread_state(1), _held_monitor_count(0), _jni_monitor_count(0), + _unlocked_inflated_monitor(nullptr), _handshake(this), diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index e36b7dfe888a8..20bb08a4acbca 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -464,6 +464,7 @@ class JavaThread: public Thread { // It's signed for error detection.
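The new UseThreadsLockThrottleLock flag above pairs with the ConditionalMutexLocker added to JVM_StartThread in the jvm.cpp hunk earlier: starters first serialize on a throttle lock, so at most one of them at a time competes with the VM thread for the hot Threads_lock. A minimal sketch of the pattern, with std::mutex standing in for HotSpot's Mutex and use_throttle modeling the diagnostic flag:

```
#include <mutex>

std::mutex throttle_lock, threads_lock;

void start_thread(bool use_throttle) {
  std::unique_lock<std::mutex> throttle;   // ConditionalMutexLocker analogue
  if (use_throttle) {
    throttle = std::unique_lock<std::mutex>(throttle_lock);
  }
  // Only one starter at a time reaches here, so the VM thread sees
  // little competition for the inner lock's short critical section.
  std::lock_guard<std::mutex> ml(threads_lock);
  // ... add the new thread to the threads list ...
}

int main() { start_thread(true); }
```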
intx _held_monitor_count; // used by continuations for fast lock detection intx _jni_monitor_count; + ObjectMonitor* _unlocked_inflated_monitor; private: @@ -615,6 +616,12 @@ class JavaThread: public Thread { intx jni_monitor_count() { return _jni_monitor_count; } void clear_jni_monitor_count() { _jni_monitor_count = 0; } + // Support for SharedRuntime::monitor_exit_helper() + ObjectMonitor* unlocked_inflated_monitor() const { return _unlocked_inflated_monitor; } + void clear_unlocked_inflated_monitor() { + _unlocked_inflated_monitor = nullptr; + } + inline bool is_vthread_mounted() const; inline const ContinuationEntry* vthread_continuation() const; @@ -828,6 +835,7 @@ class JavaThread: public Thread { static ByteSize cont_fastpath_offset() { return byte_offset_of(JavaThread, _cont_fastpath); } static ByteSize held_monitor_count_offset() { return byte_offset_of(JavaThread, _held_monitor_count); } static ByteSize jni_monitor_count_offset() { return byte_offset_of(JavaThread, _jni_monitor_count); } + static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); } #if INCLUDE_JVMTI static ByteSize is_in_VTMS_transition_offset() { return byte_offset_of(JavaThread, _is_in_VTMS_transition); } diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp index f033f42624987..769c7695192a8 100644 --- a/src/hotspot/share/runtime/mutexLocker.cpp +++ b/src/hotspot/share/runtime/mutexLocker.cpp @@ -45,7 +45,6 @@ Mutex* SharedDictionary_lock = nullptr; Monitor* ClassInitError_lock = nullptr; Mutex* Module_lock = nullptr; Mutex* CompiledIC_lock = nullptr; -Mutex* InlineCacheBuffer_lock = nullptr; Mutex* VMStatistic_lock = nullptr; Mutex* JmethodIdCreation_lock = nullptr; Mutex* JfieldIdCreation_lock = nullptr; @@ -67,6 +66,7 @@ Monitor* CodeCache_lock = nullptr; Mutex* TouchedMethodLog_lock = nullptr; Mutex* RetData_lock = nullptr; Monitor* VMOperation_lock = nullptr; +Monitor* ThreadsLockThrottle_lock = nullptr; Monitor* Threads_lock = nullptr; Mutex* NonJavaThreadsList_lock = nullptr; Mutex* NonJavaThreadsListSync_lock = nullptr; @@ -262,7 +262,7 @@ void mutex_init() { MUTEX_DEFN(JfieldIdCreation_lock , PaddedMutex , safepoint); - MUTEX_DEFN(CompiledIC_lock , PaddedMutex , nosafepoint); // locks VtableStubs_lock, InlineCacheBuffer_lock + MUTEX_DEFN(CompiledIC_lock , PaddedMutex , nosafepoint); // locks VtableStubs_lock MUTEX_DEFN(MethodCompileQueue_lock , PaddedMonitor, safepoint); MUTEX_DEFN(CompileStatistics_lock , PaddedMutex , safepoint); MUTEX_DEFN(DirectivesStack_lock , PaddedMutex , nosafepoint); @@ -318,8 +318,9 @@ void mutex_init() { MUTEX_DEFN(JVMCIRuntime_lock , PaddedMonitor, safepoint, true); #endif + MUTEX_DEFN(ThreadsLockThrottle_lock , PaddedMonitor, safepoint); + // These locks have relative rankings, and inherit safepoint checking attributes from that rank. 
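The _unlocked_inflated_monitor accessors above support a hand-off consumed by SharedRuntime::monitor_exit_helper in the sharedRuntime.cpp hunk later in this patch: the compiled fast path records which inflated monitor it failed to unlock before calling the runtime, and the slow path consumes that note exactly once. A minimal model with stand-in types (these are not the HotSpot classes):

```
#include <cassert>

struct Monitor {};
struct ThreadModel {
  Monitor* unlocked_inflated_monitor = nullptr;
  void clear() { unlocked_inflated_monitor = nullptr; }
};

void slow_path_exit(ThreadModel& t) {
  if (Monitor* m = t.unlocked_inflated_monitor) {
    t.clear();   // consume the note before doing anything else with it
    (void)m;     // the real code re-enters m here, then exits it properly
  }
}

int main() {
  Monitor m;
  ThreadModel t;
  t.unlocked_inflated_monitor = &m;
  slow_path_exit(t);
  assert(t.unlocked_inflated_monitor == nullptr);
}
```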
- MUTEX_DEFL(InlineCacheBuffer_lock , PaddedMutex , CompiledIC_lock); MUTEX_DEFL(VtableStubs_lock , PaddedMutex , CompiledIC_lock); // Also holds DumpTimeTable_lock MUTEX_DEFL(CodeCache_lock , PaddedMonitor, VtableStubs_lock); MUTEX_DEFL(NMethodState_lock , PaddedMutex , CodeCache_lock); diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp index 160e6c97db07f..98cb27d0b812d 100644 --- a/src/hotspot/share/runtime/mutexLocker.hpp +++ b/src/hotspot/share/runtime/mutexLocker.hpp @@ -40,7 +40,6 @@ extern Mutex* SharedDictionary_lock; // a lock on the CDS shared dic extern Monitor* ClassInitError_lock; // a lock on the class initialization error table extern Mutex* Module_lock; // a lock on module and package related data structures extern Mutex* CompiledIC_lock; // a lock used to guard compiled IC patching and access -extern Mutex* InlineCacheBuffer_lock; // a lock used to guard the InlineCacheBuffer extern Mutex* VMStatistic_lock; // a lock used to guard statistics count increment extern Mutex* JmethodIdCreation_lock; // a lock on creating JNI method identifiers extern Mutex* JfieldIdCreation_lock; // a lock on creating JNI static field identifiers @@ -62,6 +61,8 @@ extern Monitor* CodeCache_lock; // a lock on the CodeCache extern Mutex* TouchedMethodLog_lock; // a lock on allocation of LogExecutedMethods info extern Mutex* RetData_lock; // a lock on installation of RetData inside method data extern Monitor* VMOperation_lock; // a lock on queue of vm_operations waiting to execute +extern Monitor* ThreadsLockThrottle_lock; // used by Thread start/exit to reduce competition for Threads_lock, + // so a VM thread calling a safepoint is prioritized extern Monitor* Threads_lock; // a lock on the Threads table of active Java threads // (also used by Safepoints too to block threads creation/destruction) extern Mutex* NonJavaThreadsList_lock; // a lock on the NonJavaThreads list diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 367d79a5283db..755d49d2c6c58 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -178,7 +178,7 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // // Cxq points to the set of Recently Arrived Threads attempting entry. // Because we push threads onto _cxq with CAS, the RATs must take the form of -// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when +// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when // the unlocking thread notices that EntryList is null but _cxq is != null. // // The EntryList is ordered by the prevailing queue discipline and @@ -210,19 +210,6 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // unpark the notifyee. Unparking a notifee in notify() is inefficient - // it's likely the notifyee would simply impale itself on the lock held // by the notifier. -// -// * An interesting alternative is to encode cxq as (List,LockByte) where -// the LockByte is 0 iff the monitor is owned. _owner is simply an auxiliary -// variable, like _recursions, in the scheme. The threads or Events that form -// the list would have to be aligned in 256-byte addresses. A thread would -// try to acquire the lock or enqueue itself with CAS, but exiting threads -// could use a 1-0 protocol and simply STB to set the LockByte to 0. -// Note that is is *not* word-tearing, but it does presume that full-word -// CAS operations are coherent with intermix with STB operations. 
That's true -// on most common processors. -// -// * See also http://blogs.sun.com/dave - // Check that object() and set_object() are called from the right context: static void check_object_context() { @@ -257,7 +244,6 @@ ObjectMonitor::ObjectMonitor(oop object) : _EntryList(nullptr), _cxq(nullptr), _succ(nullptr), - _Responsible(nullptr), _SpinDuration(ObjectMonitor::Knob_SpinLimit), _contentions(0), _WaitSet(nullptr), @@ -320,17 +306,11 @@ bool ObjectMonitor::enter_is_async_deflating() { return false; } -void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - // Used by ObjectSynchronizer::enter_for to enter for another thread. - // The monitor is private to or already owned by locking_thread which must be suspended. - // So this code may only contend with deflation. - assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); +bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { assert(contention_mark._monitor == this, "must be"); assert(!is_being_async_deflated(), "must be"); - void* prev_owner = try_set_owner_from(nullptr, locking_thread); - bool success = false; if (prev_owner == nullptr) { @@ -343,8 +323,16 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O // Racing with deflation. prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); if (prev_owner == DEFLATER_MARKER) { - // Cancelled deflation. Increment contentions as part of the deflation protocol. - add_to_contentions(1); + // We successfully cancelled the in-progress async deflation by + // changing owner from DEFLATER_MARKER to current. We now extend + // the lifetime of the contention_mark (e.g. contentions++) here + // to prevent the deflater thread from winning the last part of + // the 2-part async deflation protocol after the regular + // decrement occurs when the contention_mark goes out of + // scope. ObjectMonitor::deflate_monitor() which is called by + // the deflater thread will decrement contentions after it + // recognizes that the async deflation was cancelled. + contention_mark.extend(); success = true; } else if (prev_owner == nullptr) { // At this point we cannot race with deflation as we have both incremented @@ -360,12 +348,28 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O set_owner_from_BasicLock(prev_owner, locking_thread); success = true; } + assert(!success || owner_raw() == locking_thread, "must be"); + + return success; +} + +void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { + // Used by LightweightSynchronizer::inflate_and_enter in deoptimization path to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. 
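The TryLockWithContentionMark hunk above, together with the extend() method declared on ObjectMonitorContentionMark later in this patch, implements the deflation-cancel half of the 2-part async deflation protocol: a raised contentions count blocks deflation, and extend() keeps it raised past the mark's scope when the acquiring thread cancelled an in-progress deflation, leaving the final decrement to the deflater thread. A compact model of that counter discipline (stand-in types, not the HotSpot code):

```
#include <atomic>
#include <cassert>

struct MonitorModel {
  std::atomic<int> contentions{0};
  bool deflation_can_win() const { return contentions.load() == 0; }
};

class ContentionMark {
  MonitorModel& _m;
  bool _extended = false;
 public:
  explicit ContentionMark(MonitorModel& m) : _m(m) { _m.contentions++; }
  ~ContentionMark() { _m.contentions--; }   // regular decrement at end of scope
  void extend() {                           // deflater thread decrements later
    assert(!_extended);
    _m.contentions++;
    _extended = true;
  }
};

int main() {
  MonitorModel m;
  {
    ContentionMark mark(m);
    mark.extend();                   // we cancelled deflation; keep it blocked
  }                                  // mark's own decrement happens here
  assert(!m.deflation_can_win());    // still raised until the deflater decrements
}
```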
+ assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT - ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT, - p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner)); + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", + p2i(locking_thread), p2i(this), p2i(owner_raw())); } bool ObjectMonitor::enter_for(JavaThread* locking_thread) { + // Used by ObjectSynchronizer::enter_for() to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); // Block out deflation as soon as possible. ObjectMonitorContentionMark contention_mark(this); @@ -375,19 +379,29 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return false; } - enter_for_with_contention_mark(locking_thread, contention_mark); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); + + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", + p2i(locking_thread), p2i(this), p2i(owner_raw())); assert(owner_raw() == locking_thread, "must be"); return true; } -bool ObjectMonitor::try_enter(JavaThread* current) { - // TryLock avoids the CAS +bool ObjectMonitor::try_enter(JavaThread* current, bool check_for_recursion) { + // TryLock avoids the CAS and handles deflation. TryLockResult r = TryLock(current); if (r == TryLockResult::Success) { assert(_recursions == 0, "invariant"); return true; } + // If called from SharedRuntime::monitor_exit_helper(), we know that + // this thread doesn't already own the lock. + if (!check_for_recursion) { + return false; + } + if (r == TryLockResult::HasOwner && owner() == current) { _recursions++; return true; @@ -400,7 +414,6 @@ bool ObjectMonitor::try_enter(JavaThread* current) { set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. return true; } - return false; } @@ -561,16 +574,40 @@ void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonito ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { void* own = owner_raw(); - if (own != nullptr) return TryLockResult::HasOwner; - if (try_set_owner_from(nullptr, current) == nullptr) { - assert(_recursions == 0, "invariant"); - return TryLockResult::Success; + void* first_own = own; + + for (;;) { + if (own == DEFLATER_MARKER) { + // Block out deflation as soon as possible. + ObjectMonitorContentionMark contention_mark(this); + + // Check for deflation. + if (enter_is_async_deflating()) { + // Treat deflation as interference. + return TryLockResult::Interference; + } + if (TryLockWithContentionMark(current, contention_mark)) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // Deflation won or change of owner; dont spin + break; + } + } else if (own == nullptr) { + void* prev_own = try_set_owner_from(nullptr, current); + if (prev_own == nullptr) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // The lock had been free momentarily, but we lost the race to the lock. + own = prev_own; + } + } else { + // Retry doesn't make as much sense because the lock was just acquired. 
+ break; + } } - // The lock had been free momentarily, but we lost the race to the lock. - // Interference -- the CAS failed. - // We can either return -1 or retry. - // Retry doesn't make as much sense because the lock was just acquired. - return TryLockResult::Interference; + return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference; } // Deflate the specified ObjectMonitor if not in-use. Returns true if it @@ -746,8 +783,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { return ss->base(); } -#define MAX_RECHECK_INTERVAL 1000 - void ObjectMonitor::EnterI(JavaThread* current) { assert(current->thread_state() == _thread_blocked, "invariant"); @@ -755,25 +790,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); - return; - } - - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); return; } @@ -789,14 +805,12 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TrySpin(current)) { assert(owner_raw() == current, "invariant"); assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); return; } // The Spin failed -- Enqueue and park the thread ... assert(_succ != current, "invariant"); assert(owner_raw() != current, "invariant"); - assert(_Responsible != current, "invariant"); // Enqueue "current" on ObjectMonitor's _cxq. // @@ -826,40 +840,10 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); return; } } - // Check for cxq|EntryList edge transition to non-null. This indicates - // the onset of contention. While contention persists exiting threads - // will use a ST:MEMBAR:LD 1-1 exit protocol. When contention abates exit - // operations revert to the faster 1-0 mode. This enter operation may interleave - // (race) a concurrent 1-0 exit operation, resulting in stranding, so we - // arrange for one of the contending thread to use a timed park() operations - // to detect and recover from the race. (Stranding is form of progress failure - // where the monitor is unlocked but all the contending threads remain parked). - // That is, at least one of the contended threads will periodically poll _owner. - // One of the contending threads will become the designated "Responsible" thread. 
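The tail of the new TryLock above determines the result by comparing the owner first observed with the owner finally given up on: HasOwner only when they match, Interference otherwise. A condensed shape of that retry loop, with the deflation branch simplified away and std::atomic standing in for the HotSpot owner field:

```
#include <atomic>

enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 };

TryLockResult try_lock(std::atomic<void*>& owner, void* self) {
  void* first = owner.load();
  void* cur = first;
  while (cur == nullptr) {
    void* expected = nullptr;
    if (owner.compare_exchange_strong(expected, self)) {
      return TryLockResult::Success;       // we won the CAS
    }
    cur = expected;                        // lost the race; see who won
  }
  // Stable owner from the start: HasOwner. Owner changed under us: Interference.
  return cur == first ? TryLockResult::HasOwner : TryLockResult::Interference;
}

int main() {
  std::atomic<void*> owner{nullptr};
  int me;
  return try_lock(owner, &me) == TryLockResult::Success ? 0 : 1;
}
```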
- // The Responsible thread uses a timed park instead of a normal indefinite park - // operation -- it periodically wakes and checks for and recovers from potential - // strandings admitted by 1-0 exit operations. We need at most one Responsible - // thread per-monitor at any given moment. Only threads on cxq|EntryList may - // be responsible for a monitor. - // - // Currently, one of the contended threads takes on the added role of "Responsible". - // A viable alternative would be to use a dedicated "stranding checker" thread - // that periodically iterated over all the threads (or active monitors) and unparked - // successors where there was risk of stranding. This would help eliminate the - // timer scalability issues we see on some platforms as we'd only have one thread - // -- the checker -- parked on a timer. - - if (nxt == nullptr && _EntryList == nullptr) { - // Try to assume the role of responsible thread for the monitor. - // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } - Atomic::replace_if_null(&_Responsible, current); - } - // The lock might have been released while this thread was occupied queueing // itself onto _cxq. To close the race and avoid "stranding" and // progress-liveness failure we must resample-retry _owner before parking. @@ -871,8 +855,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { // to defer the state transitions until absolutely necessary, // and in doing so avoid some transitions ... - int recheckInterval = 1; - for (;;) { if (TryLock(current) == TryLockResult::Success) { @@ -881,37 +863,12 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() != current, "invariant"); // park self - if (_Responsible == current) { - current->_ParkEvent->park((jlong) recheckInterval); - // Increase the recheckInterval, but clamp the value. - recheckInterval *= 8; - if (recheckInterval > MAX_RECHECK_INTERVAL) { - recheckInterval = MAX_RECHECK_INTERVAL; - } - } else { - current->_ParkEvent->park(); - } + current->_ParkEvent->park(); if (TryLock(current) == TryLockResult::Success) { break; } - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - break; - } - // The lock is still contested. // Keep a tally of the # of futile wakeups. @@ -953,44 +910,23 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() == current, "invariant"); UnlinkAfterAcquire(current, &node); - if (_succ == current) _succ = nullptr; - - assert(_succ != current, "invariant"); - if (_Responsible == current) { - _Responsible = nullptr; - OrderAccess::fence(); // Dekker pivot-point - - // We may leave threads on cxq|EntryList without a designated - // "Responsible" thread. This is benign. 
When this thread subsequently - // exits the monitor it can "see" such preexisting "old" threads -- - // threads that arrived on the cxq|EntryList before the fence, above -- - // by LDing cxq|EntryList. Newly arrived threads -- that is, threads - // that arrive on cxq after the ST:MEMBAR, above -- will set Responsible - // non-null and elect a new "Responsible" timer thread. - // - // This thread executes: - // ST Responsible=null; MEMBAR (in enter epilogue - here) - // LD cxq|EntryList (in subsequent exit) - // - // Entering threads in the slow/contended path execute: - // ST cxq=nonnull; MEMBAR; LD Responsible (in enter prolog) - // The (ST cxq; MEMBAR) is accomplished with CAS(). - // - // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent - // exit operation from floating above the ST Responsible=null. + if (_succ == current) { + _succ = nullptr; + // Note that we don't need to do OrderAccess::fence() after clearing + // _succ here, since we own the lock. } // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. // But since the CAS() this thread may have also stored into _succ, - // EntryList, cxq or Responsible. These meta-data updates must be + // EntryList or cxq. These meta-data updates must be // visible __before this thread subsequently drops the lock. // Consider what could occur if we didn't enforce this constraint -- // STs to monitor meta-data and user-data could reorder with (become // visible after) the ST in exit that drops ownership of the lock. // Some other thread could then acquire the lock, but observe inconsistent // or old monitor meta-data and heap data. That violates the JMM. - // To that end, the 1-0 exit() operation must have at least STST|LDST + // To that end, the exit() operation must have at least STST|LDST // "release" barrier semantics. Specifically, there must be at least a // STST|LDST barrier in exit() before the ST of null into _owner that drops // the lock. The barrier ensures that changes to monitor meta-data and data @@ -1000,8 +936,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { // // Critically, any prior STs to _succ or EntryList must be visible before // the ST of null into _owner in the *subsequent* (following) corresponding - // monitorexit. Recall too, that in 1-0 mode monitorexit does not necessarily - // execute a serializing instruction. + // monitorexit. return; } @@ -1174,39 +1109,32 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // In that case exit() is called with _thread_state == _thread_blocked, // but the monitor's _contentions field is > 0, which inhibits reclamation. // -// 1-0 exit -// ~~~~~~~~ -// ::exit() uses a canonical 1-1 idiom with a MEMBAR although some of -// the fast-path operators have been optimized so the common ::exit() -// operation is 1-0, e.g., see macroAssembler_x86.cpp: fast_unlock(). -// The code emitted by fast_unlock() elides the usual MEMBAR. This -// greatly improves latency -- MEMBAR and CAS having considerable local -// latency on modern processors -- but at the cost of "stranding". Absent the -// MEMBAR, a thread in fast_unlock() can race a thread in the slow -// ::enter() path, resulting in the entering thread being stranding -// and a progress-liveness failure. Stranding is extremely rare. -// We use timers (timed park operations) & periodic polling to detect -// and recover from stranding. Potentially stranded threads periodically -// wake up and poll the lock. 
See the usage of the _Responsible variable. +// This is the exit part of the locking protocol, often implemented in +// C2_MacroAssembler::fast_unlock() +// +// 1. A release barrier ensures that changes to monitor meta-data +// (_succ, _EntryList, _cxq) and data protected by the lock will be +// visible before we release the lock. +// 2. Release the lock by clearing the owner. +// 3. A storeload MEMBAR is needed between releasing the owner and +// subsequently reading meta-data to safely determine if the lock is +// contended (step 4) without an elected successor (step 5). +// 4. If both _EntryList and _cxq are null, we are done, since there is no +// other thread waiting on the lock to wake up. I.e. there is no +// contention. +// 5. If there is a successor (_succ is non-null), we are done. The +// responsibility for guaranteeing progress-liveness has now implicitly +// been moved from the exiting thread to the successor. +// 6. There are waiters in the entry list (_EntryList and/or cxq are +// non-null), but there is no successor (_succ is null), so we need to +// wake up (unpark) a waiting thread to avoid stranding. // -// The CAS() in enter provides for safety and exclusion, while the CAS or -// MEMBAR in exit provides for progress and avoids stranding. 1-0 locking -// eliminates the CAS/MEMBAR from the exit path, but it admits stranding. -// We detect and recover from stranding with timers. +// Note that since only the current lock owner can manipulate the _EntryList +// or drain _cxq, we need to reacquire the lock before we can wake up +// (unpark) a waiting thread. // -// If a thread transiently strands it'll park until (a) another -// thread acquires the lock and then drops the lock, at which time the -// exiting thread will notice and unpark the stranded thread, or, (b) -// the timer expires. If the lock is high traffic then the stranding latency -// will be low due to (a). If the lock is low traffic then the odds of -// stranding are lower, although the worst-case stranding latency -// is longer. Critically, we don't want to put excessive load in the -// platform's timer subsystem. We want to minimize both the timer injection -// rate (timers created/sec) as well as the number of timers active at -// any one time. (more precisely, we want to minimize timer-seconds, which is -// the integral of the # of active timers at any instant over time). -// Both impinge on OS scalability. Given that, at most one thread parked on -// a monitor will use a timer. +// The CAS() in enter provides for safety and exclusion, while the +// MEMBAR in exit provides for progress and avoids stranding. // // There is also the risk of a futile wake-up. If we drop the lock // another thread can reacquire the lock immediately, and we can @@ -1248,10 +1176,6 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { return; } - // Invariant: after setting Responsible=null an thread must execute - // a MEMBAR or other serializing instruction before fetching EntryList|cxq. - _Responsible = nullptr; - #if INCLUDE_JFR // get the owner's thread id for the MonitorEnter event // if it is enabled and the thread isn't suspended @@ -1278,14 +1202,15 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Other threads are blocked trying to acquire the lock. 
// Normally the exiting thread is responsible for ensuring succession, - // but if other successors are ready or other entering threads are spinning - // then this thread can simply store null into _owner and exit without - // waking a successor. The existence of spinners or ready successors - // guarantees proper succession (liveness). Responsibility passes to the - // ready or running successors. The exiting thread delegates the duty. - // More precisely, if a successor already exists this thread is absolved - // of the responsibility of waking (unparking) one. - // + // but if this thread observes other successors are ready or other + // entering threads are spinning after it has stored null into _owner + // then it can exit without waking a successor. The existence of + // spinners or ready successors guarantees proper succession (liveness). + // Responsibility passes to the ready or running successors. The exiting + // thread delegates the duty. More precisely, if a successor already + // exists this thread is absolved of the responsibility of waking + // (unparking) one. + // The _succ variable is critical to reducing futile wakeup frequency. // _succ identifies the "heir presumptive" thread that has been made // ready (unparked) but that has not yet run. We need only one such @@ -1296,24 +1221,20 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Note that spinners in Enter() also set _succ non-null. // In the current implementation spinners opportunistically set // _succ so that exiting threads might avoid waking a successor. - // Another less appealing alternative would be for the exiting thread - // to drop the lock and then spin briefly to see if a spinner managed - // to acquire the lock. If so, the exiting thread could exit - // immediately without waking a successor, otherwise the exiting - // thread would need to dequeue and wake a successor. - // (Note that we'd need to make the post-drop spin short, but no - // shorter than the worst-case round-trip cache-line migration time. - // The dropped lock needs to become visible to the spinner, and then - // the acquisition of the lock by the spinner must become visible to - // the exiting thread). + // Which means that the exiting thread could exit immediately without + // waking a successor, if it observes a successor after it has dropped + // the lock. Note that the dropped lock needs to become visible to the + // spinner. // It appears that an heir-presumptive (successor) must be made ready. // Only the current lock owner can manipulate the EntryList or // drain _cxq, so we need to reacquire the lock. If we fail // to reacquire the lock the responsibility for ensuring succession // falls to the new owner. - // - if (try_set_owner_from(nullptr, current) != nullptr) { + + if (TryLock(current) != TryLockResult::Success) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. return; } @@ -1376,7 +1297,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { q = p; } - // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = nullptr + // We need to: ST EntryList; MEMBAR #storestore; ST _owner = nullptr // The MEMBAR is satisfied by the release_store() operation in ExitEpilog(). // See if we can abdicate to a spinner instead of waking a thread. 
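The six numbered steps documented in the exit comment above compress into a short control flow: release the owner, fence, then wake a successor only when the monitor is contended and no heir presumptive exists. A sketch under stand-in fields (the real code must additionally reacquire the lock before touching _EntryList/_cxq):

```
#include <atomic>

struct MonitorModel {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> entry_list{nullptr}, cxq{nullptr}, succ{nullptr};
};

void exit_monitor(MonitorModel& m) {
  m.owner.store(nullptr, std::memory_order_release);    // steps 1-2: release the lock
  std::atomic_thread_fence(std::memory_order_seq_cst);  // step 3: storeload barrier
  if (m.entry_list.load() == nullptr && m.cxq.load() == nullptr) {
    return;                                             // step 4: no contention
  }
  if (m.succ.load() != nullptr) {
    return;                                             // step 5: successor ensures progress
  }
  // step 6: reacquire the lock, pick a waiter from _EntryList/_cxq, unpark it.
}

int main() { MonitorModel m; exit_monitor(m); }
```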
@@ -1566,8 +1487,6 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { AddWaiter(&node); Thread::SpinRelease(&_WaitSetLock); - _Responsible = nullptr; - intx save = _recursions; // record the old recursion count _waiters++; // increment the number of waiters _recursions = 0; // set the recursion level to be 1 @@ -2245,7 +2164,6 @@ void ObjectMonitor::print() const { print_on(tty); } // _EntryList = 0x0000000000000000 // _cxq = 0x0000000000000000 // _succ = 0x0000000000000000 -// _Responsible = 0x0000000000000000 // _SpinDuration = 5000 // _contentions = 0 // _WaitSet = 0x0000700009756248 @@ -2274,7 +2192,6 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr(" _EntryList = " INTPTR_FORMAT, p2i(_EntryList)); st->print_cr(" _cxq = " INTPTR_FORMAT, p2i(_cxq)); st->print_cr(" _succ = " INTPTR_FORMAT, p2i(_succ)); - st->print_cr(" _Responsible = " INTPTR_FORMAT, p2i(_Responsible)); st->print_cr(" _SpinDuration = %d", _SpinDuration); st->print_cr(" _contentions = %d", contentions()); st->print_cr(" _WaitSet = " INTPTR_FORMAT, p2i(_WaitSet)); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index ef85559c2b6c3..30d2e5094162d 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -179,7 +179,6 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> { ObjectWaiter* volatile _cxq; // LL of recently-arrived threads blocked on entry. JavaThread* volatile _succ; // Heir presumptive thread - used for futile wakeup throttling - JavaThread* volatile _Responsible; volatile int _SpinDuration; @@ -348,7 +347,7 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> { void enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); bool enter_for(JavaThread* locking_thread); bool enter(JavaThread* current); - bool try_enter(JavaThread* current); + bool try_enter(JavaThread* current, bool check_for_recursion = true); bool spin_enter(JavaThread* current); void enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark); void exit(JavaThread* current, bool not_suspended = true); @@ -377,6 +376,7 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> { enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; + bool TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); TryLockResult TryLock(JavaThread* current); bool TrySpin(JavaThread* current); @@ -395,12 +395,17 @@ class ObjectMonitorContentionMark : StackObj { DEBUG_ONLY(friend class ObjectMonitor;) ObjectMonitor* _monitor; + bool _extended; NONCOPYABLE(ObjectMonitorContentionMark); public: explicit ObjectMonitorContentionMark(ObjectMonitor* monitor); ~ObjectMonitorContentionMark(); + + // Extends the contention scope beyond this object's lifetime. + // Requires manual decrement of the contentions counter. + void extend(); }; #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index d26c459b1b415..6d3c6ff24c38b 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -206,15 +206,32 @@ inline void ObjectMonitor::set_next_om(ObjectMonitor* new_value) { Atomic::store(&_next_om, new_value); } +// Block out deflation.
inline ObjectMonitorContentionMark::ObjectMonitorContentionMark(ObjectMonitor* monitor) - : _monitor(monitor) { + : _monitor(monitor), _extended(false) { + // Contentions is incremented to a positive value as part of the + // contended enter protocol, which prevents the deflater thread from + // winning the last part of the 2-part async deflation + // protocol. See: ObjectMonitor::deflate_monitor() and + // ObjectMonitor::TryLockWithContentionMark(). _monitor->add_to_contentions(1); } inline ObjectMonitorContentionMark::~ObjectMonitorContentionMark() { + // Decrement contentions when the contention mark goes out of + // scope. This opens up for deflation, if the contention mark + // hasn't been extended. _monitor->add_to_contentions(-1); } +inline void ObjectMonitorContentionMark::extend() { + // Used by ObjectMonitor::TryLockWithContentionMark() to "extend the + // lifetime" of the contention mark. + assert(!_extended, "extending twice is probably a bad design"); + _monitor->add_to_contentions(1); + _extended = true; +} + inline oop ObjectMonitor::object_peek() const { if (_object.is_null()) { return nullptr; diff --git a/src/hotspot/share/runtime/osThread.hpp b/src/hotspot/share/runtime/osThread.hpp index b0e0588a6a2b6..597cf8e4d3fcb 100644 --- a/src/hotspot/share/runtime/osThread.hpp +++ b/src/hotspot/share/runtime/osThread.hpp @@ -25,111 +25,8 @@ #ifndef SHARE_RUNTIME_OSTHREAD_HPP #define SHARE_RUNTIME_OSTHREAD_HPP -#include "runtime/frame.hpp" -#include "runtime/handles.hpp" -#include "runtime/javaFrameAnchor.hpp" -#include "runtime/objectMonitor.hpp" -#include "runtime/suspendedThreadTask.hpp" #include "utilities/macros.hpp" - -#if defined(LINUX) || defined(AIX) || defined(BSD) -#include "suspendResume_posix.hpp" -#endif - -class Monitor; - -// The OSThread class holds OS-specific thread information. It is equivalent -// to the sys_thread_t structure of the classic JVM implementation. - -// The thread states represented by the ThreadState values are platform-specific -// and are likely to be only approximate, because most OSes don't give you access -// to precise thread state information. - -// Note: the ThreadState is legacy code and is not correctly implemented. -// Uses of ThreadState need to be replaced by the state in the JavaThread. - -enum ThreadState { - ALLOCATED, // Memory has been allocated but not initialized - INITIALIZED, // The thread has been initialized but yet started - RUNNABLE, // Has been started and is runnable, but not necessarily running - MONITOR_WAIT, // Waiting on a contended monitor lock - CONDVAR_WAIT, // Waiting on a condition variable - OBJECT_WAIT, // Waiting on an Object.wait() call - BREAKPOINTED, // Suspended at breakpoint - SLEEPING, // Thread.sleep() - ZOMBIE // All done, but not reclaimed yet -}; - -typedef int (*OSThreadStartFunc)(void*); - -class OSThread: public CHeapObj { - friend class VMStructs; - friend class JVMCIVMStructs; - private: - volatile ThreadState _state; // Thread state *hint* - - // Methods - public: - void set_state(ThreadState state) { _state = state; } - ThreadState get_state() { return _state; } - - OSThread(); - ~OSThread(); - - // Printing - void print_on(outputStream* st) const; - void print() const; - - // Platform dependent stuff +// The actual class declaration is platform specific. #include OS_HEADER(osThread) - public: - - thread_id_t thread_id() const { return _thread_id; } - - void set_thread_id(thread_id_t id) { _thread_id = id; } - - private: - // _thread_id is kernel thread id (similar to LWP id on Solaris). 
Each - // thread has a unique thread_id (BsdThreads or NPTL). It can be used - // to access /proc. - thread_id_t _thread_id; -}; - - -// Utility class for use with condition variables: -class OSThreadWaitState : public StackObj { - OSThread* _osthread; - ThreadState _old_state; - public: - OSThreadWaitState(OSThread* osthread, bool is_object_wait) { - _osthread = osthread; - _old_state = osthread->get_state(); - if (is_object_wait) { - osthread->set_state(OBJECT_WAIT); - } else { - osthread->set_state(CONDVAR_WAIT); - } - } - ~OSThreadWaitState() { - _osthread->set_state(_old_state); - } -}; - - -// Utility class for use with contended monitors: -class OSThreadContendState : public StackObj { - OSThread* _osthread; - ThreadState _old_state; - public: - OSThreadContendState(OSThread* osthread) { - _osthread = osthread; - _old_state = osthread->get_state(); - osthread->set_state(MONITOR_WAIT); - } - ~OSThreadContendState() { - _osthread->set_state(_old_state); - } -}; - #endif // SHARE_RUNTIME_OSTHREAD_HPP diff --git a/src/hotspot/share/runtime/osThread.cpp b/src/hotspot/share/runtime/osThreadBase.cpp similarity index 87% rename from src/hotspot/share/runtime/osThread.cpp rename to src/hotspot/share/runtime/osThreadBase.cpp index edaefaa1070d2..7bb7ae6aa69f8 100644 --- a/src/hotspot/share/runtime/osThread.cpp +++ b/src/hotspot/share/runtime/osThreadBase.cpp @@ -24,19 +24,11 @@ #include "precompiled.hpp" #include "oops/oop.inline.hpp" -#include "runtime/osThread.hpp" - -OSThread::OSThread() { - pd_initialize(); -} - -OSThread::~OSThread() { - pd_destroy(); -} +#include "runtime/osThreadBase.hpp" // Printing -void OSThread::print_on(outputStream *st) const { - st->print("nid=" UINT64_FORMAT " ", (uint64_t)thread_id()); +void OSThreadBase::print_on(outputStream *st) const { + st->print("nid=" UINTX_FORMAT " ", thread_id_for_printing()); switch (_state) { case ALLOCATED: st->print("allocated "); break; case INITIALIZED: st->print("initialized "); break; @@ -51,4 +43,4 @@ void OSThread::print_on(outputStream *st) const { } } -void OSThread::print() const { print_on(tty); } +void OSThreadBase::print() const { print_on(tty); } diff --git a/src/hotspot/share/runtime/osThreadBase.hpp b/src/hotspot/share/runtime/osThreadBase.hpp new file mode 100644 index 0000000000000..4063da18519d5 --- /dev/null +++ b/src/hotspot/share/runtime/osThreadBase.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_RUNTIME_OSTHREAD_BASE_HPP +#define SHARE_RUNTIME_OSTHREAD_BASE_HPP + +#include "memory/allocation.hpp" + +class Monitor; + +// The OSThread class holds OS-specific thread information. It is equivalent +// to the sys_thread_t structure of the classic JVM implementation. + +// The thread states represented by the ThreadState values are platform-specific +// and are likely to be only approximate, because most OSes don't give you access +// to precise thread state information. + +// Note: the ThreadState is legacy code and is not correctly implemented. +// Uses of ThreadState need to be replaced by the state in the JavaThread. + +enum ThreadState { + ALLOCATED, // Memory has been allocated but not initialized + INITIALIZED, // The thread has been initialized but yet started + RUNNABLE, // Has been started and is runnable, but not necessarily running + MONITOR_WAIT, // Waiting on a contended monitor lock + CONDVAR_WAIT, // Waiting on a condition variable + OBJECT_WAIT, // Waiting on an Object.wait() call + BREAKPOINTED, // Suspended at breakpoint + SLEEPING, // Thread.sleep() + ZOMBIE // All done, but not reclaimed yet +}; + +typedef int (*OSThreadStartFunc)(void*); + +class OSThreadBase: public CHeapObj<mtThread> { + friend class VMStructs; + friend class JVMCIVMStructs; + private: + volatile ThreadState _state; // Thread state *hint* + + // Methods + public: + OSThreadBase() {} + virtual ~OSThreadBase() {} + NONCOPYABLE(OSThreadBase); + + void set_state(ThreadState state) { _state = state; } + ThreadState get_state() { return _state; } + + + virtual uintx thread_id_for_printing() const = 0; + + // Printing + void print_on(outputStream* st) const; + void print() const; +}; + + +// Utility class for use with condition variables: +class OSThreadWaitState : public StackObj { + OSThreadBase* _osthread; + ThreadState _old_state; + public: + OSThreadWaitState(OSThreadBase* osthread, bool is_object_wait) { + _osthread = osthread; + _old_state = osthread->get_state(); + if (is_object_wait) { + osthread->set_state(OBJECT_WAIT); + } else { + osthread->set_state(CONDVAR_WAIT); + } + } + ~OSThreadWaitState() { + _osthread->set_state(_old_state); + } +}; + + +// Utility class for use with contended monitors: +class OSThreadContendState : public StackObj { + OSThreadBase* _osthread; + ThreadState _old_state; + public: + OSThreadContendState(OSThreadBase* osthread) { + _osthread = osthread; + _old_state = osthread->get_state(); + osthread->set_state(MONITOR_WAIT); + } + ~OSThreadContendState() { + _osthread->set_state(_old_state); + } +}; + +#endif // SHARE_RUNTIME_OSTHREAD_BASE_HPP diff --git a/src/hotspot/share/runtime/reflection.cpp b/src/hotspot/share/runtime/reflection.cpp index ab3d82ad7e2a4..15172b7f4c3d8 100644 --- a/src/hotspot/share/runtime/reflection.cpp +++ b/src/hotspot/share/runtime/reflection.cpp @@ -766,10 +766,10 @@ static Handle new_type(Symbol* signature, Klass* k, TRAPS) { } oop Reflection::new_method(const methodHandle& method, bool for_constant_pool_access, TRAPS) { - // Allow sun.reflect.ConstantPool to refer to methods as java.lang.reflect.Methods. - assert(!method()->is_initializer() || - (for_constant_pool_access && method()->is_static()), - "should call new_constructor instead"); + // Allow jdk.internal.reflect.ConstantPool to refer to methods as java.lang.reflect.Methods.
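Returning to the OSThreadBase split above: the base class now keeps only the portable state and a pure-virtual id accessor, while each os/<platform> header supplies the concrete OSThread. A hypothetical minimal platform subclass under that scheme (all names here are illustrative stand-ins, not the actual platform headers):

```
#include <cstdint>
#include <iostream>

typedef uintptr_t uintx;

class OSThreadBaseModel {
 public:
  virtual ~OSThreadBaseModel() = default;
  // Mirrors the pure-virtual accessor in osThreadBase.hpp: each platform
  // decides how its native id is rendered.
  virtual uintx thread_id_for_printing() const = 0;
  void print_on(std::ostream& st) const { st << "nid=" << thread_id_for_printing(); }
};

class OSThreadLinuxModel : public OSThreadBaseModel {
  int _thread_id = 0;   // kernel tid on Linux, for illustration
 public:
  uintx thread_id_for_printing() const override { return (uintx)_thread_id; }
};

int main() {
  OSThreadLinuxModel t;
  t.print_on(std::cout);   // prints "nid=0"
}
```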
+ assert(!method()->is_object_initializer() && + (for_constant_pool_access || !method()->is_static_initializer()), + "Should not be the initializer"); InstanceKlass* holder = method->method_holder(); int slot = method->method_idnum(); @@ -817,7 +817,7 @@ oop Reflection::new_method(const methodHandle& method, bool for_constant_pool_ac oop Reflection::new_constructor(const methodHandle& method, TRAPS) { - assert(method()->is_initializer(), "should call new_method instead"); + assert(method()->is_object_initializer(), "Should be the initializer"); InstanceKlass* holder = method->method_holder(); int slot = method->method_idnum(); diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index d9b38133f9944..e4d4e6aea0f8c 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -651,7 +651,7 @@ void SharedRuntime::throw_and_post_jvmti_exception(JavaThread* current, Handle h } #if INCLUDE_JVMCI - if (EnableJVMCI && UseJVMCICompiler) { + if (EnableJVMCI) { vframeStream vfst(current, true); methodHandle method = methodHandle(current, vfst.method()); int bci = vfst.bci(); @@ -1963,6 +1963,26 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea assert(JavaThread::current() == current, "invariant"); // Exit must be non-blocking, and therefore no exceptions can be thrown. ExceptionMark em(current); + + // Check if C2_MacroAssembler::fast_unlock() or + // C2_MacroAssembler::fast_unlock_lightweight() unlocked an inflated + // monitor before going slow path. Since there is no safepoint + // polling when calling into the VM, we can be sure that the monitor + // hasn't been deallocated. + ObjectMonitor* m = current->unlocked_inflated_monitor(); + if (m != nullptr) { + assert(m->owner_raw() != current, "must be"); + current->clear_unlocked_inflated_monitor(); + + // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). + if (!m->try_enter(current, /*check_for_recursion*/ false)) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. + current->dec_held_monitor_count(); + return; + } + } + // The object could become unlocked through a JNI call, which we have no other checks for. // Give a fatal message if CheckJNICalls. Otherwise we ignore it. if (obj->is_unlocked()) { diff --git a/src/hotspot/share/runtime/statSampler.cpp b/src/hotspot/share/runtime/statSampler.cpp index 5fd038bf845c1..bbd8d3096bba0 100644 --- a/src/hotspot/share/runtime/statSampler.cpp +++ b/src/hotspot/share/runtime/statSampler.cpp @@ -201,7 +201,8 @@ void StatSampler::assert_system_property(const char* name, const char* value, TR // convert Java String to utf8 string char* system_value = java_lang_String::as_utf8_string(value_oop); - assert(strcmp(value, system_value) == 0, "property value mustn't differ from System.getProperty"); + assert(strcmp(value, system_value) == 0, "property value mustn't differ from System.getProperty. 
Our value is: %s, System.getProperty is: %s", + value, system_value); #endif // ASSERT } diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index c13f64fca4bed..c881b64b59280 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -171,12 +171,13 @@ address StubRoutines::_dlibm_sin_cos_huge = nullptr; address StubRoutines::_dlibm_reduce_pi04l = nullptr; address StubRoutines::_dlibm_tan_cot_huge = nullptr; address StubRoutines::_dtan = nullptr; +address StubRoutines::_dtanh = nullptr; address StubRoutines::_f2hf = nullptr; address StubRoutines::_hf2f = nullptr; -address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}}; -address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}}; +address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}}; +address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}}; address StubRoutines::_method_entry_barrier = nullptr; address StubRoutines::_array_sort = nullptr; @@ -187,6 +188,7 @@ address StubRoutines::_cont_returnBarrier = nullptr; address StubRoutines::_cont_returnBarrierExc = nullptr; address StubRoutines::_upcall_stub_exception_handler = nullptr; +address StubRoutines::_upcall_stub_load_target = nullptr; address StubRoutines::_lookup_secondary_supers_table_slow_path_stub = nullptr; address StubRoutines::_lookup_secondary_supers_table_stubs[Klass::SECONDARY_SUPERS_TABLE_SIZE] = { nullptr }; diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index f5b932569be81..f025742b60585 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -281,6 +281,7 @@ class StubRoutines: AllStatic { static address _dlibm_reduce_pi04l; static address _dlibm_tan_cot_huge; static address _dtan; + static address _dtanh; static address _fmod; static address _f2hf; @@ -293,10 +294,11 @@ class StubRoutines: AllStatic { static address _cont_returnBarrierExc; // Vector Math Routines - static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP]; - static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP]; + static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH]; + static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH]; static address _upcall_stub_exception_handler; + static address _upcall_stub_load_target; static address _lookup_secondary_supers_table_stubs[]; static address _lookup_secondary_supers_table_slow_path_stub; @@ -472,6 +474,7 @@ class StubRoutines: AllStatic { static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; } static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; } static address dtan() { return _dtan; } + static address dtanh() { return _dtanh; } // These are versions of the java.lang.Float::floatToFloat16() and float16ToFloat() // methods which perform the same operations as the intrinsic version. 
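For context on the f2hf/hf2f pair mentioned in the comment above: they back `java.lang.Float::floatToFloat16` and `float16ToFloat`, which hardware with half-float support can accelerate. A minimal round-trip sketch (the class name is illustrative, not part of the change):

```java
// Round-tripping float -> binary16 -> float; 0.1f is not exactly
// representable in binary16, so the round trip loses precision.
public class Float16RoundTrip {
    public static void main(String[] args) {
        float f = 0.1f;
        short h = Float.floatToFloat16(f);     // the operation StubRoutines::_f2hf accelerates
        float back = Float.float16ToFloat(h);  // the operation StubRoutines::_hf2f accelerates
        System.out.println(f + " -> 0x" + Integer.toHexString(h & 0xFFFF) + " -> " + back);
    }
}
```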
@@ -504,6 +507,11 @@ class StubRoutines: AllStatic { return _upcall_stub_exception_handler; } + static address upcall_stub_load_target() { + assert(_upcall_stub_load_target != nullptr, "not implemented"); + return _upcall_stub_load_target; + } + static address lookup_secondary_supers_table_stub(u1 slot) { assert(slot < Klass::SECONDARY_SUPERS_TABLE_SIZE, "out of bounds"); assert(_lookup_secondary_supers_table_stubs[slot] != nullptr, "not implemented"); diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index 7c01c8d4e16b3..df6a660a0aa21 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -83,7 +83,6 @@ Thread::Thread(MemTag mem_tag) { set_handle_area(new (mem_tag) HandleArea(mem_tag, nullptr)); set_metadata_handles(new (mtClass) GrowableArray(30, mtClass)); set_last_handle_mark(nullptr); - DEBUG_ONLY(_missed_ic_stub_refill_verifier = nullptr); // Initial value of zero ==> never claimed. _threads_do_token = 0; @@ -145,6 +144,16 @@ Thread::Thread(MemTag mem_tag) { MACOS_AARCH64_ONLY(DEBUG_ONLY(_wx_init = false)); } +#ifdef ASSERT +address Thread::stack_base() const { + // Note: can't report Thread::name() here as that can require a ResourceMark which we + // can't use because this gets called too early in the thread initialization. + assert(_stack_base != nullptr, "Stack base not yet set for thread id:%d (0 if not set)", + osthread() != nullptr ? osthread()->thread_id() : 0); + return _stack_base; +} +#endif + void Thread::initialize_tlab() { if (UseTLAB) { tlab().initialize(); diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp index 567a76d0eadea..45c39eae151d2 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -46,7 +46,6 @@ class CompilerThread; class HandleArea; class HandleMark; -class ICRefillVerifier; class JvmtiRawMonitor; class NMethodClosure; class Metadata; @@ -242,20 +241,6 @@ class Thread: public ThreadShadow { public: void set_last_handle_mark(HandleMark* mark) { _last_handle_mark = mark; } HandleMark* last_handle_mark() const { return _last_handle_mark; } - private: - -#ifdef ASSERT - ICRefillVerifier* _missed_ic_stub_refill_verifier; - - public: - ICRefillVerifier* missed_ic_stub_refill_verifier() { - return _missed_ic_stub_refill_verifier; - } - - void set_missed_ic_stub_refill_verifier(ICRefillVerifier* verifier) { - _missed_ic_stub_refill_verifier = verifier; - } -#endif // ASSERT private: // Used by SkipGCALot class. 
@@ -532,7 +517,7 @@ class Thread: public ThreadShadow { public: // Stack overflow support - address stack_base() const { assert(_stack_base != nullptr,"Sanity check"); return _stack_base; } + address stack_base() const DEBUG_ONLY(;) NOT_DEBUG({ return _stack_base; }) void set_stack_base(address base) { _stack_base = base; } size_t stack_size() const { return _stack_size; } void set_stack_size(size_t size) { _stack_size = size; } diff --git a/src/hotspot/share/runtime/threads.cpp b/src/hotspot/share/runtime/threads.cpp index 8266bd86a9682..45aaa769856af 100644 --- a/src/hotspot/share/runtime/threads.cpp +++ b/src/hotspot/share/runtime/threads.cpp @@ -1028,7 +1028,9 @@ void Threads::add(JavaThread* p, bool force_daemon) { void Threads::remove(JavaThread* p, bool is_daemon) { // Extra scope needed for Thread_lock, so we can check // that we do not remove thread without safepoint code notice - { MonitorLocker ml(Threads_lock); + { + ConditionalMutexLocker throttle_ml(ThreadsLockThrottle_lock, UseThreadsLockThrottleLock); + MonitorLocker ml(Threads_lock); if (ThreadIdTable::is_initialized()) { // This cleanup must be done before the current thread's GC barrier @@ -1076,7 +1078,7 @@ void Threads::remove(JavaThread* p, bool is_daemon) { // Notify threads waiting in EscapeBarriers EscapeBarrier::thread_removed(p); - } // unlock Threads_lock + } // unlock Threads_lock and ThreadsLockThrottle_lock // Reduce the ObjectMonitor ceiling for the exiting thread. ObjectSynchronizer::dec_in_use_list_ceiling(); diff --git a/src/hotspot/share/runtime/vmOperation.hpp b/src/hotspot/share/runtime/vmOperation.hpp index 532a9231b70e8..eede52f00d566 100644 --- a/src/hotspot/share/runtime/vmOperation.hpp +++ b/src/hotspot/share/runtime/vmOperation.hpp @@ -109,7 +109,6 @@ template(PrintCompileQueue) \ template(PrintClassHierarchy) \ template(PrintClasses) \ - template(ICBufferFull) \ template(PrintMetadata) \ template(GTestExecuteAtSafepoint) \ template(GTestStopSafepoint) \ diff --git a/src/hotspot/share/runtime/vmOperations.hpp b/src/hotspot/share/runtime/vmOperations.hpp index 5ccf689eaf3d9..ea7f62df37db8 100644 --- a/src/hotspot/share/runtime/vmOperations.hpp +++ b/src/hotspot/share/runtime/vmOperations.hpp @@ -60,12 +60,6 @@ class VM_ForceSafepoint: public VM_EmptyOperation { VMOp_Type type() const { return VMOp_ForceSafepoint; } }; -// empty vm op, when forcing a safepoint due to inline cache buffers being full -class VM_ICBufferFull: public VM_EmptyOperation { - public: - VMOp_Type type() const { return VMOp_ICBufferFull; } -}; - class VM_ClearICs: public VM_Operation { private: bool _preserve_static_stubs; diff --git a/src/hotspot/share/services/heapDumper.cpp b/src/hotspot/share/services/heapDumper.cpp index 5b3749381a01b..10e1a804ad213 100644 --- a/src/hotspot/share/services/heapDumper.cpp +++ b/src/hotspot/share/services/heapDumper.cpp @@ -1512,6 +1512,38 @@ class ClassDumper : public KlassClosure { } }; +// Support class used to generate HPROF_LOAD_CLASS records + +class LoadedClassDumper : public LockedClassesDo { + private: + AbstractDumpWriter* _writer; + GrowableArray* _klass_map; + u4 _class_serial_num; + AbstractDumpWriter* writer() const { return _writer; } + void add_class_serial_number(Klass* k, int serial_num) { + _klass_map->at_put_grow(serial_num, k); + } + public: + LoadedClassDumper(AbstractDumpWriter* writer, GrowableArray* klass_map) + : _writer(writer), _klass_map(klass_map), _class_serial_num(0) {} + + void do_klass(Klass* k) { + // len of HPROF_LOAD_CLASS record + u4 remaining = 2 * 
oopSize + 2 * sizeof(u4); + DumperSupport::write_header(writer(), HPROF_LOAD_CLASS, remaining); + // class serial number is just a number + writer()->write_u4(++_class_serial_num); + // class ID + writer()->write_classID(k); + // add the Klass* and class serial number pair + add_class_serial_number(k, _class_serial_num); + writer()->write_u4(STACK_TRACE_ID); + // class name ID + Symbol* name = k->name(); + writer()->write_symbolID(name); + } +}; + // Support class used to generate HPROF_GC_ROOT_JNI_LOCAL records class JNILocalsDumper : public OopClosure { @@ -2190,9 +2222,7 @@ void DumpMerger::do_merge() { // The VM operation that performs the heap dump class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public UnmountedVThreadDumper { private: - static VM_HeapDumper* _global_dumper; - static DumpWriter* _global_writer; - DumpWriter* _local_writer; + DumpWriter* _writer; JavaThread* _oome_thread; Method* _oome_constructor; bool _gc_before_heap_dump; @@ -2218,33 +2248,13 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public Unmounte return Atomic::fetch_then_add(&_dump_seq, 1); } - // accessors and setters - static VM_HeapDumper* dumper() { assert(_global_dumper != nullptr, "Error"); return _global_dumper; } - static DumpWriter* writer() { assert(_global_writer != nullptr, "Error"); return _global_writer; } - - void set_global_dumper() { - assert(_global_dumper == nullptr, "Error"); - _global_dumper = this; - } - void set_global_writer() { - assert(_global_writer == nullptr, "Error"); - _global_writer = _local_writer; - } - void clear_global_dumper() { _global_dumper = nullptr; } - void clear_global_writer() { _global_writer = nullptr; } + DumpWriter* writer() const { return _writer; } bool skip_operation() const; - // writes a HPROF_LOAD_CLASS record to global writer - static void do_load_class(Klass* k); - // HPROF_GC_ROOT_THREAD_OBJ records for platform and mounted virtual threads void dump_threads(AbstractDumpWriter* writer); - void add_class_serial_number(Klass* k, int serial_num) { - _klass_map->at_put_grow(serial_num, k); - } - bool is_oom_thread(JavaThread* thread) const { return thread == _oome_thread && _oome_constructor != nullptr; } @@ -2259,7 +2269,7 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public Unmounte 0 /* total full collections, dummy, ignored */, gc_before_heap_dump), WorkerTask("dump heap") { - _local_writer = writer; + _writer = writer; _gc_before_heap_dump = gc_before_heap_dump; _klass_map = new (mtServiceability) GrowableArray(INITIAL_CLASS_COUNT, mtServiceability); @@ -2313,9 +2323,6 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public Unmounte void dump_vthread(oop vt, AbstractDumpWriter* segment_writer); }; -VM_HeapDumper* VM_HeapDumper::_global_dumper = nullptr; -DumpWriter* VM_HeapDumper::_global_writer = nullptr; - bool VM_HeapDumper::skip_operation() const { return false; } @@ -2329,31 +2336,6 @@ void DumperSupport::end_of_dump(AbstractDumpWriter* writer) { writer->write_u4(0); } -// writes a HPROF_LOAD_CLASS record for the class -void VM_HeapDumper::do_load_class(Klass* k) { - static u4 class_serial_num = 0; - - // len of HPROF_LOAD_CLASS record - u4 remaining = 2*oopSize + 2*sizeof(u4); - - DumperSupport::write_header(writer(), HPROF_LOAD_CLASS, remaining); - - // class serial number is just a number - writer()->write_u4(++class_serial_num); - - // class ID - writer()->write_classID(k); - - // add the Klass* and class serial number pair - dumper()->add_class_serial_number(k, 
class_serial_num); - - writer()->write_u4(STACK_TRACE_ID); - - // class name ID - Symbol* name = k->name(); - writer()->write_symbolID(name); -} - // Write a HPROF_GC_ROOT_THREAD_OBJ record for platform/carrier and mounted virtual threads. // Then walk the stack so that locals and JNI locals are dumped. void VM_HeapDumper::dump_threads(AbstractDumpWriter* writer) { @@ -2430,11 +2412,6 @@ void VM_HeapDumper::doit() { } } - // At this point we should be the only dumper active, so - // the following should be safe. - set_global_dumper(); - set_global_writer(); - WorkerThreads* workers = ch->safepoint_workers(); prepare_parallel_dump(workers); @@ -2446,10 +2423,6 @@ void VM_HeapDumper::doit() { workers->run_task(this, _num_dumper_threads); _poi = nullptr; } - - // Now we clear the global variables, so that a future dumper can run. - clear_global_dumper(); - clear_global_writer(); } void VM_HeapDumper::work(uint worker_id) { @@ -2480,8 +2453,8 @@ void VM_HeapDumper::work(uint worker_id) { // write HPROF_LOAD_CLASS records { - LockedClassesDo locked_load_classes(&do_load_class); - ClassLoaderDataGraph::classes_do(&locked_load_classes); + LoadedClassDumper loaded_class_dumper(writer(), _klass_map); + ClassLoaderDataGraph::classes_do(&loaded_class_dumper); } // write HPROF_FRAME and HPROF_TRACE records diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp index 1034dec0d9aaa..23094c9e8c4ac 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -338,6 +338,7 @@ #define NOT_PRODUCT_ARG(arg) #define PRODUCT_RETURN {} #define PRODUCT_RETURN0 { return 0; } +#define PRODUCT_RETURN_NULL { return nullptr; } #define PRODUCT_RETURN_(code) { code } #else // PRODUCT #define PRODUCT_ONLY(code) @@ -345,6 +346,7 @@ #define NOT_PRODUCT_ARG(arg) arg, #define PRODUCT_RETURN /*next token must be ;*/ #define PRODUCT_RETURN0 /*next token must be ;*/ +#define PRODUCT_RETURN_NULL /* next token must be ;*/ #define PRODUCT_RETURN_(code) /*next token must be ;*/ #endif // PRODUCT diff --git a/src/java.base/macosx/native/libjli/java_md_macosx.m b/src/java.base/macosx/native/libjli/java_md_macosx.m index 4ac2f2c10a215..7aeb32be859f2 100644 --- a/src/java.base/macosx/native/libjli/java_md_macosx.m +++ b/src/java.base/macosx/native/libjli/java_md_macosx.m @@ -60,115 +60,79 @@ #define LD_LIBRARY_PATH "DYLD_FALLBACK_LIBRARY_PATH" /* - * If a processor / os combination has the ability to run binaries of - * two data models and cohabitation of jre/jdk bits with both data - * models is supported, then DUAL_MODE is defined. MacOSX is a hybrid - * system in that, the universal library can contain all types of libraries - * 32/64 and client/server, thus the spawn is capable of linking with the - * appropriate library as requested. + * Following is the high level flow of the launcher + * code residing in the common java.c and this + * macosx specific java_md_macosx file: * - * Notes: - * 1. VM. DUAL_MODE is disabled, and not supported, however, it is left here in - * for experimentation and perhaps enable it in the future. - * 2. At the time of this writing, the universal library contains only - * a server 64-bit server JVM. - * 3. "-client" command line option is supported merely as a command line flag, - * for, compatibility reasons, however, a server VM will be launched. - */ - -/* - * Flowchart of launcher execs and options processing on unix + * - JLI_Launch function, which is the entry point + * to the launcher, calls CreateExecutionEnvironment. 
+ * + * - CreateExecutionEnvironment does the following + * (not necessarily in this order): + * - determines the relevant JVM type that needs + * to be ultimately created + * - determines the path and asserts the presence + * of libjava and relevant libjvm library + * - removes any JVM selection options from the + * arguments that were passed to the launcher + * + * - CreateExecutionEnvironment then creates a new + * thread, within the same process, to launch the + * application's main() Java method and parks the + * current thread, on which CreateExecutionEnvironment + * was invoked, in Apple's Cocoa event loop. Before + * doing so, CreateExecutionEnvironment maintains a + * state flag to keep note that a new thread has + * been spawned. + * + * - The newly created thread (in which the application's + * main() method will ultimately run) starts right from + * the beginning of the current process' main function, + * which effectively means that JLI_Launch is re-invoked + * on this new thread and the same above sequence of code + * flow repeats again. During this "recursive" call, when + * at the point of creating a new thread in + * CreateExecutionEnvironment, the CreateExecutionEnvironment + * will check for the state flag to see if a new thread + * has already been spawned and upon noticing that it + * has, it will skip spawning any more threads and will + * return back from CreateExecutionEnvironment. + * + * - The control returns back from CreateExecutionEnvironment + * to JLI_Launch, and the thread on which the control + * returns is the thread on which the application's main() + * Java method will be invoked. + * + * - JLI_Launch then invokes LoadJavaVM which dlopen()s the + * JVM library and asserts the presence of JNI Invocation + * Functions "JNI_CreateJavaVM", "JNI_GetDefaultJavaVMInitArgs" + * and "JNI_GetCreatedJavaVMs" in that library. It then sets + * internal function pointers in the launcher to point to + * those functions. + * + * - JLI_Launch then translates any -J options by invoking + * TranslateApplicationArgs. + * + * - JLI_Launch then invokes ParseArguments to parse/process + * the launcher arguments. * - * The selection of the proper vm shared library to open depends on - * several classes of command line options, including vm "flavor" - * options (-client, -server) and the data model options, -d32 and - * -d64, as well as a version specification which may have come from - * the command line or from the manifest of an executable jar file. - * The vm selection options are not passed to the running - * virtual machine; they must be screened out by the launcher. + * - JLI_Launch then ultimately calls JVMInit. * - * The version specification (if any) is processed first by the - * platform independent routine SelectVersion. This may result in - * the exec of the specified launcher version. + * - JVMInit then invokes JavaMain. * - * Now, in most cases,the launcher will dlopen the target libjvm.so. All - * required libraries are loaded by the runtime linker, using the known paths - * baked into the shared libraries at compile time. Therefore, - * in most cases, the launcher will only exec, if the data models are - * mismatched, and will not set any environment variables, regardless of the - * data models. + * - JavaMain, before launching the application, invokes + * PostJVMInit. * + * - PostJVMInit invokes ShowSplashScreen which displays + * a splash screen for the application, if applicable. 
* + * - Control then returns back from PostJVMInit into + * JavaMain, which then loads the application's main + * class and invokes the relevant main() Java method. * - * Main - * (incoming argv) - * | - * \|/ - * CreateExecutionEnvironment - * (determines desired data model) - * | - * | - * \|/ - * Have Desired Model ? --> NO --> Is Dual-Mode ? --> NO --> Exit(with error) - * | | - * | | - * | \|/ - * | YES - * | | - * | | - * | \|/ - * | CheckJvmType - * | (removes -client, -server etc.) - * | | - * | | - * \|/ \|/ - * YES Find the desired executable/library - * | | - * | | - * \|/ \|/ - * CheckJvmType POINT A - * (removes -client, -server, etc.) - * | - * | - * \|/ - * TranslateDashJArgs... - * (Prepare to pass args to vm) - * | - * | - * \|/ - * ParseArguments - * (processes version options, - * creates argument list for vm, - * etc.) - * | - * | - * \|/ - * POINT A - * | - * | - * \|/ - * Path is desired JRE ? YES --> Continue - * NO - * | - * | - * \|/ - * Paths have well known - * jvm paths ? --> NO --> Continue - * YES - * | - * | - * \|/ - * Does libjvm.so exist - * in any of them ? --> NO --> Continue - * YES - * | - * | - * \|/ - * Re-exec / Spawn - * | - * | - * \|/ - * Main + * - JavaMain then returns back an integer result which + * then gets propagated as a return value all the way + * out of the JLI_Launch function. */ /* Store the name of the executable once computed */ @@ -333,6 +297,7 @@ static void ParkEventLoop() { static void MacOSXStartup(int argc, char *argv[]) { // Thread already started? static jboolean started = false; + int rc; if (started) { return; } @@ -345,12 +310,14 @@ static void MacOSXStartup(int argc, char *argv[]) { // Fire up the main thread pthread_t main_thr; - if (pthread_create(&main_thr, NULL, &apple_main, &args) != 0) { - JLI_ReportErrorMessageSys("Could not create main thread: %s\n", strerror(errno)); + rc = pthread_create(&main_thr, NULL, &apple_main, &args); + if (rc != 0) { + JLI_ReportErrorMessageSys("Could not create main thread, return code: %d\n", rc); exit(1); } - if (pthread_detach(main_thr)) { - JLI_ReportErrorMessageSys("pthread_detach() failed: %s\n", strerror(errno)); + rc = pthread_detach(main_thr); + if (rc != 0) { + JLI_ReportErrorMessage("pthread_detach() failed, return code: %d\n", rc); exit(1); } diff --git a/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java b/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java index 44cfb76d1628e..478593dfac1ac 100644 --- a/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java +++ b/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java @@ -72,7 +72,7 @@ abstract class GaloisCounterMode extends CipherSpi { // data size when buffer is divided up to aid in intrinsics private static final int TRIGGERLEN = 65536; // 64k // x86-64 parallel intrinsic data size - private static final int PARALLEL_LEN = 7680; + private static final int PARALLEL_LEN = 512; // max data size for x86-64 intrinsic private static final int SPLIT_LEN = 1048576; // 1MB diff --git a/src/java.base/share/classes/java/io/BufferedInputStream.java b/src/java.base/share/classes/java/io/BufferedInputStream.java index cfc2a3d2c75b9..c401873ce12e4 100644 --- a/src/java.base/share/classes/java/io/BufferedInputStream.java +++ b/src/java.base/share/classes/java/io/BufferedInputStream.java @@ -50,6 +50,11 @@ * reread before new bytes are taken from * the contained input stream. 
* + * @apiNote + * Once wrapped in a {@code BufferedInputStream}, the underlying + * {@code InputStream} should not be used directly nor wrapped with + * another stream. + * * @author Arthur van Hoff * @since 1.0 */ diff --git a/src/java.base/share/classes/java/io/BufferedOutputStream.java b/src/java.base/share/classes/java/io/BufferedOutputStream.java index ecc1e8a8a4845..687f0c91bc4f7 100644 --- a/src/java.base/share/classes/java/io/BufferedOutputStream.java +++ b/src/java.base/share/classes/java/io/BufferedOutputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,11 @@ * output stream without necessarily causing a call to the underlying * system for each byte written. * + * @apiNote + * Once wrapped in a {@code BufferedOutputStream}, the underlying + * {@code OutputStream} should not be used directly nor wrapped with + * another stream. + * * @author Arthur van Hoff * @since 1.0 */ diff --git a/src/java.base/share/classes/java/io/BufferedReader.java b/src/java.base/share/classes/java/io/BufferedReader.java index 2cd027c8bd8a2..c2f6b89e08622 100644 --- a/src/java.base/share/classes/java/io/BufferedReader.java +++ b/src/java.base/share/classes/java/io/BufferedReader.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -43,8 +43,9 @@ * *
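The `@apiNote` just added to `BufferedInputStream` (and, below, to the other buffered wrappers) warns against mixing direct and wrapped use of the same stream. A small sketch of the hazard it is guarding against, assuming the default buffer size (class name illustrative):

```java
import java.io.*;

public class DoubleReadPitfall {
    public static void main(String[] args) throws IOException {
        InputStream raw = new ByteArrayInputStream("hello world".getBytes());
        BufferedInputStream buffered = new BufferedInputStream(raw);
        System.out.println(buffered.read()); // 'h'; fills the buffer from raw as a side effect
        System.out.println(raw.read());      // -1, not 'e': the buffer already drained raw
    }
}
```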

In general, each read request made of a Reader causes a corresponding * read request to be made of the underlying character or byte stream. It is - * therefore advisable to wrap a BufferedReader around any Reader whose read() - * operations may be costly, such as FileReaders and InputStreamReaders. For + * therefore advisable to wrap a {@code BufferedReader} around any + * {@code Reader} whose {@code read()} operations may be costly, such as + * {@code FileReader}s and {@code InputStreamReader}s. For * example, * * {@snippet lang=java : @@ -52,12 +53,18 @@ * } * * will buffer the input from the specified file. Without buffering, each - * invocation of read() or readLine() could cause bytes to be read from the - * file, converted into characters, and then returned, which can be very - * inefficient. + * invocation of {@code read()} or {@code readLine()} could cause bytes to be + * read from the file, converted into characters, and then returned, which can + * be very inefficient. * - *

Programs that use DataInputStreams for textual input can be localized by - * replacing each DataInputStream with an appropriate BufferedReader. + *

Programs that use {@code DataInputStream}s for textual input can be + * localized by replacing each {@code DataInputStream} with an appropriate + * {@code BufferedReader}. + * + * @apiNote + * Once wrapped in a {@code BufferedReader}, the underlying + * {@code Reader} should not be used directly nor wrapped with + * another reader. * * @see FileReader * @see InputStreamReader diff --git a/src/java.base/share/classes/java/io/BufferedWriter.java b/src/java.base/share/classes/java/io/BufferedWriter.java index 4904f7180724a..17862a265ae82 100644 --- a/src/java.base/share/classes/java/io/BufferedWriter.java +++ b/src/java.base/share/classes/java/io/BufferedWriter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,25 +37,31 @@ *

The buffer size may be specified, or the default size may be accepted. * The default is large enough for most purposes. * - *

A newLine() method is provided, which uses the platform's own notion of - * line separator as defined by the system property {@code line.separator}. - * Not all platforms use the newline character ('\n') to terminate lines. - * Calling this method to terminate each output line is therefore preferred to - * writing a newline character directly. + *

A {@code newLine()} method is provided, which uses the platform's own + * notion of line separator as defined by the system property + * {@linkplain System#lineSeparator() line.separator}. Not all platforms use the newline character ('\n') + * to terminate lines. Calling this method to terminate each output line is + * therefore preferred to writing a newline character directly. * - *

In general, a Writer sends its output immediately to the underlying - * character or byte stream. Unless prompt output is required, it is advisable - * to wrap a BufferedWriter around any Writer whose write() operations may be - * costly, such as FileWriters and OutputStreamWriters. For example, + *

In general, a {@code Writer} sends its output immediately to the + * underlying character or byte stream. Unless prompt output is required, it + * is advisable to wrap a {@code BufferedWriter} around any {@code Writer} whose + * {@code write()} operations may be costly, such as {@code FileWriter}s and + * {@code OutputStreamWriter}s. For example, * * {@snippet lang=java : * PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("foo.out"))); * } * - * will buffer the PrintWriter's output to the file. Without buffering, each - * invocation of a print() method would cause characters to be converted into - * bytes that would then be written immediately to the file, which can be very - * inefficient. + * will buffer the {@code PrintWriter}'s output to the file. Without buffering, + * each invocation of a {@code print()} method would cause characters to be + * converted into bytes that would then be written immediately to the file, + * which can be very inefficient. + * + * @apiNote + * Once wrapped in a {@code BufferedWriter}, the underlying + * {@code Writer} should not be used directly nor wrapped with + * another writer. * * @see PrintWriter * @see FileWriter diff --git a/src/java.base/share/classes/java/io/DataOutputStream.java b/src/java.base/share/classes/java/io/DataOutputStream.java index d16ae73f913bf..4b22d65bd39fa 100644 --- a/src/java.base/share/classes/java/io/DataOutputStream.java +++ b/src/java.base/share/classes/java/io/DataOutputStream.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,8 +26,13 @@ package java.io; +import jdk.internal.access.JavaLangAccess; +import jdk.internal.access.SharedSecrets; import jdk.internal.util.ByteArray; +import static jdk.internal.util.ModifiedUtf.putChar; +import static jdk.internal.util.ModifiedUtf.utfLen; + /** * A data output stream lets an application write primitive Java data * types to an output stream in a portable way. An application can @@ -44,6 +50,8 @@ * @since 1.0 */ public class DataOutputStream extends FilterOutputStream implements DataOutput { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + /** * The number of bytes written to the data output stream so far. * If this counter overflows, it will be wrapped to Integer.MAX_VALUE. @@ -352,15 +360,11 @@ public final void writeUTF(String str) throws IOException { * {@code str} would exceed 65535 bytes in length * @throws IOException if some other I/O error occurs. */ + @SuppressWarnings("deprecation") static int writeUTF(String str, DataOutput out) throws IOException { final int strlen = str.length(); - int utflen = strlen; // optimized for ASCII - - for (int i = 0; i < strlen; i++) { - int c = str.charAt(i); - if (c >= 0x80 || c == 0) - utflen += (c >= 0x800) ? 
2 : 1; - } + int countNonZeroAscii = JLA.countNonZeroAscii(str); + int utflen = utfLen(str, countNonZeroAscii); if (utflen > 65535 || /* overflow */ utflen < strlen) throw new UTFDataFormatException(tooLongMsg(str, utflen)); @@ -377,25 +381,11 @@ static int writeUTF(String str, DataOutput out) throws IOException { int count = 0; ByteArray.setUnsignedShort(bytearr, count, utflen); count += 2; - int i = 0; - for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII - int c = str.charAt(i); - if (c >= 0x80 || c == 0) break; - bytearr[count++] = (byte) c; - } + str.getBytes(0, countNonZeroAscii, bytearr, count); + count += countNonZeroAscii; - for (; i < strlen; i++) { - int c = str.charAt(i); - if (c < 0x80 && c != 0) { - bytearr[count++] = (byte) c; - } else if (c >= 0x800) { - bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); - bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); - bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } else { - bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); - bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } + for (int i = countNonZeroAscii; i < strlen;) { + count = putChar(bytearr, count, str.charAt(i++)); } out.write(bytearr, 0, utflen + 2); return utflen + 2; diff --git a/src/java.base/share/classes/java/io/FileInputStream.java b/src/java.base/share/classes/java/io/FileInputStream.java index 60b289637fdfc..180b2e416a914 100644 --- a/src/java.base/share/classes/java/io/FileInputStream.java +++ b/src/java.base/share/classes/java/io/FileInputStream.java @@ -82,10 +82,9 @@ public class FileInputStream extends InputStream private volatile boolean closed; /** - * Creates a {@code FileInputStream} by - * opening a connection to an actual file, - * the file named by the path name {@code name} - * in the file system. {@linkplain java.nio.file##links Symbolic links} + * Creates a {@code FileInputStream} to read from an existing file + * named by the path name {@code name}. + * {@linkplain java.nio.file##links Symbolic links} * are automatically redirected to the target of the link. * A new {@code FileDescriptor} * object is created to represent this file @@ -115,10 +114,8 @@ public FileInputStream(String name) throws FileNotFoundException { } /** - * Creates a {@code FileInputStream} by - * opening a connection to an actual file, - * the file named by the {@code File} - * object {@code file} in the file system. + * Creates a {@code FileInputStream} to read from an existing file + * represented by the {@code File} object {@code file}. * {@linkplain java.nio.file##links Symbolic links} * are automatically redirected to the target of the link. * A new {@code FileDescriptor} object diff --git a/src/java.base/share/classes/java/io/InputStream.java b/src/java.base/share/classes/java/io/InputStream.java index 736b6ebd90435..a87870bfce0dd 100644 --- a/src/java.base/share/classes/java/io/InputStream.java +++ b/src/java.base/share/classes/java/io/InputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,8 @@ package java.io; +import jdk.internal.util.ArraysSupport; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -305,12 +307,9 @@ public int read(byte[] b, int off, int len) throws IOException { } /** - * The maximum size of array to allocate. - * Some VMs reserve some header words in an array. - * Attempts to allocate larger arrays may result in - * OutOfMemoryError: Requested array size exceeds VM limit + * The maximum size of array to allocate */ - private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; + private static final int MAX_BUFFER_SIZE = ArraysSupport.SOFT_MAX_ARRAY_LENGTH; /** * Reads all remaining bytes from the input stream. This method blocks until diff --git a/src/java.base/share/classes/java/io/ObjectOutputStream.java b/src/java.base/share/classes/java/io/ObjectOutputStream.java index 3650b10135356..bde069a1774d1 100644 --- a/src/java.base/share/classes/java/io/ObjectOutputStream.java +++ b/src/java.base/share/classes/java/io/ObjectOutputStream.java @@ -1,5 +1,6 @@ /* * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,8 +35,13 @@ import java.util.StringJoiner; import jdk.internal.util.ByteArray; +import jdk.internal.access.JavaLangAccess; +import jdk.internal.access.SharedSecrets; import sun.reflect.misc.ReflectUtil; +import static jdk.internal.util.ModifiedUtf.putChar; +import static jdk.internal.util.ModifiedUtf.utfLen; + /** * An ObjectOutputStream writes primitive data types and graphs of Java objects * to an OutputStream. The objects can be read (reconstituted) using an @@ -169,6 +175,7 @@ public class ObjectOutputStream extends OutputStream implements ObjectOutput, ObjectStreamConstants { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); private static class Caches { /** cache of subclass security audit results */ @@ -885,7 +892,7 @@ public void writeChars(String str) throws IOException { * stream */ public void writeUTF(String str) throws IOException { - bout.writeUTF(str); + bout.writeUTFInternal(str, false); } /** @@ -1317,14 +1324,7 @@ private void writeNonProxyDesc(ObjectStreamClass desc, boolean unshared) */ private void writeString(String str, boolean unshared) throws IOException { handles.assign(unshared ? 
null : str); - long utflen = bout.getUTFLength(str); - if (utflen <= 0xFFFF) { - bout.writeByte(TC_STRING); - bout.writeUTF(str, utflen); - } else { - bout.writeByte(TC_LONGSTRING); - bout.writeLongUTF(str, utflen); - } + bout.writeUTFInternal(str, true); } /** @@ -1994,26 +1994,27 @@ public void writeDouble(double v) throws IOException { } } - public void writeBytes(String s) throws IOException { - int endoff = s.length(); - int cpos = 0; - int csize = 0; - for (int off = 0; off < endoff; ) { - if (cpos >= csize) { - cpos = 0; - csize = Math.min(endoff - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - } - if (pos >= MAX_BLOCK_SIZE) { + @SuppressWarnings("deprecation") + void writeBytes(String s, int len) throws IOException { + int pos = this.pos; + for (int strpos = 0; strpos < len;) { + int rem = MAX_BLOCK_SIZE - pos; + int csize = Math.min(len - strpos, rem); + s.getBytes(strpos, strpos + csize, buf, pos); + pos += csize; + strpos += csize; + + if (pos == MAX_BLOCK_SIZE) { + this.pos = pos; drain(); + pos = 0; } - int n = Math.min(csize - cpos, MAX_BLOCK_SIZE - pos); - int stop = pos + n; - while (pos < stop) { - buf[pos++] = (byte) cbuf[cpos++]; - } - off += n; } + this.pos = pos; + } + + public void writeBytes(String s) throws IOException { + writeBytes(s, s.length()); } public void writeChars(String s) throws IOException { @@ -2026,8 +2027,47 @@ public void writeChars(String s) throws IOException { } } - public void writeUTF(String s) throws IOException { - writeUTF(s, getUTFLength(s)); + public void writeUTF(String str) throws IOException { + writeUTFInternal(str, false); + } + + private void writeUTFInternal(String str, boolean writeHeader) throws IOException { + int strlen = str.length(); + int countNonZeroAscii = JLA.countNonZeroAscii(str); + int utflen = utfLen(str, countNonZeroAscii); + if (utflen <= 0xFFFF) { + if(writeHeader) { + writeByte(TC_STRING); + } + writeShort(utflen); + } else { + if(writeHeader) { + writeByte(TC_LONGSTRING); + } + writeLong(utflen); + } + + if (countNonZeroAscii != 0) { + writeBytes(str, countNonZeroAscii); + } + if (countNonZeroAscii != strlen) { + writeMoreUTF(str, countNonZeroAscii); + } + } + + private void writeMoreUTF(String str, int stroff) throws IOException { + int pos = this.pos; + for (int strlen = str.length(); stroff < strlen;) { + char c = str.charAt(stroff++); + int csize = c != 0 && c < 0x80 ? 1 : c >= 0x800 ? 3 : 2; + if (pos + csize >= MAX_BLOCK_SIZE) { + this.pos = pos; + drain(); + pos = 0; + } + pos = putChar(buf, pos, c); + } + this.pos = pos; } @@ -2153,112 +2193,6 @@ void writeDoubles(double[] v, int off, int len) throws IOException { } } } - - /** - * Returns the length in bytes of the UTF encoding of the given string. - */ - long getUTFLength(String s) { - int len = s.length(); - long utflen = 0; - for (int off = 0; off < len; ) { - int csize = Math.min(len - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - for (int cpos = 0; cpos < csize; cpos++) { - char c = cbuf[cpos]; - if (c >= 0x0001 && c <= 0x007F) { - utflen++; - } else if (c > 0x07FF) { - utflen += 3; - } else { - utflen += 2; - } - } - off += csize; - } - return utflen; - } - - /** - * Writes the given string in UTF format. This method is used in - * situations where the UTF encoding length of the string is already - * known; specifying it explicitly avoids a prescan of the string to - * determine its UTF length. 
- */ - void writeUTF(String s, long utflen) throws IOException { - if (utflen > 0xFFFFL) { - throw new UTFDataFormatException(); - } - writeShort((int) utflen); - if (utflen == (long) s.length()) { - writeBytes(s); - } else { - writeUTFBody(s); - } - } - - /** - * Writes given string in "long" UTF format. "Long" UTF format is - * identical to standard UTF, except that it uses an 8 byte header - * (instead of the standard 2 bytes) to convey the UTF encoding length. - */ - void writeLongUTF(String s) throws IOException { - writeLongUTF(s, getUTFLength(s)); - } - - /** - * Writes given string in "long" UTF format, where the UTF encoding - * length of the string is already known. - */ - void writeLongUTF(String s, long utflen) throws IOException { - writeLong(utflen); - if (utflen == (long) s.length()) { - writeBytes(s); - } else { - writeUTFBody(s); - } - } - - /** - * Writes the "body" (i.e., the UTF representation minus the 2-byte or - * 8-byte length header) of the UTF encoding for the given string. - */ - private void writeUTFBody(String s) throws IOException { - int limit = MAX_BLOCK_SIZE - 3; - int len = s.length(); - for (int off = 0; off < len; ) { - int csize = Math.min(len - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - for (int cpos = 0; cpos < csize; cpos++) { - char c = cbuf[cpos]; - if (pos <= limit) { - if (c <= 0x007F && c != 0) { - buf[pos++] = (byte) c; - } else if (c > 0x07FF) { - buf[pos + 2] = (byte) (0x80 | ((c >> 0) & 0x3F)); - buf[pos + 1] = (byte) (0x80 | ((c >> 6) & 0x3F)); - buf[pos + 0] = (byte) (0xE0 | ((c >> 12) & 0x0F)); - pos += 3; - } else { - buf[pos + 1] = (byte) (0x80 | ((c >> 0) & 0x3F)); - buf[pos + 0] = (byte) (0xC0 | ((c >> 6) & 0x1F)); - pos += 2; - } - } else { // write one byte at a time to normalize block - if (c <= 0x007F && c != 0) { - write(c); - } else if (c > 0x07FF) { - write(0xE0 | ((c >> 12) & 0x0F)); - write(0x80 | ((c >> 6) & 0x3F)); - write(0x80 | ((c >> 0) & 0x3F)); - } else { - write(0xC0 | ((c >> 6) & 0x1F)); - write(0x80 | ((c >> 0) & 0x3F)); - } - } - } - off += csize; - } - } } /** diff --git a/src/java.base/share/classes/java/lang/Boolean.java b/src/java.base/share/classes/java/lang/Boolean.java index ba88157dc923e..2c0925a979016 100644 --- a/src/java.base/share/classes/java/lang/Boolean.java +++ b/src/java.base/share/classes/java/lang/Boolean.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,10 +37,10 @@ import static java.lang.constant.ConstantDescs.CD_Boolean; /** - * The Boolean class wraps a value of the primitive type - * {@code boolean} in an object. An object of type - * {@code Boolean} contains a single field whose type is - * {@code boolean}. + * The {@code Boolean} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code boolean}. An object of type {@code Boolean} contains a + * single field whose type is {@code boolean}. * *
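Stepping back from the `DataOutputStream`/`ObjectOutputStream` rewrite above: both emit modified UTF-8, where ASCII costs one byte per char, `U+0000` and code points up to `U+07FF` cost two, and everything else costs three, behind a two-byte (or, for `TC_LONGSTRING`, eight-byte) length header. A round-trip sketch (class name illustrative):

```java
import java.io.*;

public class ModifiedUtf8Demo {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        new DataOutputStream(bos).writeUTF("A\u0000\u00e9\u20ac");
        // 2 (length header) + 1 ('A') + 2 (NUL) + 2 (U+00E9) + 3 (U+20AC) = 10
        System.out.println(bos.size());
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        System.out.println(in.readUTF().length()); // 4: the string survives the round trip
    }
}
```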

In addition, this class provides many methods for * converting a {@code boolean} to a {@code String} and a diff --git a/src/java.base/share/classes/java/lang/Byte.java b/src/java.base/share/classes/java/lang/Byte.java index 18502abf69c4d..ade75a7a99eeb 100644 --- a/src/java.base/share/classes/java/lang/Byte.java +++ b/src/java.base/share/classes/java/lang/Byte.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,10 +39,10 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME; /** - * - * The {@code Byte} class wraps a value of primitive type {@code byte} - * in an object. An object of type {@code Byte} contains a single - * field whose type is {@code byte}. + * The {@code Byte} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code byte}. An object of type {@code Byte} contains a + * single field whose type is {@code byte}. * *
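The wrapper-class phrasing these javadoc changes introduce comes with a practical caveat worth recalling: `valueOf` may hand back cached instances (the JLS guarantees caching of `int` values in -128..127), so wrapper identity and wrapper equality can differ. A quick illustration (class name illustrative):

```java
public class WrapperCaching {
    public static void main(String[] args) {
        Integer a = Integer.valueOf(127), b = Integer.valueOf(127);
        Integer c = Integer.valueOf(128), d = Integer.valueOf(128);
        System.out.println(a == b);      // true: both refer to the cached box
        System.out.println(c == d);      // usually false: distinct boxes outside the cache
        System.out.println(c.equals(d)); // true: compare wrapper values with equals()
    }
}
```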

In addition, this class provides several methods for converting * a {@code byte} to a {@code String} and a {@code String} to a {@code diff --git a/src/java.base/share/classes/java/lang/Character.java b/src/java.base/share/classes/java/lang/Character.java index a829d71e11367..84db550d7cc4d 100644 --- a/src/java.base/share/classes/java/lang/Character.java +++ b/src/java.base/share/classes/java/lang/Character.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -43,12 +43,12 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME; /** - * The {@code Character} class wraps a value of the primitive - * type {@code char} in an object. An object of class - * {@code Character} contains a single field whose type is - * {@code char}. - *

- * In addition, this class provides a large number of static methods for + * The {@code Character} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code char}. An object of type {@code Character} contains a + * single field whose type is {@code char}. + * + *

In addition, this class provides a large number of static methods for * determining a character's category (lowercase letter, digit, etc.) * and for converting characters from uppercase to lowercase and vice * versa. diff --git a/src/java.base/share/classes/java/lang/Class.java b/src/java.base/share/classes/java/lang/Class.java index 48ffeea5289ff..79cd57011b0ed 100644 --- a/src/java.base/share/classes/java/lang/Class.java +++ b/src/java.base/share/classes/java/lang/Class.java @@ -140,22 +140,24 @@ * }} * * It is also possible to get the {@code Class} object for a named - * class or interface (or for {@code void}) using a class literal. + * class or interface (or for {@code void}) using a class literal + * (JLS {@jls 15.8.2}). * For example: * * {@snippet lang="java" : - * System.out.println("The name of class Foo is: "+Foo.class.getName()); + * System.out.println("The name of class Foo is: " + Foo.class.getName()); // @highlight substring="Foo.class" * } * *

Some methods of class {@code Class} expose whether the declaration of * a class or interface in Java source code was enclosed within * another declaration. Other methods describe how a class or interface - * is situated in a nest. A nest is a set of + * is situated in a {@index "nest"}. A nest is a set of * classes and interfaces, in the same run-time package, that * allow mutual access to their {@code private} members. - * The classes and interfaces are known as nestmates. + * The classes and interfaces are known as {@index "nestmates"} + * (JVMS {@jvms 4.7.29}). * One nestmate acts as the - * nest host, and enumerates the other nestmates which + * nest host (JVMS {@jvms 4.7.28}), and enumerates the other nestmates which * belong to the nest; each of them in turn records it as the nest host. * The classes and interfaces which belong to a nest, including its host, are * determined when @@ -167,7 +169,7 @@ *

Hidden Classes

* A class or interface created by the invocation of * {@link java.lang.invoke.MethodHandles.Lookup#defineHiddenClass(byte[], boolean, MethodHandles.Lookup.ClassOption...) - * Lookup::defineHiddenClass} is a {@linkplain Class#isHidden() hidden} + * Lookup::defineHiddenClass} is a {@linkplain Class#isHidden() hidden} * class or interface. * All kinds of class, including enum classes and record classes, may be * hidden classes; all kinds of interface, including annotation interfaces, @@ -216,7 +218,6 @@ * * @see java.lang.ClassLoader#defineClass(byte[], int, int) * @since 1.0 - * @jls 15.8.2 Class Literals */ public final class Class implements java.io.Serializable, GenericDeclaration, diff --git a/src/java.base/share/classes/java/lang/Double.java b/src/java.base/share/classes/java/lang/Double.java index 9b11964d9e6fa..68fff6a2fbdd2 100644 --- a/src/java.base/share/classes/java/lang/Double.java +++ b/src/java.base/share/classes/java/lang/Double.java @@ -36,10 +36,10 @@ import jdk.internal.vm.annotation.IntrinsicCandidate; /** - * The {@code Double} class wraps a value of the primitive type - * {@code double} in an object. An object of type - * {@code Double} contains a single field whose type is - * {@code double}. + * The {@code Double} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code double}. An object of type {@code Double} contains a + * single field whose type is {@code double}. * *

In addition, this class provides several methods for converting a * {@code double} to a {@code String} and a @@ -148,7 +148,7 @@ * relations that can be defined over floating-point values: * *

- *
numerical equality ({@code ==} + *
{@index "numerical equality"} ({@code ==} * operator): (Not an equivalence relation)
*
Two floating-point values represent the same extended real * number. The extended real numbers are the real numbers augmented @@ -158,7 +158,7 @@ * number and is not equal to any value, including itself. *
* - *
bit-wise equivalence:
+ *
{@index "bit-wise equivalence"}:
*
The bits of the two floating-point values are the same. This * equivalence relation for {@code double} values {@code a} and {@code * b} is implemented by the expression @@ -168,7 +168,7 @@ * is distinguished from every other bit pattern encoding a NaN. *
* - *
representation equivalence:
+ *
{@index "representation equivalence"}:
*
The two floating-point values represent the same IEEE 754 * datum. In particular, for {@linkplain #isFinite(double) * finite} values, the sign, {@linkplain Math#getExponent(double) diff --git a/src/java.base/share/classes/java/lang/Float.java b/src/java.base/share/classes/java/lang/Float.java index 02b6600777331..470eb71cf701f 100644 --- a/src/java.base/share/classes/java/lang/Float.java +++ b/src/java.base/share/classes/java/lang/Float.java @@ -36,10 +36,10 @@ import jdk.internal.vm.annotation.IntrinsicCandidate; /** - * The {@code Float} class wraps a value of primitive type - * {@code float} in an object. An object of type - * {@code Float} contains a single field whose type is - * {@code float}. + * The {@code Float} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code float}. An object of type {@code Float} contains a + * single field whose type is {@code float}. * *
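The three relations the `Double` javadoc distinguishes above diverge exactly at NaN and signed zero; a compact demonstration (class name illustrative):

```java
public class DoubleEqualities {
    public static void main(String[] args) {
        double nan = Double.NaN;
        System.out.println(nan == nan);                        // false: numerical equality rejects NaN
        System.out.println(Double.valueOf(nan).equals(nan));   // true: representation equivalence accepts NaN
        System.out.println(0.0 == -0.0);                       // true: the zeroes are numerically equal
        System.out.println(Double.valueOf(0.0).equals(-0.0));  // false: different representations
        System.out.println(Double.doubleToRawLongBits(0.0) ==
                           Double.doubleToRawLongBits(-0.0));  // false: bit-wise equivalence also differs
    }
}
```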

In addition, this class provides several methods for converting a * {@code float} to a {@code String} and a diff --git a/src/java.base/share/classes/java/lang/Integer.java b/src/java.base/share/classes/java/lang/Integer.java index 7a65046181f00..eab0a942d9a9a 100644 --- a/src/java.base/share/classes/java/lang/Integer.java +++ b/src/java.base/share/classes/java/lang/Integer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,9 +45,10 @@ import static java.lang.String.UTF16; /** - * The {@code Integer} class wraps a value of the primitive type - * {@code int} in an object. An object of type {@code Integer} - * contains a single field whose type is {@code int}. + * The {@code Integer} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code int}. An object of type {@code Integer} contains a + * single field whose type is {@code int}. * *

In addition, this class provides several methods for converting * an {@code int} to a {@code String} and a {@code String} to an @@ -63,8 +64,9 @@ *

Implementation note: The implementations of the "bit twiddling" * methods (such as {@link #highestOneBit(int) highestOneBit} and * {@link #numberOfTrailingZeros(int) numberOfTrailingZeros}) are - * based on material from Henry S. Warren, Jr.'s Hacker's - * Delight, (Addison Wesley, 2002). + * based on material from Henry S. Warren, Jr.'s Hacker's + * Delight, (Addison Wesley, 2002) and Hacker's + * Delight, Second Edition, (Pearson Education, 2013). * * @author Lee Boynton * @author Arthur van Hoff @@ -1736,7 +1738,7 @@ public static int reverse(int i) { * compress(expand(x, m), m) == x & compress(m, m) * } *

- * The Sheep And Goats (SAG) operation (see Hacker's Delight, section 7.7) + * The Sheep And Goats (SAG) operation (see Hacker's Delight, Second Edition, section 7.7) * can be implemented as follows: * {@snippet lang="java" : * int compressLeft(int i, int mask) { diff --git a/src/java.base/share/classes/java/lang/Long.java b/src/java.base/share/classes/java/lang/Long.java index ee9533b29eb53..f86e1622b3836 100644 --- a/src/java.base/share/classes/java/lang/Long.java +++ b/src/java.base/share/classes/java/lang/Long.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,8 +45,9 @@ import static java.lang.String.UTF16; /** - * The {@code Long} class wraps a value of the primitive type {@code - * long} in an object. An object of type {@code Long} contains a + * The {@code Long} class is the {@linkplain + * java.lang##wrapperClass wrapper class} for values of the primitive + * type {@code long}. An object of type {@code Long} contains a * single field whose type is {@code long}. * *

In addition, this class provides several methods for converting @@ -63,8 +64,9 @@ *

Implementation note: The implementations of the "bit twiddling" * methods (such as {@link #highestOneBit(long) highestOneBit} and * {@link #numberOfTrailingZeros(long) numberOfTrailingZeros}) are - * based on material from Henry S. Warren, Jr.'s Hacker's - * Delight, (Addison Wesley, 2002). + * based on material from Henry S. Warren, Jr.'s Hacker's + * Delight, (Addison Wesley, 2002) and Hacker's + * Delight, Second Edition, (Pearson Education, 2013). * * @author Lee Boynton * @author Arthur van Hoff @@ -1749,7 +1751,7 @@ public static long reverse(long i) { * compress(expand(x, m), m) == x & compress(m, m) * } *

- * The Sheep And Goats (SAG) operation (see Hacker's Delight, section 7.7) + * The Sheep And Goats (SAG) operation (see Hacker's Delight, Second Edition, section 7.7) * can be implemented as follows: * {@snippet lang="java" : * long compressLeft(long i, long mask) { diff --git a/src/java.base/share/classes/java/lang/Math.java b/src/java.base/share/classes/java/lang/Math.java index 044982a588af8..6b576c88b4710 100644 --- a/src/java.base/share/classes/java/lang/Math.java +++ b/src/java.base/share/classes/java/lang/Math.java @@ -2737,6 +2737,7 @@ public static double cosh(double x) { * @return The hyperbolic tangent of {@code x}. * @since 1.5 */ + @IntrinsicCandidate public static double tanh(double x) { return StrictMath.tanh(x); } diff --git a/src/java.base/share/classes/java/lang/Object.java b/src/java.base/share/classes/java/lang/Object.java index d9813df57a4f0..7909f05304268 100644 --- a/src/java.base/share/classes/java/lang/Object.java +++ b/src/java.base/share/classes/java/lang/Object.java @@ -109,7 +109,7 @@ public Object() {} /** * Indicates whether some other object is "equal to" this one. *

- * The {@code equals} method implements an equivalence relation
+ * The {@code equals} method implements an {@index "equivalence relation"}
  * on non-null object references:
  * <ul>