openjdk · zifeihan · Jun 11, 2024 · Jun 11, 2024 · Jun 12, 2024 · Jun 13, 2024
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -2007,11 +2007,12 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+  predicate(n->as_Reduction()->requires_strict_order());
   match(Set dst (AddReductionVF src1 src2));
   effect(TEMP tmp);
   ins_cost(VEC_COST);
-  format %{ "reduce_addF $dst, $src1, $src2\t# KILL $tmp" %}
+  format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
     __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@@ -2022,11 +2023,28 @@ instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+  predicate(!n->as_Reduction()->requires_strict_order()); // selected only when the reduction node does not require strict order
+  match(Set dst (AddReductionVF src1 src2));
+  effect(TEMP tmp);
+  ins_cost(VEC_COST);
+  format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %}
+  ins_encode %{
+    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2)); // set up for T_FLOAT elements and the vector length of src2
+    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister); // seed tmp with the scalar input src1
+    __ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+                    as_VectorRegister($tmp$$reg)); // sum src2 with the seed held in tmp, result back in tmp; presumably RVV vfredusum.vs (unordered sum) - confirm against the assembler
+    __ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg)); // move the scalar result from tmp into dst
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+  predicate(n->as_Reduction()->requires_strict_order());
   match(Set dst (AddReductionVD src1 src2));
   effect(TEMP tmp);
   ins_cost(VEC_COST);
-  format %{ "reduce_addD $dst, $src1, $src2\t# KILL $tmp" %}
+  format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
     __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@@ -2037,6 +2055,22 @@ instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
+instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+  predicate(!n->as_Reduction()->requires_strict_order()); // selected only when the reduction node does not require strict order
+  match(Set dst (AddReductionVD src1 src2));
+  effect(TEMP tmp);
+  ins_cost(VEC_COST);
+  format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %}
+  ins_encode %{
+    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2)); // set up for T_DOUBLE elements and the vector length of src2
+    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister); // seed tmp with the scalar input src1
+    __ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+                    as_VectorRegister($tmp$$reg)); // sum src2 with the seed held in tmp, result back in tmp; presumably RVV vfredusum.vs (unordered sum) - confirm against the assembler
+    __ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg)); // move the scalar result from tmp into dst
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector add reduction - predicated
 
 instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{

diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java
@@ -56,6 +56,11 @@ public static void main(String[] args) {
         failOn = {"no_strict_order"},
         applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
+        failOn = {"no_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     private static void testAddReductionVF() {
         float result = 1;
         for (int i = 0; i < SIZE; i++) {
@@ -71,6 +76,11 @@ private static void testAddReductionVF() {
         failOn = {"no_strict_order"},
         applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
+        failOn = {"no_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     private static void testAddReductionVD() {
         double result = 1;
         for (int i = 0; i < SIZE; i++) {

diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
@@ -81,6 +81,12 @@ public static void testFloatAddKernel(VectorSpecies SPECIES, float[] f) {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=8"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=8"},
+        counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_64() {
         testFloatAddKernel(FloatVector.SPECIES_64, fa);
     }
@@ -91,6 +97,12 @@ public static void testFloatAdd_64() {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=16"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_128() {
         testFloatAddKernel(FloatVector.SPECIES_128, fa);
     }
@@ -101,6 +113,12 @@ public static void testFloatAdd_128() {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=32"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_256() {
         testFloatAddKernel(FloatVector.SPECIES_256, fa);
     }
@@ -111,6 +129,12 @@ public static void testFloatAdd_256() {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=64"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_512() {
         testFloatAddKernel(FloatVector.SPECIES_512, fa);
     }
@@ -130,6 +154,12 @@ public static void testDoubleAddKernel(VectorSpecies SPECIES, double[] d) {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=16"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_128() {
         testDoubleAddKernel(DoubleVector.SPECIES_128, da);
     }
@@ -140,6 +170,12 @@ public static void testDoubleAdd_128() {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=32"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_256() {
         testDoubleAddKernel(DoubleVector.SPECIES_256, da);
     }
@@ -150,6 +186,12 @@ public static void testDoubleAdd_256() {
         applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
         applyIf = {"MaxVectorSize", ">=64"},
         phase = CompilePhase.PRINT_IDEAL)
+    @IR(applyIfPlatform = {"riscv64", "true"},
+        applyIfCPUFeature = {"v", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_512() {
         testDoubleAddKernel(DoubleVector.SPECIES_512, da);
     }