# Reviewer notes (placed ahead of the first `diff --git` header; not part of any hunk).
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# indentation had been collapsed. The line structure below is reconstructed from
# the hunk headers' line counts; leading whitespace and the position of blank
# context lines are a best guess -- confirm against the target tree before applying.
#
# src/hotspot/cpu/riscv/riscv_v.ad
#   - reduce_addF / reduce_addD are renamed to reduce_addF_ordered /
#     reduce_addD_ordered and gain
#     predicate(n->as_Reduction()->requires_strict_order()).
#   - New reduce_addF_unordered / reduce_addD_unordered match the same
#     AddReductionVF / AddReductionVD nodes under the negated predicate and
#     perform the reduction with vfredusum_vs.
#   - The reduction instruction emitted by the *_ordered variants lies outside
#     the hunks' context lines; the patch's own comment names vfredosum.vs.
#
# test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java
#   - Adds "rvv", "true" to applyIfCPUFeatureOr on the @IR rules that count
#     "requires_strict_order" for testAddReductionVF / testAddReductionVD.
#
# test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
#   - Adds "rvv", "true" to applyIfCPUFeatureOr on the @IR rules that count
#     "no_strict_order" for testFloatAdd_64/128/256/512 and
#     testDoubleAdd_128/256/512.
#
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 4a71da9f20c65..841e8cb260cb8 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -2007,11 +2007,20 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+// Distinguish two cases based on requires_strict_order
+// 1. Non strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
+//    generated by Vector API. It is more beneficial performance-wise to do
+//    an unordered FP reduction sum (vfredusum.vs).
+// 2. Strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
+//    generated by auto-vectorization. Must do an ordered FP reduction sum
+//    (vfredosum.vs).
+
+instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+  predicate(n->as_Reduction()->requires_strict_order());
   match(Set dst (AddReductionVF src1 src2));
   effect(TEMP tmp);
   ins_cost(VEC_COST);
-  format %{ "reduce_addF $dst, $src1, $src2\t# KILL $tmp" %}
+  format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
     __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@@ -2022,11 +2031,28 @@ instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
+  predicate(!n->as_Reduction()->requires_strict_order());
+  match(Set dst (AddReductionVF src1 src2));
+  effect(TEMP tmp);
+  ins_cost(VEC_COST);
+  format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %}
+  ins_encode %{
+    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
+    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
+    __ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+                    as_VectorRegister($tmp$$reg));
+    __ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+  predicate(n->as_Reduction()->requires_strict_order());
   match(Set dst (AddReductionVD src1 src2));
   effect(TEMP tmp);
   ins_cost(VEC_COST);
-  format %{ "reduce_addD $dst, $src1, $src2\t# KILL $tmp" %}
+  format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
     __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@@ -2037,6 +2063,22 @@ instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
   ins_pipe(pipe_slow);
 %}
 
+instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
+  predicate(!n->as_Reduction()->requires_strict_order());
+  match(Set dst (AddReductionVD src1 src2));
+  effect(TEMP tmp);
+  ins_cost(VEC_COST);
+  format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %}
+  ins_encode %{
+    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
+    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
+    __ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+                    as_VectorRegister($tmp$$reg));
+    __ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector add reduction - predicated
 
 instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java
index 327e6e5e12de0..b328d4135ecfe 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java
@@ -54,7 +54,7 @@ public static void main(String[] args) {
         applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
     @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
         failOn = {"no_strict_order"},
-        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
         phase = CompilePhase.PRINT_IDEAL)
     private static void testAddReductionVF() {
         float result = 1;
@@ -69,7 +69,7 @@ private static void testAddReductionVF() {
         applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
     @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
         failOn = {"no_strict_order"},
-        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
         phase = CompilePhase.PRINT_IDEAL)
     private static void testAddReductionVD() {
         double result = 1;
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
index 549d9aa5d4946..38a2753a7eda4 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
@@ -78,7 +78,7 @@ public static void testFloatAddKernel(VectorSpecies SPECIES, float[] f) {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=8"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_64() {
@@ -88,7 +88,7 @@ public static void testFloatAdd_64() {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=16"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_128() {
@@ -98,7 +98,7 @@ public static void testFloatAdd_128() {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=32"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_256() {
@@ -108,7 +108,7 @@ public static void testFloatAdd_256() {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=64"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testFloatAdd_512() {
@@ -127,7 +127,7 @@ public static void testDoubleAddKernel(VectorSpecies SPECIES, double[] d) {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=16"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_128() {
@@ -137,7 +137,7 @@ public static void testDoubleAdd_128() {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=32"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_256() {
@@ -147,7 +147,7 @@ public static void testDoubleAdd_256() {
     @Test
     @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
         failOn = {"requires_strict_order"},
-        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
         applyIf = {"MaxVectorSize", ">=64"},
         phase = CompilePhase.PRINT_IDEAL)
     public static void testDoubleAdd_512() {