[LV][NFC] Tidy up partial reduction tests with filter-out-after option #129047
Conversation
@llvm/pr-subscribers-testing-tools @llvm/pr-subscribers-llvm-transforms

Author: David Sherwood (david-arm)

Changes: A few test files seem to have been edited after using the update_test_checks.py script, yet they still carry the comment `; NOTE: Assertions have been autogenerated by ...` at the top, which could potentially be confusing since the assertions are no longer strictly auto-generated.

Patch is 61.50 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/129047.diff

3 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index 4e4a5c82c298a..092938866c65b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -48,6 +48,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_add_sub(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -94,6 +120,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
; CHECK-SVE-MAXBW-LABEL: define i32 @chained_partial_reduce_add_sub(
; CHECK-SVE-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -141,6 +193,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]])
; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE-MAXBW: scalar.ph:
+; CHECK-SVE-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE-MAXBW: for.cond.cleanup:
+; CHECK-SVE-MAXBW-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-MAXBW-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE-MAXBW: for.body:
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
entry:
%cmp28.not = icmp ult i32 %N, 2
@@ -213,6 +291,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_add_add(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -259,6 +363,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-MAXBW-LABEL: define i32 @chained_partial_reduce_add_add(
; CHECK-SVE-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -305,6 +435,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]])
; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE-MAXBW: scalar.ph:
+; CHECK-SVE-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE-MAXBW: for.cond.cleanup:
+; CHECK-SVE-MAXBW-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-MAXBW-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE-MAXBW: for.body:
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
entry:
%cmp28.not = icmp ult i32 %N, 2
@@ -378,6 +534,32 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[SUB:%.*]] = sub nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[ADD]] = add i32 [[SUB]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_sub_add(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -424,6 +606,32 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B...
[truncated]
LGTM
@@ -1035,3 +1515,55 @@ attributes #0 = { vscale_range(1,16) }

!0 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!1 = distinct !{!0}
;.
Those are not really interesting. Could you pass `--check-globals=none`?
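(A concrete sketch of that suggestion; the script and the `--check-globals` option exist in-tree, but the opt binary path and the exact invocation shown here are assumptions:)

```sh
# Hypothetical regeneration that drops CHECK lines for module-level
# metadata such as "!0 = ...": --check-globals=none suppresses them.
llvm/utils/update_test_checks.py \
  --opt-binary=build/bin/opt \
  --check-globals=none \
  llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
```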
> I think the extra work to not stage these changes in git (`git add -p .` is great for this) is worth not testing code blocks that we don't actually care about.

I guess it's just that every time the IR changes innocuously due to some vplan transformation or whatever, the tests need to be regenerated and then the developer has to go back and delete the scalar loop for every test, and the more tests we add the more painful this becomes. I think if we don't want auto-generated tests then we should remove the `; NOTE` line from the top of the tests because they aren't autogenerated - they've been manually edited. @fhahn any thoughts?

If there is a simple command-line tool or script that can be run for maintaining these tests it's worth documenting that at the top of the test, since the "NOTE..." line suggests you just run the update_test_checks script.
> I think the extra work to not stage these changes in git (`git add -p .` is great for this) is worth not testing code blocks that we don't actually care about.
>
> I guess it's just that every time the IR changes innocuously due to some vplan transformation or whatever, the tests need to be regenerated and then the developer has to go back and delete the scalar loop for every test, and the more tests we add the more painful this becomes. I think if we don't want auto-generated tests then we should remove the `; NOTE` line from the top of the tests because they aren't autogenerated - they've been manually edited. @fhahn any thoughts?
>
> If there is a simple command-line tool or script that can be run for maintaining these tests it's worth documenting that at the top of the test, since the "NOTE..." line suggests you just run the update_test_checks script.
If they are manually edited, we should drop the NOTE. In general, IMO having more targeted checks instead of automatically checking everything makes things easier for reviewers, as there is generally less change/noise in the diff, while making the tests more difficult to update.

The trend overall in LLVM seems to be going more towards autogenerating everything, and in the past we accepted a number of changes to go from manual check lines to auto-generated ones, especially when the manual check lines weren't comprehensive.

It's not necessarily ideal in all cases for LV tests, as we end up checking a lot of 'uninteresting' things and end up with much larger check files, so I think it is great when people try to go with more targeted checks. What may be helpful would be an extension to update_test_checks.py that allows filtering out some basic blocks; that way we could still auto-generate, but filter out the scalar loop.
Yeah, that's a good point. My main concern is the cost of maintaining tests that have non-trivial IR that changes due to a VPlan transformation, an optimisation, a difference in naming scheme, etc. I imagine nobody wants to manually update 20 tests every time for something trivial! I'm also happy to look into the idea of changing update_test_checks.py to filter out all CHECK lines from the start of a particular block.
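(A rough illustration of that idea; the function name and shape here are made up for exposition and are not the eventual implementation:)

```python
import re

def filter_out_after(lines, pattern):
    """Keep lines only up to the first match of `pattern`; the matching
    line and everything after it are dropped, so no CHECK lines would be
    generated for, e.g., the scalar loop."""
    stop = re.compile(pattern)
    kept = []
    for line in lines:
        if stop.search(line):
            break
        kept.append(line)
    return kept

# Example: drop everything from the scalar preheader onwards.
body = ["vector.body:", "  %x = add i32 %a, %b", "scalar.ph:", "  br label %loop"]
print(filter_out_after(body, r"^scalar\.ph:"))
# -> ['vector.body:', '  %x = add i32 %a, %b']
```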
I'll create a new PR soon to add new functionality to update_test_checks.py for stopping CHECK line generation from a particular line onwards! I have a downstream patch working.
Force-pushed from 61a4f59 to 9c05681.
✅ With the latest revision this PR passed the Python code formatter.
@@ -74,13 +74,17 @@ class Filter(Regex):
     """

-    def __init__(self, regex, is_filter_out):
+    def __init__(self, regex, is_filter_out, is_filter_all_after):
It's probably best to create a new, separate review for the change to the script, with a dedicated test for the script.
Yep, can do. I just thought I'd put the whole thing here first for context and to help reviewers understand the typical use case. I'll spin out the Python changes in a new PR.
I've put up #129739
Whilst trying to clean up some loop vectoriser IR tests (see test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll for example) a reviewer on PR llvm#129047 suggested it would be nice to have an option to stop generating CHECK lines after a certain point. Typically when performing a transformation with the loop vectoriser we don't usually care about any CHECK lines generated for the scalar tail of the loop, since the scalar loop is kept intact.

Previously if you wanted to eliminate such unwanted CHECK lines you had to run the update script, then manually delete all the lines corresponding to the scalar loop. This can be very time consuming if the tests ever need changing.

What I've tried to do here is add a new --filter-all-after option alongside the existing --filter* options that provides support for stopping the generation of any CHECK lines beyond the line that matches the filter. With the existing filter options we never generate CHECK-NEXT lines, but we still care about ordering with --filter-all-after, so I've amended the code to ensure we treat this filter differently.
This looks great to me, thanks!
Whilst trying to clean up some loop vectoriser IR tests (see test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll for example) a reviewer on PR llvm#129047 suggested it would be nice to have an option to stop generating CHECK lines after a certain point. Typically when performing a transformation with the loop vectoriser we don't usually care about any CHECK lines generated for the scalar tail of the loop, since the scalar loop is kept intact.

Previously if you wanted to eliminate such unwanted CHECK lines you had to run the update script, then manually delete all the lines corresponding to the scalar loop. This can be very time consuming if the tests ever need changing.

What I've tried to do here is add a new --filter-out-after option alongside the existing --filter* options that provides support for stopping the generation of any CHECK lines beyond the line that matches the filter. With the existing filter options we never generate CHECK-NEXT lines, but we still care about ordering with --filter-out-after, so I've amended the code to ensure we treat this filter differently.
Force-pushed from 9c05681 to ceee2fd.
Rebased after landing #129739. This PR is now a test-only change!
Thanks for doing that :)
A few test files seemed to have been edited after using the update_test_checks.py script, which can make life hard for developers when trying to update these tests in future patches. Also, the tests still had this comment at the top:

; NOTE: Assertions have been autogenerated by ...

which could potentially be confusing, since they've not strictly been auto-generated.

I've attempted to keep the spirit of the original tests by excluding all CHECK lines after the scalar.ph IR block, however I've done this by using a new option called --filter-out-after to the update_test_checks.py script.
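(For illustration, a regeneration along these lines might look as follows; the opt binary location is an assumption, and the regex mirrors the scalar.ph block named above. The script can also record such flags in the test's NOTE line via UTC_ARGS so that later regenerations reuse them.)

```sh
# Sketch: regenerate the checks, but stop emitting them once the
# function output reaches the scalar loop preheader block.
llvm/utils/update_test_checks.py \
  --opt-binary=build/bin/opt \
  --filter-out-after='^scalar\.ph:' \
  llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
```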