Skip to content

Commit 90f733c

Browse files
committed
[LoopUnroll] Add tests for unrolling loops with reductions.
Add tests for unrolling loops with reductions. In some cases, multiple parallel reduction phis could be retained to improve performance.
1 parent 176ae32 commit 90f733c

File tree

3 files changed

+999
-0
lines changed

3 files changed

+999
-0
lines changed

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 315 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -578,8 +578,323 @@ loop.latch:
578578
exit:
579579
ret void
580580
}
581+
582+
; Single-block add reduction with a constant trip count: the loop exits when
; %iv.next reaches 1024, and %n is not used in the body.
; Expected output per the autogenerated checks below (the RUN lines that
; select each check prefix are outside this excerpt):
;   - APPLE checks: the loop is emitted as-is, one load/add per iteration.
;   - OTHER checks: the body is replicated 4 times per iteration, with all
;     four adds chained serially through the single %rdx phi.
define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
583+
; APPLE-LABEL: define i32 @test_add_reduction_unroll_partial(
584+
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
585+
; APPLE-NEXT: [[ENTRY:.*]]:
586+
; APPLE-NEXT: br label %[[LOOP:.*]]
587+
; APPLE: [[LOOP]]:
588+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
589+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
590+
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
591+
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
592+
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
593+
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
594+
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
595+
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
596+
; APPLE: [[EXIT]]:
597+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
598+
; APPLE-NEXT: ret i32 [[BIN_RDX2]]
599+
;
600+
; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial(
601+
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
602+
; OTHER-NEXT: [[ENTRY:.*]]:
603+
; OTHER-NEXT: br label %[[LOOP:.*]]
604+
; OTHER: [[LOOP]]:
605+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
606+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
607+
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
608+
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
609+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
610+
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
611+
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
612+
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
613+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
614+
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
615+
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
616+
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
617+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP2]]
618+
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
619+
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
620+
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
621+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP3]]
622+
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
623+
; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
624+
; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
625+
; OTHER: [[EXIT]]:
626+
; OTHER-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
627+
; OTHER-NEXT: ret i32 [[BIN_RDX2]]
628+
;
629+
entry:
630+
br label %loop
631+
632+
loop:
633+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
634+
; Reduction accumulator: 0 on entry, %rdx.next on the back edge.
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
635+
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
636+
; Load of a[%iv]; only 2-byte alignment is stated for the i32 access.
%1 = load i32, ptr %gep.a, align 2
637+
%rdx.next = add nuw nsw i32 %rdx, %1
638+
%iv.next = add nuw nsw i64 %iv, 1
639+
; Constant exit bound of 1024.
%ec = icmp eq i64 %iv.next, 1024
640+
br i1 %ec, label %exit, label %loop
641+
642+
exit:
643+
%res = phi i32 [ %rdx.next, %loop ]
644+
ret i32 %res
645+
}
646+
647+
; External predicate declared without a body here: takes no arguments and
; returns i1. @test_add_reduction_multi_block branches on its result to decide
; whether the conditional store in its loop executes.
declare i1 @cond()
648+
649+
; Add reduction in a loop whose body spans several blocks: the result of
; @cond() decides whether the `then` block stores 0 to the element that was
; just loaded, and the accumulation itself happens in %loop.latch.
; Constant trip count (exit when %iv.next == 1024); %n is not used.
; Per the autogenerated checks below, both the APPLE and the OTHER prefix
; expect the loop to be emitted without any unrolling.
define i32 @test_add_reduction_multi_block(ptr %a, i64 noundef %n) {
650+
; APPLE-LABEL: define i32 @test_add_reduction_multi_block(
651+
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
652+
; APPLE-NEXT: [[ENTRY:.*]]:
653+
; APPLE-NEXT: br label %[[LOOP:.*]]
654+
; APPLE: [[LOOP]]:
655+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
656+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ]
657+
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
658+
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
659+
; APPLE-NEXT: [[C:%.*]] = call i1 @cond()
660+
; APPLE-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
661+
; APPLE: [[THEN]]:
662+
; APPLE-NEXT: store i32 0, ptr [[GEP_A]], align 4
663+
; APPLE-NEXT: br label %[[LOOP_LATCH]]
664+
; APPLE: [[LOOP_LATCH]]:
665+
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
666+
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
667+
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
668+
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
669+
; APPLE: [[EXIT]]:
670+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
671+
; APPLE-NEXT: ret i32 [[RES]]
672+
;
673+
; OTHER-LABEL: define i32 @test_add_reduction_multi_block(
674+
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
675+
; OTHER-NEXT: [[ENTRY:.*]]:
676+
; OTHER-NEXT: br label %[[LOOP:.*]]
677+
; OTHER: [[LOOP]]:
678+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
679+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ]
680+
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
681+
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
682+
; OTHER-NEXT: [[C:%.*]] = call i1 @cond()
683+
; OTHER-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
684+
; OTHER: [[THEN]]:
685+
; OTHER-NEXT: store i32 0, ptr [[GEP_A]], align 4
686+
; OTHER-NEXT: br label %[[LOOP_LATCH]]
687+
; OTHER: [[LOOP_LATCH]]:
688+
; OTHER-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
689+
; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
690+
; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
691+
; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
692+
; OTHER: [[EXIT]]:
693+
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
694+
; OTHER-NEXT: ret i32 [[RES]]
695+
;
696+
entry:
697+
br label %loop
698+
699+
loop:
700+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
701+
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop.latch ]
702+
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
703+
; The value is loaded before the conditional store below, so the reduction
; adds the element's value from before any overwrite in this iteration.
%1 = load i32, ptr %gep.a, align 2
704+
%c = call i1 @cond()
705+
br i1 %c, label %then, label %loop.latch
706+
707+
then:
708+
; No explicit alignment is given on this store; the checks print it as align 4.
store i32 0, ptr %gep.a
709+
br label %loop.latch
710+
711+
loop.latch:
712+
%rdx.next = add nuw nsw i32 %rdx, %1
713+
%iv.next = add nuw nsw i64 %iv, 1
714+
%ec = icmp eq i64 %iv.next, 1024
715+
br i1 %ec, label %exit, label %loop
716+
717+
exit:
718+
%res = phi i32 [ %rdx.next, %loop.latch ]
719+
ret i32 %res
720+
}
721+
722+
; Two reductions over the same loaded value in one loop: an add reduction
; (%rdx) and a mul reduction (%rdx.2); their final values are summed in %exit.
; Constant trip count (exit when %iv.next == 1024); %n is not used.
; Per the autogenerated checks below:
;   - APPLE checks: the loop is emitted without unrolling.
;   - OTHER checks: the body is replicated twice per iteration, each reduction
;     continuing as one serial chain through its own phi.
; NOTE(review): %rdx.2 starts at 0, so the product stays 0 at run time;
; presumably only the loop structure matters for this test - confirm.
define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
723+
; APPLE-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
724+
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
725+
; APPLE-NEXT: [[ENTRY:.*]]:
726+
; APPLE-NEXT: br label %[[LOOP:.*]]
727+
; APPLE: [[LOOP]]:
728+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
729+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
730+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ]
731+
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
732+
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
733+
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
734+
; APPLE-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]]
735+
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
736+
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
737+
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
738+
; APPLE: [[EXIT]]:
739+
; APPLE-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
740+
; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ]
741+
; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]]
742+
; APPLE-NEXT: ret i32 [[SUM]]
743+
;
744+
; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
745+
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
746+
; OTHER-NEXT: [[ENTRY:.*]]:
747+
; OTHER-NEXT: br label %[[LOOP:.*]]
748+
; OTHER: [[LOOP]]:
749+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
750+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
751+
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_1:%.*]], %[[LOOP]] ]
752+
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
753+
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
754+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
755+
; OTHER-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
756+
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
757+
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
758+
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
759+
; OTHER-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
760+
; OTHER-NEXT: [[RDX_2_NEXT_1]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
761+
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
762+
; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024
763+
; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]]
764+
; OTHER: [[EXIT]]:
765+
; OTHER-NEXT: [[BIN_RDX:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
766+
; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ]
767+
; OTHER-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX]], [[RES_2]]
768+
; OTHER-NEXT: ret i32 [[SUM]]
769+
;
770+
entry:
771+
br label %loop
772+
773+
loop:
774+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
775+
; Accumulator of the add reduction.
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
776+
; Accumulator of the mul reduction.
%rdx.2 = phi i32 [ 0, %entry ], [ %rdx.2.next, %loop ]
777+
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
778+
; One load feeds both reductions.
%1 = load i32, ptr %gep.a, align 2
779+
%rdx.next = add nuw nsw i32 %rdx, %1
780+
; Unlike the add above, the mul carries no nuw/nsw flags.
%rdx.2.next = mul i32 %rdx.2, %1
781+
%iv.next = add nuw nsw i64 %iv, 1
782+
%ec = icmp eq i64 %iv.next, 1024
783+
br i1 %ec, label %exit, label %loop
784+
785+
exit:
786+
%res.1 = phi i32 [ %rdx.next, %loop ]
787+
%res.2 = phi i32 [ %rdx.2.next, %loop ]
788+
; Combine both reduction results into the single return value.
%sum = add i32 %res.1, %res.2
789+
ret i32 %sum
790+
}
791+
792+
793+
; Add reduction whose trip count is only known at run time: the loop exits
; when %iv.next equals %n.
; Per the autogenerated checks below:
;   - APPLE checks: the loop is emitted without unrolling.
;   - OTHER checks: the loop is unrolled by 4 with a remainder (epilogue) loop.
;     XTRAITER = %n & 3 is the leftover iteration count; the unrolled loop is
;     bypassed when %n - 1 is unsigned-less-than 3; the unrolled loop stops
;     once its counter reaches %n - XTRAITER; the epilogue loop then runs
;     XTRAITER times (if non-zero) and carries !llvm.loop metadata.
define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
794+
; APPLE-LABEL: define i32 @test_add_reduction_runtime(
795+
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
796+
; APPLE-NEXT: [[ENTRY:.*]]:
797+
; APPLE-NEXT: br label %[[LOOP:.*]]
798+
; APPLE: [[LOOP]]:
799+
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ]
800+
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP]] ]
801+
; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
802+
; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
803+
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
804+
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
805+
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
806+
; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]]
807+
; APPLE: [[EXIT]]:
808+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP]] ]
809+
; APPLE-NEXT: ret i32 [[RES]]
810+
;
811+
; OTHER-LABEL: define i32 @test_add_reduction_runtime(
812+
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
813+
; OTHER-NEXT: [[ENTRY:.*]]:
814+
; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
815+
; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3
816+
; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
817+
; OTHER-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
818+
; OTHER: [[ENTRY_NEW]]:
819+
; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
820+
; OTHER-NEXT: br label %[[LOOP:.*]]
821+
; OTHER: [[LOOP]]:
822+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
823+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
824+
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
825+
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
826+
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2
827+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]]
828+
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
829+
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
830+
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
831+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]]
832+
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
833+
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
834+
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
835+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]]
836+
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
837+
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
838+
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
839+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]]
840+
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
841+
; OTHER-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
842+
; OTHER-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
843+
; OTHER-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
844+
; OTHER: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
845+
; OTHER-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
846+
; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
847+
; OTHER-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
848+
; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
849+
; OTHER: [[EXIT_UNR_LCSSA]]:
850+
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
851+
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
852+
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
853+
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
854+
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
855+
; OTHER: [[LOOP_EPIL_PREHEADER]]:
856+
; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
857+
; OTHER: [[LOOP_EPIL]]:
858+
; OTHER-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
859+
; OTHER-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
860+
; OTHER-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
861+
; OTHER-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
862+
; OTHER-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
863+
; OTHER-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
864+
; OTHER-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
865+
; OTHER-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
866+
; OTHER-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
867+
; OTHER-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
868+
; OTHER-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
869+
; OTHER: [[EXIT_EPILOG_LCSSA]]:
870+
; OTHER-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
871+
; OTHER-NEXT: br label %[[EXIT]]
872+
; OTHER: [[EXIT]]:
873+
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
874+
; OTHER-NEXT: ret i32 [[RES]]
875+
;
876+
entry:
877+
br label %loop
878+
879+
loop:
880+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
881+
; Reduction accumulator: 0 on entry, %rdx.next on the back edge.
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
882+
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
883+
%1 = load i32, ptr %gep.a, align 2
884+
%rdx.next = add nuw nsw i32 %rdx, %1
885+
%iv.next = add nuw nsw i64 %iv, 1
886+
; Exit bound is the function argument %n rather than a constant.
%ec = icmp eq i64 %iv.next, %n
887+
br i1 %ec, label %exit, label %loop
888+
889+
exit:
890+
%res = phi i32 [ %rdx.next, %loop ]
891+
ret i32 %res
892+
}
581893
;.
582894
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
583895
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
584896
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
585897
;.
898+
; OTHER: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
899+
; OTHER: [[META1]] = !{!"llvm.loop.unroll.disable"}
900+
;.

0 commit comments

Comments
 (0)