|
| 1 | +; RUN: opt < %s %loadEnzyme -enzyme -enzyme_preopt=false -inline -mem2reg -correlated-propagation -instsimplify -adce -loop-deletion -simplifycfg -S | FileCheck %s |
| 2 | +; Test input: starting at index %i, walk %array downwards and swap neighbouring elements while array[k-1] > array[k]. |
| 3 | +; Function Attrs: noinline norecurse nounwind uwtable |
| 4 | +define dso_local void @insertion_sort_inner(float* nocapture %array, i32 %i) local_unnamed_addr #0 { |
| 5 | +entry: |
| 6 | + %cmp29 = icmp sgt i32 %i, 0 ; the loop is entered only when %i > 0 |
| 7 | + br i1 %cmp29, label %land.rhs.preheader, label %while.end |
| 8 | + |
| 9 | +land.rhs.preheader: ; preds = %entry |
| 10 | + %0 = sext i32 %i to i64 ; widen the start index to the i64 used by the GEPs below |
| 11 | + br label %land.rhs |
| 12 | + |
| 13 | +land.rhs: ; preds = %land.rhs.preheader, %while.body |
| 14 | + %indvars.iv = phi i64 [ %0, %land.rhs.preheader ], [ %indvars.iv.next, %while.body ] ; k: starts at %i and drops by one per iteration |
| 15 | + %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; k - 1 |
| 16 | + %arrayidx = getelementptr inbounds float, float* %array, i64 %indvars.iv.next |
| 17 | + %1 = load float, float* %arrayidx, align 4 ; array[k-1] |
| 18 | + %arrayidx2 = getelementptr inbounds float, float* %array, i64 %indvars.iv |
| 19 | + %2 = load float, float* %arrayidx2, align 4 ; array[k] |
| 20 | + %cmp3 = fcmp ogt float %1, %2 ; true when array[k-1] > array[k]; an ordered compare, so false if either value is NaN |
| 21 | + br i1 %cmp3, label %while.body, label %while.end ; leave the loop as soon as the pair is not out of order |
| 22 | + |
| 23 | +while.body: ; preds = %land.rhs |
| 24 | + store float %1, float* %arrayidx2, align 4 ; array[k] = old array[k-1] |
| 25 | + store float %2, float* %arrayidx, align 4 ; array[k-1] = old array[k] |
| 26 | + %cmp = icmp sgt i64 %indvars.iv, 1 ; keep going only while k > 1 |
| 27 | + br i1 %cmp, label %land.rhs, label %while.end |
| 28 | + |
| 29 | +while.end: ; preds = %land.rhs, %while.body, %entry |
| 30 | + ret void |
| 31 | +} |
| 32 | + |
| 33 | +; Driver: hands @insertion_sort_inner to @__enzyme_autodiff together with %x, %xp and %n; the returned double is not used. |
| 34 | +define dso_local void @dsum(float* %x, float* %xp, i32 %n) { |
| 35 | +entry: |
| 36 | + %0 = tail call double (void (float*, i32)*, ...) @__enzyme_autodiff(void (float*, i32)* nonnull @insertion_sort_inner, float* %x, float* %xp, i32 %n) ; NOTE(review): %xp is presumably the derivative buffer paired with %x - confirm against the Enzyme calling convention |
| 37 | + ret void |
| 38 | +} |
| 39 | + |
| 40 | +declare double @__enzyme_autodiff(void (float*, i32)*, ...) ; variadic, body-less entry point called from @dsum |
| 41 | + |
| 42 | +attributes #0 = { noinline norecurse nounwind uwtable } ; attribute group referenced as #0 by @insertion_sort_inner |
| 43 | +; Expected output: the generated gradient function. Its forward sweep replays the loop on a counter %iv that starts at 0 and rebuilds the original index as sext(%i) - %iv. |
| 44 | +; CHECK: define internal {} @diffeinsertion_sort_inner(float* nocapture %array, float* %"array'", i32 %i) local_unnamed_addr #0 { |
| 45 | +; CHECK-NEXT: entry: |
| 46 | +; CHECK-NEXT: %cmp29 = icmp sgt i32 %i, 0 |
| 47 | +; CHECK-NEXT: br i1 %cmp29, label %land.rhs.preheader, label %while.end |
| 48 | + |
| 49 | +; CHECK: land.rhs.preheader: ; preds = %entry |
| 50 | +; CHECK-NEXT: %0 = sext i32 %i to i64 |
| 51 | +; CHECK-NEXT: br label %land.rhs |
| 52 | +; Block labels use the plain directive, not the -NEXT form: the IR printer emits an empty line before each label, so a label is never on the line right after the previous terminator. |
| 53 | +; CHECK: land.rhs: ; preds = %while.body, %land.rhs.preheader |
| 54 | +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %while.body ], [ 0, %land.rhs.preheader ] |
| 55 | +; CHECK-NEXT: %1 = mul i64 %iv, -1 |
| 56 | +; CHECK-NEXT: %2 = add i64 %0, %1 |
| 57 | +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 |
| 58 | +; CHECK-NEXT: %indvars.iv.next = add nsw i64 %2, -1 |
| 59 | +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %array, i64 %indvars.iv.next |
| 60 | +; CHECK-NEXT: %3 = load float, float* %arrayidx, align 4 |
| 61 | +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %array, i64 %2 |
| 62 | +; CHECK-NEXT: %4 = load float, float* %arrayidx2, align 4 |
| 63 | +; CHECK-NEXT: %cmp3 = fcmp ogt float %3, %4 |
| 64 | +; CHECK-NEXT: br i1 %cmp3, label %while.body, label %while.end.loopexit |
| 65 | + |
| 66 | +; CHECK: while.body: ; preds = %land.rhs |
| 67 | +; CHECK-NEXT: store float %3, float* %arrayidx2, align 4 |
| 68 | +; CHECK-NEXT: store float %4, float* %arrayidx, align 4 |
| 69 | +; CHECK-NEXT: %cmp = icmp sgt i64 %2, 1 |
| 70 | +; CHECK-NEXT: br i1 %cmp, label %land.rhs, label %while.end.loopexit |
| 71 | +; Loop exit: %5 records which edge left the loop (0 = from while.body, 1 = from land.rhs) and %6 the final %iv; both reach the reverse sweep through the _cache phis in while.end. |
| 72 | +; CHECK: while.end.loopexit: ; preds = %while.body, %land.rhs |
| 73 | +; CHECK-NEXT: %"cmp3!manual_lcssa" = phi i1 [ %cmp3, %while.body ], [ %cmp3, %land.rhs ] |
| 74 | +; CHECK-NEXT: %5 = phi i8 [ 0, %while.body ], [ 1, %land.rhs ] |
| 75 | +; CHECK-NEXT: %6 = phi i64 [ %iv, %while.body ], [ %iv, %land.rhs ] |
| 76 | +; CHECK-NEXT: br label %while.end |
| 77 | + |
| 78 | +; CHECK: while.end: ; preds = %while.end.loopexit, %entry |
| 79 | +; CHECK-NEXT: %"cmp3!manual_lcssa_cache.0" = phi i1 [ %"cmp3!manual_lcssa", %while.end.loopexit ], [ undef, %entry ] |
| 80 | +; CHECK-NEXT: %_cache1.0 = phi i8 [ %5, %while.end.loopexit ], [ undef, %entry ] |
| 81 | +; CHECK-NEXT: %_cache.0 = phi i64 [ %6, %while.end.loopexit ], [ undef, %entry ] |
| 82 | +; CHECK-NEXT: br label %invertwhile.end |
| 83 | +; Reverse sweep: the invert* blocks step %"iv'phi" from the cached final count down to 0, accumulating into %"array'". |
| 84 | +; CHECK: invertentry: ; preds = %invertwhile.end, %invertland.rhs.preheader |
| 85 | +; CHECK-NEXT: ret {} undef |
| 86 | + |
| 87 | +; CHECK: invertland.rhs.preheader: ; preds = %invertland.rhs |
| 88 | +; CHECK-NEXT: br label %invertentry |
| 89 | + |
| 90 | +; CHECK: invertland.rhs: ; preds = %invertwhile.body, %loopMerge |
| 91 | +; CHECK-NEXT: %"'de2.0" = phi float [ 0.000000e+00, %loopMerge ], [ %30, %invertwhile.body ] |
| 92 | +; CHECK-NEXT: %"'de.0" = phi float [ 0.000000e+00, %loopMerge ], [ %24, %invertwhile.body ] |
| 93 | +; CHECK-NEXT: %_unwrap = sext i32 %i to i64 |
| 94 | +; CHECK-NEXT: %7 = mul i64 %"iv'phi", -1 |
| 95 | +; CHECK-NEXT: %8 = add i64 %_unwrap, %7 |
| 96 | +; CHECK-NEXT: %"arrayidx2'ipg" = getelementptr float, float* %"array'", i64 %8 |
| 97 | +; CHECK-NEXT: %9 = load float, float* %"arrayidx2'ipg" |
| 98 | +; CHECK-NEXT: %10 = fadd fast float %9, %"'de.0" |
| 99 | +; CHECK-NEXT: store float %10, float* %"arrayidx2'ipg" |
| 100 | +; CHECK-NEXT: %_unwrap3 = sext i32 %i to i64 |
| 101 | +; CHECK-NEXT: %11 = mul i64 %"iv'phi", -1 |
| 102 | +; CHECK-NEXT: %12 = add i64 %_unwrap3, %11 |
| 103 | +; CHECK-NEXT: %13 = add i64 %12, -1 |
| 104 | +; CHECK-NEXT: %"arrayidx'ipg" = getelementptr float, float* %"array'", i64 %13 |
| 105 | +; CHECK-NEXT: %14 = load float, float* %"arrayidx'ipg" |
| 106 | +; CHECK-NEXT: %15 = fadd fast float %14, %"'de2.0" |
| 107 | +; CHECK-NEXT: store float %15, float* %"arrayidx'ipg" |
| 108 | +; CHECK-NEXT: %16 = icmp eq i64 %"iv'phi", 0 |
| 109 | +; CHECK-NEXT: br i1 %16, label %invertland.rhs.preheader, label %loopMerge |
| 110 | + |
| 111 | +; CHECK: invertwhile.body: ; preds = %loopMerge |
| 112 | +; CHECK-NEXT: %_unwrap4 = sext i32 %i to i64 |
| 113 | +; CHECK-NEXT: %17 = mul i64 %"iv'phi", -1 |
| 114 | +; CHECK-NEXT: %18 = add i64 %_unwrap4, %17 |
| 115 | +; CHECK-NEXT: %19 = add i64 %18, -1 |
| 116 | +; CHECK-NEXT: %"arrayidx'ipg5" = getelementptr float, float* %"array'", i64 %19 |
| 117 | +; CHECK-NEXT: %20 = load float, float* %"arrayidx'ipg5" |
| 118 | +; CHECK-NEXT: %_unwrap6 = sext i32 %i to i64 |
| 119 | +; CHECK-NEXT: %21 = mul i64 %"iv'phi", -1 |
| 120 | +; CHECK-NEXT: %22 = add i64 %_unwrap6, %21 |
| 121 | +; CHECK-NEXT: %23 = add i64 %22, -1 |
| 122 | +; CHECK-NEXT: %"arrayidx'ipg7" = getelementptr float, float* %"array'", i64 %23 |
| 123 | +; CHECK-NEXT: store float 0.000000e+00, float* %"arrayidx'ipg7" |
| 124 | +; CHECK-NEXT: %24 = fadd fast float 0.000000e+00, %20 |
| 125 | +; CHECK-NEXT: %_unwrap8 = sext i32 %i to i64 |
| 126 | +; CHECK-NEXT: %25 = mul i64 %"iv'phi", -1 |
| 127 | +; CHECK-NEXT: %26 = add i64 %_unwrap8, %25 |
| 128 | +; CHECK-NEXT: %"arrayidx2'ipg9" = getelementptr float, float* %"array'", i64 %26 |
| 129 | +; CHECK-NEXT: %27 = load float, float* %"arrayidx2'ipg9" |
| 130 | +; CHECK-NEXT: %_unwrap10 = sext i32 %i to i64 |
| 131 | +; CHECK-NEXT: %28 = mul i64 %"iv'phi", -1 |
| 132 | +; CHECK-NEXT: %29 = add i64 %_unwrap10, %28 |
| 133 | +; CHECK-NEXT: %"arrayidx2'ipg11" = getelementptr float, float* %"array'", i64 %29 |
| 134 | +; CHECK-NEXT: store float 0.000000e+00, float* %"arrayidx2'ipg11" |
| 135 | +; CHECK-NEXT: %30 = fadd fast float 0.000000e+00, %27 |
| 136 | +; CHECK-NEXT: br label %invertland.rhs |
| 137 | + |
| 138 | +; CHECK: invertwhile.end.loopexit: ; preds = %invertwhile.end |
| 139 | +; CHECK-NEXT: br label %loopMerge |
| 140 | + |
| 141 | +; CHECK: invertwhile.end: ; preds = %while.end |
| 142 | +; CHECK-NEXT: %31 = icmp sgt i32 %i, 0 |
| 143 | +; CHECK-NEXT: br i1 %31, label %invertwhile.end.loopexit, label %invertentry |
| 144 | +; Dispatch on the cached exit selector: 0 re-enters through invertwhile.body, any other value goes straight to invertland.rhs. |
| 145 | +; CHECK: loopMerge: ; preds = %invertwhile.end.loopexit, %invertland.rhs |
| 146 | +; CHECK-NEXT: %"iv'phi" = phi i64 [ %_cache.0, %invertwhile.end.loopexit ], [ %32, %invertland.rhs ] |
| 147 | +; CHECK-NEXT: %32 = sub i64 %"iv'phi", 1 |
| 148 | +; CHECK-NEXT: switch i8 %_cache1.0, label %invertland.rhs [ |
| 149 | +; CHECK-NEXT: i8 0, label %invertwhile.body |
| 150 | +; CHECK-NEXT: ] |
| 151 | +; CHECK-NEXT: } |
0 commit comments