From 613d8f29538ee43137a77dfd0d6464c65b328b68 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 17 Aug 2020 12:50:30 +0100 Subject: [PATCH 01/23] [NFC] Run update script on test Update IndVarSimplify/no-iv-rewrite.ll --- .../IndVarSimplify/no-iv-rewrite.ll | 340 ++++++++++++------ 1 file changed, 237 insertions(+), 103 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index 53c08ec5d57f5f..68566fd03a5758 100644 --- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -indvars -S -indvars-predicate-loops=0 | FileCheck %s ; ; Make sure that indvars isn't inserting canonical IVs. @@ -5,7 +6,32 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +; We should only have 2 IVs. +; sext should be eliminated while preserving gep inboundsness. define i32 @sum(i32* %arr, i32 %n) nounwind { +; CHECK-LABEL: @sum( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[PH]] ] +; CHECK-NEXT: [[S_01:%.*]] = phi i32 [ 0, [[PH]] ], [ [[SINC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 +; CHECK-NEXT: [[SINC]] = add nsw i32 [[S_01]], [[VAL]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i32 [ [[SINC]], [[LOOP]] ] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ [[S_LCSSA]], [[EXIT]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[S_0_LCSSA]] +; entry: %precond = icmp slt i32 0, %n br i1 %precond, label %ph, label %return @@ -13,17 +39,6 @@ entry: ph: br label %loop -; CHECK: loop: -; -; We should only have 2 IVs. -; CHECK: phi -; CHECK: phi -; CHECK-NOT: phi -; -; sext should be eliminated while preserving gep inboundsness. -; CHECK-NOT: sext -; CHECK: getelementptr inbounds -; CHECK: exit: loop: %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ] @@ -44,7 +59,34 @@ return: ret i32 %s.0.lcssa } +; We should only have 2 IVs. +; %ofs sext should be eliminated while preserving gep inboundsness. 
+; %vall sext should obviously not be eliminated define i64 @suml(i32* %arr, i32 %n) nounwind { +; CHECK-LABEL: @suml( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[PH]] ] +; CHECK-NEXT: [[S_01:%.*]] = phi i64 [ 0, [[PH]] ], [ [[SINC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 +; CHECK-NEXT: [[VALL:%.*]] = sext i32 [[VAL]] to i64 +; CHECK-NEXT: [[SINC]] = add nsw i64 [[S_01]], [[VALL]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[SINC]], [[LOOP]] ] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i64 [ [[S_LCSSA]], [[EXIT]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i64 [[S_0_LCSSA]] +; entry: %precond = icmp slt i32 0, %n br i1 %precond, label %ph, label %return @@ -52,19 +94,6 @@ entry: ph: br label %loop -; CHECK: loop: -; -; We should only have 2 IVs. -; CHECK: phi -; CHECK: phi -; CHECK-NOT: phi -; -; %ofs sext should be eliminated while preserving gep inboundsness. -; CHECK-NOT: sext -; CHECK: getelementptr inbounds -; %vall sext should obviously not be eliminated -; CHECK: sext -; CHECK: exit: loop: %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ] @@ -86,29 +115,35 @@ return: ret i64 %s.0.lcssa } +; It's not indvars' job to perform LICM on %ofs +; Preserve exactly one pointer type IV. +; Don't create any extra adds. +; Preserve gep inboundsness, and don't factor it. define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind { +; CHECK-LABEL: @outofbounds( +; CHECK-NEXT: [[PRECOND:%.*]] = icmp ne i32* [[FIRST:%.*]], [[LAST:%.*]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] +; CHECK: ph: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PTRIV:%.*]] = phi i32* [ [[FIRST]], [[PH]] ], [ [[PTRPOST:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFS:%.*]] = sext i32 [[IDX:%.*]] to i64 +; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[PTRIV]], i64 [[OFS]] +; CHECK-NEXT: store i32 3, i32* [[ADR]], align 4 +; CHECK-NEXT: [[PTRPOST]] = getelementptr inbounds i32, i32* [[PTRIV]], i32 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32* [[PTRPOST]], [[LAST]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: ret void +; %precond = icmp ne i32* %first, %last br i1 %precond, label %ph, label %return -; CHECK: ph: -; It's not indvars' job to perform LICM on %ofs -; CHECK-NOT: sext ph: br label %loop -; CHECK: loop: -; -; Preserve exactly one pointer type IV. -; CHECK: phi i32* -; CHECK-NOT: phi -; -; Don't create any extra adds. -; CHECK-NOT: add -; -; Preserve gep inboundsness, and don't factor it. 
-; CHECK: getelementptr inbounds i32, i32* %ptriv, i32 1 -; CHECK-NOT: add -; CHECK: exit: loop: %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ] %ofs = sext i32 %idx to i64 @@ -127,19 +162,30 @@ return: %structI = type { i32 } +; Preserve casts define void @bitcastiv(i32 %start, i32 %limit, i32 %step, %structI* %base) +; CHECK-LABEL: @bitcastiv( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P:%.*]] = phi %structI* [ [[BASE:%.*]], [[ENTRY]] ], [ [[PINC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADR:%.*]] = getelementptr [[STRUCTI:%.*]], %structI* [[P]], i32 0, i32 0 +; CHECK-NEXT: store i32 3, i32* [[ADR]], align 4 +; CHECK-NEXT: [[PP:%.*]] = bitcast %structI* [[P]] to i32* +; CHECK-NEXT: store i32 4, i32* [[PP]], align 4 +; CHECK-NEXT: [[PINC]] = getelementptr [[STRUCTI]], %structI* [[P]], i32 1 +; CHECK-NEXT: [[NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[NEXT]], [[LIMIT:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; nounwind { entry: br label %loop -; CHECK: loop: -; -; Preserve casts -; CHECK: phi i32 -; CHECK: bitcast -; CHECK: getelementptr -; CHECK: exit: loop: %iv = phi i32 [%start, %entry], [%next, %loop] %p = phi %structI* [%base, %entry], [%pinc, %loop] @@ -156,16 +202,37 @@ exit: ret void } +; Test inserting a truncate at a phi use. define void @maxvisitor(i32 %limit, i32* %base) nounwind { +; CHECK-LABEL: @maxvisitor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LIMIT:%.*]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LIMIT]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[MAX_NEXT:%.*]], [[LOOP_INC]] ] +; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[BASE:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 +; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VAL]], [[MAX]] +; CHECK-NEXT: br i1 [[CMP19]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: br label [[LOOP_INC]] +; CHECK: if.else: +; CHECK-NEXT: br label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[MAX_NEXT]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[MAX]], [[IF_ELSE]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: - br label %loop + br label %loop -; Test inserting a truncate at a phi use. -; -; CHECK: loop: -; CHECK: phi i64 -; CHECK: trunc -; CHECK: exit: loop: %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.inc ] %max = phi i32 [ 0, %entry ], [ %max.next, %loop.inc ] @@ -191,16 +258,25 @@ exit: ret void } +; Test an edge case of removing an identity phi that directly feeds +; back to the loop iv. 
define void @identityphi(i32 %limit) nounwind { +; CHECK-LABEL: @identityphi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[CONTROL:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[CONTROL]] +; CHECK: control: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[LIMIT:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop -; Test an edge case of removing an identity phi that directly feeds -; back to the loop iv. -; -; CHECK: loop: -; CHECK-NOT: phi -; CHECK: exit: loop: %iv = phi i32 [ 0, %entry], [ %iv.next, %control ] br i1 undef, label %if.then, label %control @@ -217,20 +293,32 @@ exit: ret void } +; Test cloning an or, which is not an OverflowBinaryOperator. define i64 @cloneOr(i32 %limit, i64* %base) nounwind { +; CHECK-LABEL: @cloneOr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[HALFLIM:%.*]] = ashr i32 [[LIMIT:%.*]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[HALFLIM]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[VAL_LCSSA:%.*]] = phi i64 [ [[VAL]], [[LOOP]] ] +; CHECK-NEXT: [[T3_LCSSA:%.*]] = phi i64 [ [[TMP1]], [[LOOP]] ] +; CHECK-NEXT: [[RESULT:%.*]] = and i64 [[VAL_LCSSA]], [[T3_LCSSA]] +; CHECK-NEXT: ret i64 [[RESULT]] +; entry: ; ensure that the loop can't overflow %halfLim = ashr i32 %limit, 2 br label %loop -; Test cloning an or, which is not an OverflowBinaryOperator. -; -; CHECK: sext -; CHECK: loop: -; CHECK: phi i64 -; CHECK-NOT: sext -; CHECK: or i64 -; CHECK: exit: loop: %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] %t1 = sext i32 %iv to i64 @@ -249,14 +337,24 @@ exit: ; The i induction variable looks like a wrap-around, but it really is just ; a simple affine IV. Make sure that indvars simplifies through. +; ReplaceLoopExitValue should fold the return value to constant 9. define i32 @indirectRecurrence() nounwind { +; CHECK-LABEL: @indirectRecurrence( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[J_NEXT:%.*]], [[COND_TRUE:%.*]] ] +; CHECK-NEXT: [[TMP:%.*]] = icmp ne i32 [[J_0]], 10 +; CHECK-NEXT: br i1 [[TMP]], label [[COND_TRUE]], label [[RETURN:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J_0]], 1 +; CHECK-NEXT: br label [[LOOP]] +; CHECK: return: +; CHECK-NEXT: ret i32 9 +; entry: br label %loop -; ReplaceLoopExitValue should fold the return value to constant 9. -; CHECK: loop: -; CHECK: phi i32 -; CHECK: ret i32 9 loop: %j.0 = phi i32 [ 1, %entry ], [ %j.next, %cond_true ] %i.0 = phi i32 [ 0, %entry ], [ %j.0, %cond_true ] @@ -275,25 +373,33 @@ return: ; Eliminate the redundant IV increments k.next and l.next. ; Two phis should remain, one starting at %init, and one at %init1. ; Two increments should remain, one by %step and one by %step1. 
-; CHECK: loop: -; CHECK: phi i32 -; CHECK: phi i32 -; CHECK-NOT: phi -; CHECK: add i32 -; CHECK: add i32 -; CHECK: add i32 -; CHECK-NOT: add -; CHECK: return: -; ; Five live-outs should remain. -; CHECK: lcssa = phi -; CHECK: lcssa = phi -; CHECK: lcssa = phi -; CHECK: lcssa = phi -; CHECK: lcssa = phi -; CHECK-NOT: phi -; CHECK: ret define i32 @isomorphic(i32 %init, i32 %step, i32 %lim) nounwind { +; CHECK-LABEL: @isomorphic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STEP1:%.*]] = add i32 [[STEP:%.*]], 1 +; CHECK-NEXT: [[INIT1:%.*]] = add i32 [[INIT:%.*]], [[STEP1]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[II:%.*]] = phi i32 [ [[INIT1]], [[ENTRY:%.*]] ], [ [[II_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INIT]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[II_NEXT]] = add i32 [[II]], [[STEP1]] +; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], [[STEP1]] +; CHECK-NEXT: [[L_STEP:%.*]] = add i32 [[J]], [[STEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[II_NEXT]], [[LIM:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[RETURN:%.*]] +; CHECK: return: +; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[J]], [[LOOP]] ] +; CHECK-NEXT: [[J_NEXT_LCSSA:%.*]] = phi i32 [ [[J_NEXT]], [[LOOP]] ] +; CHECK-NEXT: [[K_NEXT_LCSSA:%.*]] = phi i32 [ [[II_NEXT]], [[LOOP]] ] +; CHECK-NEXT: [[L_STEP_LCSSA:%.*]] = phi i32 [ [[L_STEP]], [[LOOP]] ] +; CHECK-NEXT: [[L_NEXT_LCSSA:%.*]] = phi i32 [ [[J_NEXT]], [[LOOP]] ] +; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[I_LCSSA]], [[J_NEXT_LCSSA]] +; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[SUM1]], [[K_NEXT_LCSSA]] +; CHECK-NEXT: [[SUM3:%.*]] = add i32 [[SUM1]], [[L_STEP_LCSSA]] +; CHECK-NEXT: [[SUM4:%.*]] = add i32 [[SUM1]], [[L_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[SUM4]] +; entry: %step1 = add i32 %step, 1 %init1 = add i32 %init, %step1 @@ -327,16 +433,24 @@ return: %structIF = type { i32, float } define void @congruentgepiv(%structIF* %base) nounwind uwtable ssp { +; CHECK-LABEL: @congruentgepiv( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PTR_IV:%.*]] = phi %structIF* [ [[PTR_INC:%.*]], [[LATCH:%.*]] ], [ [[BASE:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS1:%.*]] = bitcast %structIF* [[PTR_IV]] to i32* +; CHECK-NEXT: store i32 4, i32* [[INDVARS1]], align 4 +; CHECK-NEXT: br i1 false, label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[PTR_INC]] = getelementptr inbounds [[STRUCTIF:%.*]], %structIF* [[PTR_IV]], i64 1 +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %first = getelementptr inbounds %structIF, %structIF* %base, i64 0, i32 0 br label %loop -; CHECK: loop: -; CHECK: phi %structIF* -; CHECK-NOT: phi -; CHECK: getelementptr inbounds -; CHECK-NOT: getelementptr -; CHECK: exit: loop: %ptr.iv = phi %structIF* [ %ptr.inc, %latch ], [ %base, %entry ] %next = phi i32* [ %next.inc, %latch ], [ %first, %entry ] @@ -356,15 +470,35 @@ declare void @use32(i32 %x) declare void @use64(i64 %x) ; Test a widened IV that is used by a phi on different paths within the loop. 
-; -; CHECK: for.body: -; CHECK: phi i64 -; CHECK: trunc i64 -; CHECK: if.then: -; CHECK: for.inc: -; CHECK: phi i32 -; CHECK: for.end: define void @phiUsesTrunc() nounwind { +; CHECK-LABEL: @phiUsesTrunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br i1 undef, label [[IF_THEN33:%.*]], label [[FOR_INC]] +; CHECK: if.then33: +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: br i1 undef, label [[IF_THEN97:%.*]], label [[FOR_INC]] +; CHECK: if.then97: +; CHECK-NEXT: call void @use64(i64 [[INDVARS_IV]]) +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[KMIN_1:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN33]] ], [ 0, [[IF_THEN]] ], [ [[TMP0]], [[IF_THEN97]] ], [ 0, [[IF_ELSE]] ] +; CHECK-NEXT: call void @use32(i32 [[KMIN_1]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: br i1 undef, label %for.body, label %for.end From 79d9e2cd93a3ff7b448f40caf50dbfd3516f7c0d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 17 Aug 2020 12:46:31 +0100 Subject: [PATCH 02/23] [DemandedBits] Reorder addition test checks. NFC. As suggested on D72423 we should try to keep the same order as the original IR --- llvm/test/Analysis/DemandedBits/add.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/Analysis/DemandedBits/add.ll b/llvm/test/Analysis/DemandedBits/add.ll index 102d667745a204..9203ed15d62789 100644 --- a/llvm/test/Analysis/DemandedBits/add.ll +++ b/llvm/test/Analysis/DemandedBits/add.ll @@ -1,14 +1,14 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0x1f for %5 = or i32 %2, %3 -; CHECK-DAG: DemandedBits: 0xffffffff for %8 = and i32 %7, 16 -; CHECK-DAG: DemandedBits: 0x1f for %4 = and i32 %d, 4 ; CHECK-DAG: DemandedBits: 0x1f for %1 = and i32 %a, 9 +; CHECK-DAG: DemandedBits: 0x1f for %2 = and i32 %b, 9 ; CHECK-DAG: DemandedBits: 0x1f for %3 = and i32 %c, 13 -; CHECK-DAG: DemandedBits: 0x10 for %7 = add i32 %1, %6 +; CHECK-DAG: DemandedBits: 0x1f for %4 = and i32 %d, 4 +; CHECK-DAG: DemandedBits: 0x1f for %5 = or i32 %2, %3 ; CHECK-DAG: DemandedBits: 0x1f for %6 = or i32 %4, %5 -; CHECK-DAG: DemandedBits: 0x1f for %2 = and i32 %b, 9 +; CHECK-DAG: DemandedBits: 0x10 for %7 = add i32 %1, %6 +; CHECK-DAG: DemandedBits: 0xffffffff for %8 = and i32 %7, 16 define i32 @test_add(i32 %a, i32 %b, i32 %c, i32 %d) { %1 = and i32 %a, 9 %2 = and i32 %b, 9 From c1f6ce0c7322d47f1bb90169585fa54232231ede Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 17 Aug 2020 12:53:52 +0100 Subject: [PATCH 03/23] [DemandedBits] Improve accuracy of Add propagator The current demand propagator for addition will mark all input bits at and right of the alive output bit as alive. 
But carry won't propagate beyond a bit for which both operands are zero (or one/zero in the case of subtraction) so a more accurate answer is possible given known bits. I derived a propagator by working through truth tables and using a bit-reversed addition to make demand ripple to the right, but I'm not sure how to make a convincing argument for its correctness in the comments yet. Nevertheless, here's a minimal implementation and test to get feedback. This would help in a situation where, for example, four bytes (<128) packed into an int are added with four others SIMD-style but only one of the four results is actually read. Known A: 0_______0_______0_______0_______ Known B: 0_______0_______0_______0_______ AOut: 00000000001000000000000000000000 AB, current: 00000000001111111111111111111111 AB, patch: 00000000001111111000000000000000 Committed on behalf of: @rrika (Erika) Differential Revision: https://reviews.llvm.org/D72423 --- llvm/include/llvm/Analysis/DemandedBits.h | 14 ++++ llvm/lib/Analysis/DemandedBits.cpp | 94 +++++++++++++++++++++++ llvm/test/Analysis/DemandedBits/add.ll | 40 +++++----- llvm/unittests/IR/CMakeLists.txt | 1 + llvm/unittests/IR/DemandedBitsTest.cpp | 66 ++++++++++++++++ llvm/unittests/Support/KnownBitsTest.cpp | 30 +------- llvm/unittests/Support/KnownBitsTest.h | 52 +++++++++++++ 7 files changed, 248 insertions(+), 49 deletions(-) create mode 100644 llvm/unittests/IR/DemandedBitsTest.cpp create mode 100644 llvm/unittests/Support/KnownBitsTest.h diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h index 04db3eb57c18e8..7a8618a27ce79d 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -61,6 +61,20 @@ class DemandedBits { void print(raw_ostream &OS); + /// Compute alive bits of one addition operand from alive output and known + /// operand bits + static APInt determineLiveOperandBitsAdd(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS); + + /// Compute alive bits of one subtraction operand from alive output and known + /// operand bits + static APInt determineLiveOperandBitsSub(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS); + private: void performAnalysis(); void determineLiveOperandBits(const Instruction *UserI, diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp index aaee8c21f28910..62e08f3f8a8ba8 100644 --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -173,7 +173,21 @@ void DemandedBits::determineLiveOperandBits( } break; case Instruction::Add: + if (AOut.isMask()) { + AB = AOut; + } else { + ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); + AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2); + } + break; case Instruction::Sub: + if (AOut.isMask()) { + AB = AOut; + } else { + ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); + AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2); + } + break; case Instruction::Mul: // Find the highest live output bit. 
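As a concrete illustration, the packed-bytes example quoted in the commit message can be reproduced directly through the new hook. The following is a minimal standalone sketch, not part of the patch itself: it assumes the patch is applied and the program is linked against LLVM's Analysis and Support libraries, and the bit patterns are the "Known A/B" and "AOut" values from the example above.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Four bytes (each known to be < 128) packed into an i32: the top bit of
  // every byte is known zero in both operands, all other bits are unknown.
  KnownBits A(32), B(32);
  A.Zero = APInt(32, 0x80808080);
  B.Zero = APInt(32, 0x80808080);

  // Only bit 21 of the sum is demanded by the user of the add.
  APInt AOut = APInt::getOneBitSet(32, 21);

  // Alive bits of operand 0 of the add, given the known bits above.
  APInt AB =
      DemandedBits::determineLiveOperandBitsAdd(/*OperandNo=*/0, AOut, A, B);

  // The known-zero byte boundaries act as carry barriers, so this should
  // print the "AB, patch" row from the commit message, 0x3f8000, instead of
  // the previous answer 0x3fffff (every bit at and below bit 21).
  uint64_t Val = AB.getZExtValue();
  outs() << "alive operand bits: 0x" << Twine::utohexstr(Val) << "\n";
  return 0;
}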
We don't need any more input // bits than that (adds, and thus subtracts, ripple only to the @@ -469,6 +483,86 @@ void DemandedBits::print(raw_ostream &OS) { } } +static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS, + bool CarryZero, bool CarryOne) { + assert(!(CarryZero && CarryOne) && + "Carry can't be zero and one at the same time"); + + // The following check should be done by the caller, as it also indicates + // that LHS and RHS don't need to be computed. + // + // if (AOut.isMask()) + // return AOut; + + // Boundary bits' carry out is unaffected by their carry in. + APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One); + + // First, the alive carry bits are determined from the alive output bits: + // Let demand ripple to the right but only up to any set bit in Bound. + // AOut = -1---- + // Bound = ----1- + // ACarry&~AOut = --111- + APInt RBound = Bound.reverseBits(); + APInt RAOut = AOut.reverseBits(); + APInt RProp = RAOut + (RAOut | ~RBound); + APInt RACarry = RProp ^ ~RBound; + APInt ACarry = RACarry.reverseBits(); + + // Then, the alive input bits are determined from the alive carry bits: + APInt NeededToMaintainCarryZero; + APInt NeededToMaintainCarryOne; + if (OperandNo == 0) { + NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero; + NeededToMaintainCarryOne = LHS.One | ~RHS.One; + } else { + NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero; + NeededToMaintainCarryOne = RHS.One | ~LHS.One; + } + + // As in computeForAddCarry + APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero; + APInt PossibleSumOne = LHS.One + RHS.One + CarryOne; + + // The below is simplified from + // + // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero); + // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One; + // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne); + // + // APInt NeededToMaintainCarry = + // (CarryKnownZero & NeededToMaintainCarryZero) | + // (CarryKnownOne & NeededToMaintainCarryOne) | + // CarryUnknown; + + APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) & + (PossibleSumOne | NeededToMaintainCarryOne); + + APInt AB = AOut | (ACarry & NeededToMaintainCarry); + return AB; +} + +APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS) { + return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true, + false); +} + +APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS) { + KnownBits NRHS; + NRHS.Zero = RHS.One; + NRHS.One = RHS.Zero; + return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false, + true); +} + FunctionPass *llvm::createDemandedBitsWrapperPass() { return new DemandedBitsWrapperPass(); } diff --git a/llvm/test/Analysis/DemandedBits/add.ll b/llvm/test/Analysis/DemandedBits/add.ll index 9203ed15d62789..01673f82c2b365 100644 --- a/llvm/test/Analysis/DemandedBits/add.ll +++ b/llvm/test/Analysis/DemandedBits/add.ll @@ -1,22 +1,22 @@ -; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s -; RUN: opt -S -disable-output -passes="print" < %s 2>&1 | FileCheck %s - -; CHECK-DAG: DemandedBits: 0x1f for %1 = and i32 %a, 9 -; CHECK-DAG: DemandedBits: 0x1f for %2 = and i32 %b, 9 -; CHECK-DAG: DemandedBits: 0x1f for %3 = and i32 %c, 13 -; CHECK-DAG: DemandedBits: 0x1f for %4 = and i32 %d, 4 -; CHECK-DAG: DemandedBits: 0x1f for %5 = or i32 %2, %3 -; 
CHECK-DAG: DemandedBits: 0x1f for %6 = or i32 %4, %5 +; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -disable-output -passes="print" < %s 2>&1 | FileCheck %s + +; CHECK-DAG: DemandedBits: 0x1e for %1 = and i32 %a, 9 +; CHECK-DAG: DemandedBits: 0x1a for %2 = and i32 %b, 9 +; CHECK-DAG: DemandedBits: 0x1a for %3 = and i32 %c, 13 +; CHECK-DAG: DemandedBits: 0x1a for %4 = and i32 %d, 4 +; CHECK-DAG: DemandedBits: 0x1a for %5 = or i32 %2, %3 +; CHECK-DAG: DemandedBits: 0x1a for %6 = or i32 %4, %5 ; CHECK-DAG: DemandedBits: 0x10 for %7 = add i32 %1, %6 ; CHECK-DAG: DemandedBits: 0xffffffff for %8 = and i32 %7, 16 -define i32 @test_add(i32 %a, i32 %b, i32 %c, i32 %d) { - %1 = and i32 %a, 9 - %2 = and i32 %b, 9 - %3 = and i32 %c, 13 - %4 = and i32 %d, 4 ; no bit of %d alive, %4 simplifies to zero - %5 = or i32 %2, %3 - %6 = or i32 %4, %5 - %7 = add i32 %1, %6 - %8 = and i32 %7, 16 - ret i32 %8 -} \ No newline at end of file +define i32 @test_add(i32 %a, i32 %b, i32 %c, i32 %d) { + %1 = and i32 %a, 9 + %2 = and i32 %b, 9 + %3 = and i32 %c, 13 + %4 = and i32 %d, 4 ; no bit of %d alive, %4 simplifies to zero + %5 = or i32 %2, %3 + %6 = or i32 %4, %5 + %7 = add i32 %1, %6 + %8 = and i32 %7, 16 + ret i32 %8 +} diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index 4634bf89059a74..c4386fed6174fc 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_unittest(IRTests DataLayoutTest.cpp DebugInfoTest.cpp DebugTypeODRUniquingTest.cpp + DemandedBitsTest.cpp DominatorTreeTest.cpp DominatorTreeBatchUpdatesTest.cpp FunctionTest.cpp diff --git a/llvm/unittests/IR/DemandedBitsTest.cpp b/llvm/unittests/IR/DemandedBitsTest.cpp new file mode 100644 index 00000000000000..4d15e81899612f --- /dev/null +++ b/llvm/unittests/IR/DemandedBitsTest.cpp @@ -0,0 +1,66 @@ +//===- DemandedBitsTest.cpp - DemandedBits tests --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DemandedBits.h" +#include "../Support/KnownBitsTest.h" +#include "llvm/Support/KnownBits.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +template +static void TestBinOpExhaustive(Fn1 PropagateFn, Fn2 EvalFn) { + unsigned Bits = 4; + unsigned Max = 1 << Bits; + ForeachKnownBits(Bits, [&](const KnownBits &Known1) { + ForeachKnownBits(Bits, [&](const KnownBits &Known2) { + for (unsigned AOut_ = 0; AOut_ < Max; AOut_++) { + APInt AOut(Bits, AOut_); + APInt AB1 = PropagateFn(0, AOut, Known1, Known2); + APInt AB2 = PropagateFn(1, AOut, Known1, Known2); + { + // If the propagator claims that certain known bits + // didn't matter, check it doesn't change its mind + // when they become unknown. 
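+ // (Concretely: Known1Redacted/Known2Redacted below keep the Zero/One
+ // facts only for bits the propagator marked alive in AB1/AB2; every
+ // other bit becomes unknown. Re-running the propagator on this weaker
+ // information must reproduce AB1 and AB2 exactly.)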
+ KnownBits Known1Redacted; + KnownBits Known2Redacted; + Known1Redacted.Zero = Known1.Zero & AB1; + Known1Redacted.One = Known1.One & AB1; + Known2Redacted.Zero = Known2.Zero & AB2; + Known2Redacted.One = Known2.One & AB2; + + APInt AB1R = PropagateFn(0, AOut, Known1Redacted, Known2Redacted); + APInt AB2R = PropagateFn(1, AOut, Known1Redacted, Known2Redacted); + EXPECT_EQ(AB1, AB1R); + EXPECT_EQ(AB2, AB2R); + } + ForeachNumInKnownBits(Known1, [&](APInt Value1) { + ForeachNumInKnownBits(Known2, [&](APInt Value2) { + APInt ReferenceResult = EvalFn((Value1 & AB1), (Value2 & AB2)); + APInt Result = EvalFn(Value1, Value2); + EXPECT_EQ(Result & AOut, ReferenceResult & AOut); + }); + }); + } + }); + }); +} + +TEST(DemandedBitsTest, Add) { + TestBinOpExhaustive(DemandedBits::determineLiveOperandBitsAdd, + [](APInt N1, APInt N2) -> APInt { return N1 + N2; }); +} + +TEST(DemandedBitsTest, Sub) { + TestBinOpExhaustive(DemandedBits::determineLiveOperandBitsSub, + [](APInt N1, APInt N2) -> APInt { return N1 - N2; }); +} + +} // anonymous namespace diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index bfd8eb204cafff..694e5c4dcc7128 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -11,41 +11,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/KnownBits.h" +#include "KnownBitsTest.h" #include "gtest/gtest.h" using namespace llvm; namespace { -template -void ForeachKnownBits(unsigned Bits, FnTy Fn) { - unsigned Max = 1 << Bits; - KnownBits Known(Bits); - for (unsigned Zero = 0; Zero < Max; ++Zero) { - for (unsigned One = 0; One < Max; ++One) { - Known.Zero = Zero; - Known.One = One; - if (Known.hasConflict()) - continue; - - Fn(Known); - } - } -} - -template -void ForeachNumInKnownBits(const KnownBits &Known, FnTy Fn) { - unsigned Bits = Known.getBitWidth(); - unsigned Max = 1 << Bits; - for (unsigned N = 0; N < Max; ++N) { - APInt Num(Bits, N); - if ((Num & Known.Zero) != 0 || (~Num & Known.One) != 0) - continue; - - Fn(Num); - } -} - TEST(KnownBitsTest, AddCarryExhaustive) { unsigned Bits = 4; ForeachKnownBits(Bits, [&](const KnownBits &Known1) { diff --git a/llvm/unittests/Support/KnownBitsTest.h b/llvm/unittests/Support/KnownBitsTest.h new file mode 100644 index 00000000000000..bc291898814bd1 --- /dev/null +++ b/llvm/unittests/Support/KnownBitsTest.h @@ -0,0 +1,52 @@ +//===- llvm/unittest/Support/KnownBitsTest.h - KnownBits tests ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements helpers for KnownBits and DemandedBits unit tests. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UNITTESTS_SUPPORT_KNOWNBITSTEST_H +#define LLVM_UNITTESTS_SUPPORT_KNOWNBITSTEST_H + +#include "llvm/Support/KnownBits.h" + +namespace { + +using namespace llvm; + +template void ForeachKnownBits(unsigned Bits, FnTy Fn) { + unsigned Max = 1 << Bits; + KnownBits Known(Bits); + for (unsigned Zero = 0; Zero < Max; ++Zero) { + for (unsigned One = 0; One < Max; ++One) { + Known.Zero = Zero; + Known.One = One; + if (Known.hasConflict()) + continue; + + Fn(Known); + } + } +} + +template +void ForeachNumInKnownBits(const KnownBits &Known, FnTy Fn) { + unsigned Bits = Known.getBitWidth(); + unsigned Max = 1 << Bits; + for (unsigned N = 0; N < Max; ++N) { + APInt Num(Bits, N); + if ((Num & Known.Zero) != 0 || (~Num & Known.One) != 0) + continue; + + Fn(Num); + } +} + +} // end anonymous namespace + +#endif From 6567f822160ea7c4d13a7e3358883eafc61af337 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 10 Aug 2020 17:50:16 +0300 Subject: [PATCH 04/23] [llvm-readobj/elf] - Refine the warning about the broken PT_DYNAMIC segment. Splitted out from D85519. Currently we report "PT_DYNAMIC segment offset + size exceeds the size of the file", this changes it to "PT_DYNAMIC segment offset (0x1234) + file size (0x5678) exceeds the size of the file (0x68ab)" Differential revision: https://reviews.llvm.org/D85654 --- llvm/test/Object/invalid.test | 6 ++++-- .../tools/llvm-readobj/ELF/malformed-pt-dynamic.test | 10 +++++++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 10 ++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index c3dc175f399cca..8d9068a1ba07a0 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -489,7 +489,7 @@ Sections: # RUN: yaml2obj %s --docnum=22 -o %t22 # RUN: llvm-readobj --dyn-relocations %t22 2>&1 | FileCheck -DFILE=%t22 --check-prefix=DYN-TABLE-PHDR %s -# DYN-TABLE-PHDR: warning: '[[FILE]]': PT_DYNAMIC segment offset + size exceeds the size of the file +# DYN-TABLE-PHDR: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffff0000) + file size (0x0) exceeds the size of the file (0x150) --- !ELF FileHeader: @@ -506,7 +506,9 @@ ProgramHeaders: # RUN: yaml2obj %s --docnum=23 -o %t23 # RUN: llvm-readobj --dyn-relocations %t23 2>&1 \ -# RUN: | FileCheck -DFILE=%t23 --check-prefix=DYN-TABLE-PHDR %s +# RUN: | FileCheck -DFILE=%t23 --check-prefix=DYN-TABLE-PHDR2 %s + +# DYN-TABLE-PHDR2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x78) + file size (0xffff0000) exceeds the size of the file (0x1a8) --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test index a6aa2915aa4732..3ffdd57486a0e0 100644 --- a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test +++ b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test @@ -13,14 +13,18 @@ # within the file. # RUN: cp %t.stripped %t.truncated1 # RUN: %python -c "with open(r'%t.truncated1', 'r+') as f: f.truncate(0x1001)" -# RUN: llvm-readobj %t.truncated1 --dynamic-table 2>&1 | FileCheck -DFILE=%t.truncated1 %s +# RUN: llvm-readobj %t.truncated1 --dynamic-table 2>&1 | \ +# RUN: FileCheck -DFILE=%t.truncated1 %s --check-prefix=WARN1 + +# WARN1: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0x1001) # Test case where the offset is too large to be in the file. 
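# (Here the PT_DYNAMIC segment has p_offset 0x1000 and p_filesz 0x10, so truncating the object to 0xFFF bytes leaves the segment starting beyond EOF; the warning below reports exactly those values.)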
# RUN: cp %t.stripped %t.truncated2 # RUN: %python -c "with open(r'%t.truncated2', 'r+') as f: f.truncate(0xFFF)" -# RUN: llvm-readobj %t.truncated2 --dynamic-table 2>&1 | FileCheck -DFILE=%t.truncated2 %s +# RUN: llvm-readobj %t.truncated2 --dynamic-table 2>&1 | \ +# RUN: FileCheck -DFILE=%t.truncated2 %s --check-prefix=WARN2 -# CHECK: warning: '[[FILE]]': PT_DYNAMIC segment offset + size exceeds the size of the file +# WARN2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0xfff) --- !ELF FileHeader: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index b18c841ead9111..461d1894d1eccb 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1894,10 +1894,12 @@ ELFDumper::findDynamic(const ELFFile *Obj) { if (DynamicPhdr && DynamicPhdr->p_offset + DynamicPhdr->p_filesz > ObjF->getMemoryBufferRef().getBufferSize()) { - reportWarning( - createError( - "PT_DYNAMIC segment offset + size exceeds the size of the file"), - ObjF->getFileName()); + reportUniqueWarning(createError( + "PT_DYNAMIC segment offset (0x" + + Twine::utohexstr(DynamicPhdr->p_offset) + ") + file size (0x" + + Twine::utohexstr(DynamicPhdr->p_filesz) + + ") exceeds the size of the file (0x" + + Twine::utohexstr(ObjF->getMemoryBufferRef().getBufferSize()) + ")")); // Don't use the broken dynamic header. DynamicPhdr = nullptr; } From 6cd4a6f6b27eea40dbddccb21c206fb4d4354c53 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 16 Aug 2020 15:00:03 -0400 Subject: [PATCH 05/23] [InstCombine] reduce code duplication; NFC --- .../Transforms/InstCombine/InstCombineMulDivRem.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 203c8c7f1c1b05..fb8b640c7c93b9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1114,6 +1114,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { return Common; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + Type *Ty = I.getType(); Value *X; // sdiv Op0, -1 --> -Op0 // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined) @@ -1123,7 +1124,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // X / INT_MIN --> X == INT_MIN if (match(Op1, m_SignMask())) - return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty); // sdiv exact X, 1< ashr exact X, C iff 1< -(ashr exact X, C) @@ -1133,7 +1134,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { if (DivisorWasNegative) Op1 = ConstantExpr::getNeg(cast(Op1)); auto *AShr = BinaryOperator::CreateExactAShr( - Op0, getLogBase2(I.getType(), cast(Op1)), I.getName()); + Op0, getLogBase2(Ty, cast(Op1)), I.getName()); if (!DivisorWasNegative) return AShr; Builder.Insert(AShr); @@ -1157,7 +1158,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { Constant *NarrowDivisor = ConstantExpr::getTrunc(cast(Op1), Op0Src->getType()); Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor); - return new SExtInst(NarrowOp, Op0->getType()); + return new SExtInst(NarrowOp, Ty); } // -X / C --> X / -C (if the negation doesn't overflow). 
@@ -1165,7 +1166,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // checking if all elements are not the min-signed-val. if (!Op1C->isMinSignedValue() && match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) { - Constant *NegC = ConstantInt::get(I.getType(), -(*Op1C)); + Constant *NegC = ConstantInt::get(Ty, -(*Op1C)); Instruction *BO = BinaryOperator::CreateSDiv(X, NegC); BO->setIsExact(I.isExact()); return BO; @@ -1180,7 +1181,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(Ty->getScalarSizeInBits())); if (MaskedValueIsZero(Op0, Mask, 0, &I)) { if (MaskedValueIsZero(Op1, Mask, 0, &I)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set From 61512ddd2d57fc33464bda477dab04829266faa1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 16 Aug 2020 15:11:11 -0400 Subject: [PATCH 06/23] [InstCombine] add tests for sdiv-of-abs; NFC --- .../InstCombine/sdiv-canonicalize.ll | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll index 39ba5120ed62f3..52f7b1b01dae9e 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare void @use(i32) +declare i32 @llvm.abs.i32(i32, i1) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) + define i32 @test_sdiv_canonicalize_op0(i32 %x, i32 %y) { ; CHECK-LABEL: @test_sdiv_canonicalize_op0( ; CHECK-NEXT: [[SDIV1:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]] @@ -89,3 +93,49 @@ define i64 @test_sdiv_canonicalize_constexpr(i64 %L1) { %B4 = sdiv i64 %L1, %B8 ret i64 %B4 } + +define i32 @sdiv_abs_nsw(i32 %x) { +; CHECK-LABEL: @sdiv_abs_nsw( +; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[A]], [[X]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 true) + %r = sdiv i32 %a, %x + ret i32 %r +} + +define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) { +; CHECK-LABEL: @sdiv_abs_nsw_vec( +; CHECK-NEXT: [[A:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[X:%.*]], i1 true) +; CHECK-NEXT: [[R:%.*]] = sdiv <4 x i32> [[X]], [[A]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %a = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %r = sdiv <4 x i32> %x, %a + ret <4 x i32> %r +} + +define i32 @sdiv_abs(i32 %x) { +; CHECK-LABEL: @sdiv_abs( +; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[A]], [[X]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 false) + %r = sdiv i32 %a, %x + ret i32 %r +} + +define i32 @sdiv_abs_extra_use(i32 %x) { +; CHECK-LABEL: @sdiv_abs_extra_use( +; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +; CHECK-NEXT: call void @use(i32 [[A]]) +; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[A]], [[X]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 true) + call void @use(i32 %a) + %r = sdiv i32 %a, %x + ret i32 %r +} From e6b6787d01e9ea6338b5b51c6e3ba1b903876b3a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 16 Aug 2020 15:42:06 -0400 Subject: [PATCH 07/23] 
[InstCombine] fold abs(X)/X to cmp+select The backend can convert the select-of-constants to bit-hack shift+logic if desirable. https://alive2.llvm.org/ce/z/pgJT6E define i8 @src(i8 %x) { %0: %a = abs i8 %x, 1 %d = sdiv i8 %x, %a ret i8 %d } => define i8 @tgt(i8 %x) { %0: %cond = icmp sgt i8 %x, 255 %r = select i1 %cond, i8 1, i8 255 ret i8 %r } Transformation seems to be correct! --- .../Transforms/InstCombine/InstCombineMulDivRem.cpp | 10 ++++++++++ .../test/Transforms/InstCombine/sdiv-canonicalize.ll | 12 ++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index fb8b640c7c93b9..ec610b4f3469b4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1179,6 +1179,16 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { return BinaryOperator::CreateNSWNeg( Builder.CreateSDiv(X, Y, I.getName(), I.isExact())); + // abs(X) / X --> X > -1 ? 1 : -1 + // X / abs(X) --> X > -1 ? 1 : -1 + if (match(&I, m_c_BinOp( + m_OneUse(m_Intrinsic(m_Value(X), m_One())), + m_Deferred(X)))) { + Constant *NegOne = ConstantInt::getAllOnesValue(Ty); + Value *Cond = Builder.CreateICmpSGT(X, NegOne); + return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne); + } + // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. APInt Mask(APInt::getSignMask(Ty->getScalarSizeInBits())); diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll index 52f7b1b01dae9e..b2a7a0da6cb505 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll @@ -96,8 +96,8 @@ define i64 @test_sdiv_canonicalize_constexpr(i64 %L1) { define i32 @sdiv_abs_nsw(i32 %x) { ; CHECK-LABEL: @sdiv_abs_nsw( -; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) -; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[A]], [[X]] +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = select i1 [[DOTINV]], i32 1, i32 -1 ; CHECK-NEXT: ret i32 [[R]] ; %a = call i32 @llvm.abs.i32(i32 %x, i1 true) @@ -107,8 +107,8 @@ define i32 @sdiv_abs_nsw(i32 %x) { define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) { ; CHECK-LABEL: @sdiv_abs_nsw_vec( -; CHECK-NEXT: [[A:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[X:%.*]], i1 true) -; CHECK-NEXT: [[R:%.*]] = sdiv <4 x i32> [[X]], [[A]] +; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTINV]], <4 x i32> , <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) @@ -116,6 +116,8 @@ define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) { ret <4 x i32> %r } +; Negative test - requires poison int min (nsw) + define i32 @sdiv_abs(i32 %x) { ; CHECK-LABEL: @sdiv_abs( ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) @@ -127,6 +129,8 @@ define i32 @sdiv_abs(i32 %x) { ret i32 %r } +; Negative test + define i32 @sdiv_abs_extra_use(i32 %x) { ; CHECK-LABEL: @sdiv_abs_extra_use( ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) From e0eb4f204a0ef48cff8fedc0cbc5be2c71fe2afe Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 17 Aug 2020 12:02:24 +0000 Subject: [PATCH 08/23] [gn build] Port c1f6ce0c732 --- 
llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ad60a40b630dfb..2ea9172628dc1b 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -21,6 +21,7 @@ unittest("IRTests") { "DataLayoutTest.cpp", "DebugInfoTest.cpp", "DebugTypeODRUniquingTest.cpp", + "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", "FunctionTest.cpp", From 5f9ecc5d857fa5d95f6ea36153be19db40576f8a Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Mon, 17 Aug 2020 12:25:45 +0100 Subject: [PATCH 09/23] [RISCV] Indirect branch generation in position independent code This fixes the "Unable to insert indirect branch" fatal error sometimes seen when generating position-independent code. Patch by msizanoen1 Reviewed By: jrtc27 Differential Revision: https://reviews.llvm.org/D84833 --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 21 +++++++++----------- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 4 ++-- llvm/test/CodeGen/RISCV/branch-relaxation.ll | 8 +++++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index d39ec505127c47..7b6ea002c7b719 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -279,7 +279,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Handle a single unconditional branch. if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { - TBB = I->getOperand(0).getMBB(); + TBB = getBranchDestBlock(*I); return false; } @@ -293,7 +293,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && I->getDesc().isUnconditionalBranch()) { parseCondBranch(*std::prev(I), TBB, Cond); - FBB = I->getOperand(0).getMBB(); + FBB = getBranchDestBlock(*I); return false; } @@ -384,10 +384,6 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - const auto &TM = static_cast(MF->getTarget()); - - if (TM.isPositionIndependent()) - report_fatal_error("Unable to insert indirect branch"); if (!isInt<32>(BrOffset)) report_fatal_error( @@ -399,15 +395,13 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); auto II = MBB.end(); - MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg) - .addMBB(&DestBB, RISCVII::MO_HI); - BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND)) - .addReg(ScratchReg, RegState::Kill) - .addMBB(&DestBB, RISCVII::MO_LO); + MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump)) + .addReg(ScratchReg, RegState::Define | RegState::Dead) + .addMBB(&DestBB, RISCVII::MO_CALL); RS->enterBasicBlockEnd(MBB); unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass, - LuiMI.getIterator(), false, 0); + MI.getIterator(), false, 0); MRI.replaceRegWith(ScratchReg, Scav); MRI.clearVirtRegs(); RS->setRegUsed(Scav); @@ -431,6 +425,7 @@ RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, int64_t BrOffset) const { + unsigned XLen = STI.getXLen(); // Ideally we could determine the supported branch offset from the // RISCVII::FormMask, but 
this can't be used for Pseudo instructions like // PseudoBR. @@ -447,6 +442,8 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, case RISCV::JAL: case RISCV::PseudoBR: return isIntN(21, BrOffset); + case RISCV::PseudoJump: + return isIntN(32, SignExtend64(BrOffset + 0x800, XLen)); } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index b9483062ddeb11..8547f791092b0b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1012,8 +1012,8 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)), def : Pat<(riscv_tail (iPTR texternalsym:$dst)), (PseudoTAIL texternalsym:$dst)>; -let isCall = 0, isBarrier = 0, isCodeGenOnly = 0, hasSideEffects = 0, - mayStore = 0, mayLoad = 0 in +let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, + isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> { let AsmString = "jump\t$target, $rd"; } diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll index 3d617bf0b26b46..5925e17ae407c4 100644 --- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -filetype=obj < %s \ ; RUN: -o /dev/null 2>&1 +; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs \ +; RUN: -filetype=obj < %s -o /dev/null 2>&1 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \ +; RUN: | FileCheck %s define void @relax_bcc(i1 %a) nounwind { ; CHECK-LABEL: relax_bcc: @@ -25,15 +29,13 @@ tail: ret void } -; TODO: Extend simm12's MCOperandPredicate so the jalr zero is printed as a jr. define i32 @relax_jal(i1 %a) nounwind { ; CHECK-LABEL: relax_jal: ; CHECK: # %bb.0: ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: bnez a0, .LBB1_1 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: lui a0, %hi(.LBB1_2) -; CHECK-NEXT: jalr zero, %lo(.LBB1_2)(a0) +; CHECK-NEXT: jump .LBB1_2, a0 ; CHECK-NEXT: .LBB1_1: # %iftrue ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP From 3f7068ad986d7f44f47faec78597a5e62b07b20b Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Mon, 17 Aug 2020 13:10:27 +0100 Subject: [PATCH 10/23] [RISCV] Enable the use of the old mucounteren name The RISC-V Privileged Specification 1.11 defines `mcountinhibit`, which has the same numeric CSR value as `mucounteren` from 1.09.1. This patch enables the use of the old `mucounteren` name. Patch by Yuichi Sugiyama. 
Reviewed By: lenary, jrtc27, pzheng Differential Revision: https://reviews.llvm.org/D85067 --- llvm/lib/Target/RISCV/RISCVSystemOperands.td | 2 ++ llvm/test/MC/RISCV/machine-csr-names.s | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 8e75647bd4a9e7..16399fea150e28 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -310,7 +310,9 @@ def: SysReg<"mhpmcounter31h", 0xB9F>; //===-------------------------- // Machine Counter Setup //===-------------------------- +let AltName = "mucounteren" in // Privileged spec v1.9.1 Name def : SysReg<"mcountinhibit", 0x320>; + def : SysReg<"mhpmevent3", 0x323>; def : SysReg<"mhpmevent4", 0x324>; def : SysReg<"mhpmevent5", 0x325>; diff --git a/llvm/test/MC/RISCV/machine-csr-names.s b/llvm/test/MC/RISCV/machine-csr-names.s index 93ecd7e173ffd5..dbc4f5fcb82440 100644 --- a/llvm/test/MC/RISCV/machine-csr-names.s +++ b/llvm/test/MC/RISCV/machine-csr-names.s @@ -863,6 +863,20 @@ csrrs t1, mcountinhibit, zero # uimm12 csrrs t2, 0x320, zero +# mucounteren +# name +# CHECK-INST: csrrs t1, mcountinhibit, zero +# CHECK-ENC: encoding: [0x73,0x23,0x00,0x32] +# CHECK-INST-ALIAS: csrr t1, mcountinhibit +# uimm12 +# CHECK-INST: csrrs t2, mcountinhibit, zero +# CHECK-ENC: encoding: [0xf3,0x23,0x00,0x32] +# CHECK-INST-ALIAS: csrr t2, mcountinhibit +# name +csrrs t1, mucounteren, zero +# uimm12 +csrrs t2, 0x320, zero + # mhpmevent3 # name # CHECK-INST: csrrs t1, mhpmevent3, zero From bc902191d3c002c13436f2c9a299826704861a80 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 13 Aug 2020 13:24:39 +0300 Subject: [PATCH 11/23] [llvm-readobj] - Remove unwrapOrError calls from GNUStyle::printRelocations. This fixes existent FIXMEs: we should not error out when unable to find the number of relocations. Differential revision: https://reviews.llvm.org/D85891 --- .../tools/llvm-readobj/ELF/packed-relocs.test | 16 ++++--- .../tools/llvm-readobj/ELF/relr-relocs.test | 15 ++++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 45 +++++++++++++------ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test b/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test index bdadeddbbeac94..43ba16856a78f9 100644 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test +++ b/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test @@ -49,7 +49,7 @@ Symbols: # RUN: yaml2obj --docnum=1 -DSHOFFSET=0xffffffff %s -o %t1.broken # RUN: llvm-readobj --relocations %t1.broken 2>&1 | FileCheck -DFILE=%t1.broken --check-prefix=BROKEN-RELA-LLVM %s -# RUN: not llvm-readelf --relocations %t1.broken 2>&1 | FileCheck -DFILE=%t1.broken --check-prefix=BROKEN-RELA-GNU %s +# RUN: llvm-readelf --relocations %t1.broken 2>&1 | FileCheck -DFILE=%t1.broken --check-prefix=BROKEN-RELA-GNU %s # BROKEN-RELA-LLVM: Relocations [ # BROKEN-RELA-LLVM-NEXT: Section (1) .rela.dyn { @@ -57,8 +57,10 @@ Symbols: # BROKEN-RELA-LLVM-NEXT: } # BROKEN-RELA-LLVM-NEXT: ] -## FIXME: GNU still reports an error before trying to dump relocations. 
-# BROKEN-RELA-GNU: error: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x3a) that is greater than the file size (0x238) +# BROKEN-RELA-GNU: warning: '[[FILE]]': unable to get the number of relocations in SHT_ANDROID_RELA section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x3a) that is greater than the file size (0x238) +# BROKEN-RELA-GNU: Relocation section '.rela.dyn' at offset 0xffffffff contains entries: +# BROKEN-RELA-GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# BROKEN-RELA-GNU-NEXT: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_RELA section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x3a) that is greater than the file size (0x238) ## Check we report a warning when the sh_link field of the SHT_ANDROID_RELA section is broken. @@ -130,7 +132,7 @@ Symbols: # RUN: yaml2obj --docnum=2 -DSHOFFSET=0xffffffff %s -o %t2.broken # RUN: llvm-readobj --relocations %t2.broken 2>&1 | FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-REL-LLVM %s -# RUN: not llvm-readelf --relocations %t2.broken 2>&1 | FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-REL-GNU %s +# RUN: llvm-readelf --relocations %t2.broken 2>&1 | FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-REL-GNU %s # BROKEN-REL-LLVM: Relocations [ # BROKEN-REL-LLVM-NEXT: Section (1) .rel.dyn { @@ -138,8 +140,10 @@ Symbols: # BROKEN-REL-LLVM-NEXT: } # BROKEN-REL-LLVM-NEXT: ] -## FIXME: GNU still reports an error before trying to dump relocations. -# BROKEN-REL-GNU: error: '[[FILE]]': section [index 1] has a sh_offset (0xffffffff) + sh_size (0x12) that cannot be represented +# BROKEN-REL-GNU: warning: '[[FILE]]': unable to get the number of relocations in SHT_ANDROID_REL section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x12) that cannot be represented +# BROKEN-REL-GNU: Relocation section '.rel.dyn' at offset 0xffffffff contains entries: +# BROKEN-REL-GNU-NEXT: Offset Info Type Sym. Value Symbol's Name +# BROKEN-REL-GNU-NEXT: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x12) that cannot be represented ## Check we report a warning when the sh_link field of the SHT_ANDROID_REL section is broken. diff --git a/llvm/test/tools/llvm-readobj/ELF/relr-relocs.test b/llvm/test/tools/llvm-readobj/ELF/relr-relocs.test index 16bb47ff03c26d..22a56c19368449 100644 --- a/llvm/test/tools/llvm-readobj/ELF/relr-relocs.test +++ b/llvm/test/tools/llvm-readobj/ELF/relr-relocs.test @@ -160,8 +160,8 @@ Sections: # RUN: yaml2obj --docnum=2 -DENTSIZE=1 %s -o %t2.broken # RUN: llvm-readobj --relocations %t2.broken 2>&1 | \ # RUN: FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-LLVM %s -DSECNAME=SHT_RELR -# RUN: not llvm-readelf --relocations %t2.broken 2>&1 | \ -# RUN: FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-GNU %s +# RUN: llvm-readelf --relocations %t2.broken 2>&1 | \ +# RUN: FileCheck -DFILE=%t2.broken --check-prefix=BROKEN-GNU %s -DSECNAME=SHT_RELR # BROKEN-LLVM: Relocations [ # BROKEN-LLVM-NEXT: Section (1) .relr.dyn { @@ -169,17 +169,18 @@ Sections: # BROKEN-LLVM-NEXT: } # BROKEN-LLVM-NEXT: ] -## FIXME: GNU still reports an error before trying to dump relocations. 
-# BROKEN-GNU: error: '[[FILE]]': section [index 1] has an invalid sh_entsize: 1 +# BROKEN-GNU: warning: '[[FILE]]': unable to get the number of relocations in [[SECNAME]] section with index 1: section [index 1] has an invalid sh_entsize: 1 +# BROKEN-GNU: Relocation section '.relr.dyn' at offset 0x34 contains entries: +# BROKEN-GNU-NEXT: Offset Info Type Sym. Value Symbol's Name +# BROKEN-GNU-NEXT: warning: '[[FILE]]': unable to read relocations from [[SECNAME]] section with index 1: section [index 1] has an invalid sh_entsize: 1 ## Case B: check the case when relocations can't be read from an SHT_ANDROID_RELR section. ## SHT_ANDROID_RELR = 0x6fffff00. # RUN: yaml2obj --docnum=2 -DENTSIZE=1 -DSHTYPE=0x6fffff00 %s -o %t2.broken.android # RUN: llvm-readobj --relocations %t2.broken.android 2>&1 | \ # RUN: FileCheck -DFILE=%t2.broken.android --check-prefix=BROKEN-LLVM %s -DSECNAME=SHT_ANDROID_RELR - -# RUN: not llvm-readelf --relocations %t2.broken.android 2>&1 | \ -# RUN: FileCheck -DFILE=%t2.broken.android --check-prefix=BROKEN-GNU %s +# RUN: llvm-readelf --relocations %t2.broken.android 2>&1 | \ +# RUN: FileCheck -DFILE=%t2.broken.android --check-prefix=BROKEN-GNU %s -DSECNAME=SHT_ANDROID_RELR ## Check the behavior when the sh_link field of the SHT_RELR/SHT_ANDROID_RELR section ## is set to an arbitrary value. Normally, it is set to 0, because such sections contains diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 461d1894d1eccb..e171cdcb2f42fd 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3749,30 +3749,47 @@ static bool isRelocationSec(const typename ELFT::Shdr &Sec) { } template void GNUStyle::printRelocations(const ELFO *Obj) { + auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected { + // Android's packed relocation section needs to be unpacked first + // to get the actual number of entries. + if (Sec.sh_type == ELF::SHT_ANDROID_REL || + Sec.sh_type == ELF::SHT_ANDROID_RELA) { + Expected> RelasOrErr = + Obj->android_relas(&Sec); + if (!RelasOrErr) + return RelasOrErr.takeError(); + return RelasOrErr->size(); + } + + if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || + Sec.sh_type == ELF::SHT_ANDROID_RELR)) { + Expected RelrsOrErr = Obj->relrs(&Sec); + if (!RelrsOrErr) + return RelrsOrErr.takeError(); + return Obj->decode_relrs(*RelrsOrErr).size(); + } + + return Sec.getEntityCount(); + }; + bool HasRelocSections = false; for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { if (!isRelocationSec(Sec)) continue; HasRelocSections = true; - unsigned Entries; - // Android's packed relocation section needs to be unpacked first - // to get the actual number of entries. 
- if (Sec.sh_type == ELF::SHT_ANDROID_REL || - Sec.sh_type == ELF::SHT_ANDROID_RELA) { - Entries = unwrapOrError(this->FileName, Obj->android_relas(&Sec)).size(); - } else if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || - Sec.sh_type == ELF::SHT_ANDROID_RELR)) { - Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(&Sec)); - Entries = Obj->decode_relrs(Relrs).size(); - } else { - Entries = Sec.getEntityCount(); - } + std::string EntriesNum = ""; + if (Expected NumOrErr = GetEntriesNum(Sec)) + EntriesNum = std::to_string(*NumOrErr); + else + this->reportUniqueWarning(createError( + "unable to get the number of relocations in " + describe(Obj, Sec) + + ": " + toString(NumOrErr.takeError()))); uintX_t Offset = Sec.sh_offset; StringRef Name = this->getPrintableSectionName(Obj, Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" - << to_hexString(Offset, false) << " contains " << Entries + << to_hexString(Offset, false) << " contains " << EntriesNum << " entries:\n"; printRelocHeader(Sec.sh_type); this->printRelocationsHelper(Obj, Sec); From cfb773c676236652f63f9ba031d6755d55f5d884 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 17 Aug 2020 13:35:12 +0200 Subject: [PATCH 12/23] [lldb][NFC] Use StringRef in CreateFunctionDeclaration/GetDeclarationName CreateFunctionDeclaration should just take a StringRef. GetDeclarationName is (only) used by CreateFunctionDeclaration so that's why now also takes a StringRef. --- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 4 ++-- .../Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 3 +-- lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp | 2 +- .../Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 7 ++----- .../Plugins/TypeSystem/Clang/TypeSystemClang.h | 12 ++++++------ 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 4b23ead1fe9e5a..486945ccbb8bb2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1215,7 +1215,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, } if (!function_decl) { - const char *name = attrs.name.GetCString(); + llvm::StringRef name = attrs.name.GetStringRef(); // We currently generate function templates with template parameters in // their name. In order to get closer to the AST that clang generates @@ -1239,7 +1239,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, template_function_decl = m_ast.CreateFunctionDeclaration( ignore_containing_context ? 
m_ast.GetTranslationUnitDecl() : containing_decl_ctx, - GetOwningClangModule(die), attrs.name.GetCString(), clang_type, + GetOwningClangModule(die), attrs.name.GetStringRef(), clang_type, attrs.storage, attrs.is_inline); clang::FunctionTemplateDecl *func_template_decl = m_ast.CreateFunctionTemplateDecl( diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 0acc77d7c67ff5..21f8b13bf07f76 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1015,8 +1015,7 @@ PdbAstBuilder::GetOrCreateFunctionDecl(PdbCompilandSymId func_id) { proc_name.consume_front("::"); clang::FunctionDecl *function_decl = m_clang.CreateFunctionDeclaration( - parent, OptionalClangModuleID(), proc_name.str().c_str(), func_ct, - storage, false); + parent, OptionalClangModuleID(), proc_name, func_ct, storage, false); lldbassert(m_uid_to_decl.count(toOpaqueUid(func_id)) == 0); m_uid_to_decl[toOpaqueUid(func_id)] = function_decl; diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp index d87926a6588f61..7649e8a90f9ab6 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp @@ -928,7 +928,7 @@ PDBASTParser::GetDeclForSymbol(const llvm::pdb::PDBSymbol &symbol) { : clang::StorageClass::SC_None; auto decl = m_ast.CreateFunctionDeclaration( - decl_context, OptionalClangModuleID(), name.c_str(), + decl_context, OptionalClangModuleID(), name, type->GetForwardCompilerType(), storage, func->hasInlineAttribute()); std::vector params; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 2ace212b6662c1..608cdc25d0721e 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -1965,11 +1965,8 @@ TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast, #pragma mark Function Types clang::DeclarationName -TypeSystemClang::GetDeclarationName(const char *name, +TypeSystemClang::GetDeclarationName(llvm::StringRef name, const CompilerType &function_clang_type) { - if (!name || !name[0]) - return clang::DeclarationName(); - clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS; if (!IsOperator(name, op_kind) || op_kind == clang::NUM_OVERLOADED_OPERATORS) return DeclarationName(&getASTContext().Idents.get( @@ -1996,7 +1993,7 @@ TypeSystemClang::GetDeclarationName(const char *name, FunctionDecl *TypeSystemClang::CreateFunctionDeclaration( clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module, - const char *name, const CompilerType &function_clang_type, + llvm::StringRef name, const CompilerType &function_clang_type, clang::StorageClass storage, bool is_inline) { FunctionDecl *func_decl = nullptr; ASTContext &ast = getASTContext(); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 4ae12716112785..74fd9de4357f6f 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -408,11 +408,10 @@ class TypeSystemClang : public TypeSystem { // Function Types - clang::FunctionDecl * - CreateFunctionDeclaration(clang::DeclContext *decl_ctx, - OptionalClangModuleID owning_module, - const char *name, const CompilerType 
&function_Type, - clang::StorageClass storage, bool is_inline); + clang::FunctionDecl *CreateFunctionDeclaration( + clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module, + llvm::StringRef name, const CompilerType &function_Type, + clang::StorageClass storage, bool is_inline); CompilerType CreateFunctionType(const CompilerType &result_type, const CompilerType *args, unsigned num_args, @@ -1053,7 +1052,8 @@ class TypeSystemClang : public TypeSystem { } clang::DeclarationName - GetDeclarationName(const char *name, const CompilerType &function_clang_type); + GetDeclarationName(llvm::StringRef name, + const CompilerType &function_clang_type); clang::LangOptions *GetLangOpts() const { return m_language_options_up.get(); From 687e7d34253b283945bdf9892aa58fd167f9913d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Marques?= Date: Mon, 17 Aug 2020 13:43:53 +0100 Subject: [PATCH 13/23] [NFC] Tweak a comment about the lock-free builtins --- clang/lib/AST/ExprConstant.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 448a683c9088b7..760e5621e0ef7d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11519,8 +11519,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power - // of two less than the maximum inline atomic width, we know it is - // lock-free. If the size isn't a power of two, or greater than the + // of two less than or equal to the maximum inline atomic width, we know it + // is lock-free. If the size isn't a power of two, or greater than the // maximum alignment where we promote atomics, we know it is not lock-free // (at least not in the sense of atomic_is_lock_free). Otherwise, // the answer can only be determined at runtime; for example, 16-byte From dad04e62f13bf64868c9b842f51dfc5fffbe95c0 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 17 Aug 2020 13:53:55 +0100 Subject: [PATCH 14/23] [NFC] run update test script On Transforms/LoopUnroll/runtime-small-upperbound.ll --- .../LoopUnroll/runtime-small-upperbound.ll | 154 +++++++++++++++--- 1 file changed, 133 insertions(+), 21 deletions(-) diff --git a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll index 95632a5a3bee75..a16d567a36ce00 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -loop-unroll -unroll-runtime %s -o - | FileCheck %s ; RUN: opt -S -loop-unroll -unroll-runtime -unroll-max-upperbound=6 %s -o - | FileCheck %s --check-prefix=UPPER @@ -7,17 +8,49 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @global.1 = dso_local local_unnamed_addr global i8* null, align 4 ; Check that loop in hoge_3, with a runtime upperbound of 3, is not unrolled. 
-; CHECK-LABEL: hoge_3 -; CHECK: loop: -; CHECK: store -; CHECK-NOT: store -; CHECK: br i1 %{{.*}}, label %loop -; UPPER-LABEL: hoge_3 -; UPPER: loop: -; UPPER: store -; UPPER-NOT: store -; UPPER: br i1 %{{.*}}, label %loop define dso_local void @hoge_3(i8 %arg) { +; CHECK-LABEL: @hoge_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* @global, align 4 +; CHECK-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 8 +; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 +; CHECK-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17 +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; UPPER-LABEL: @hoge_3( +; UPPER-NEXT: entry: +; UPPER-NEXT: [[X:%.*]] = load i32, i32* @global, align 4 +; UPPER-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4 +; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 +; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; UPPER: loop.preheader: +; UPPER-NEXT: br label [[LOOP:%.*]] +; UPPER: loop: +; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] +; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ] +; UPPER-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 8 +; UPPER-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 +; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1 +; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17 +; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; UPPER: exit.loopexit: +; UPPER-NEXT: br label [[EXIT]] +; UPPER: exit: +; UPPER-NEXT: ret void +; entry: %x = load i32, i32* @global, align 4 %y = load i8*, i8** @global.1, align 4 @@ -38,18 +71,97 @@ exit: } ; Check that loop in hoge_5, with a runtime upperbound of 5, is unrolled when -unroll-max-upperbound=4 -; CHECK-LABEL: hoge_5 -; CHECK: loop: -; CHECK: store -; CHECK-NOT: store -; CHECK: br i1 %{{.*}}, label %loop -; UPPER-LABEL: hoge_5 -; UPPER: loop: -; UPPER: store -; UPPER: store -; UPPER: store -; UPPER: br i1 %{{.*}}, label %loop define dso_local void @hoge_5(i8 %arg) { +; CHECK-LABEL: @hoge_5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* @global, align 4 +; CHECK-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 4 +; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 +; CHECK-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = 
icmp ult i32 [[IV_NEXT]], 17 +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; UPPER-LABEL: @hoge_5( +; UPPER-NEXT: entry: +; UPPER-NEXT: [[X:%.*]] = load i32, i32* @global, align 4 +; UPPER-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4 +; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 +; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; UPPER: loop.preheader: +; UPPER-NEXT: [[TMP1:%.*]] = sub i32 16, [[X]] +; UPPER-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 +; UPPER-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 +; UPPER-NEXT: [[TMP4:%.*]] = urem i32 [[TMP2]], 6 +; UPPER-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 +; UPPER-NEXT: [[XTRAITER:%.*]] = urem i32 [[TMP5]], 6 +; UPPER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; UPPER-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]] +; UPPER: loop.prol.preheader: +; UPPER-NEXT: br label [[LOOP_PROL:%.*]] +; UPPER: loop.prol: +; UPPER-NEXT: [[IV_PROL:%.*]] = phi i32 [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[X]], [[LOOP_PROL_PREHEADER]] ] +; UPPER-NEXT: [[PTR_PROL:%.*]] = phi i8* [ [[PTR_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[Y]], [[LOOP_PROL_PREHEADER]] ] +; UPPER-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LOOP_PROL]] ] +; UPPER-NEXT: [[IV_NEXT_PROL]] = add nuw i32 [[IV_PROL]], 4 +; UPPER-NEXT: [[PTR_NEXT_PROL]] = getelementptr inbounds i8, i8* [[PTR_PROL]], i32 1 +; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT_PROL]], align 1 +; UPPER-NEXT: [[TMP6:%.*]] = icmp ult i32 [[IV_NEXT_PROL]], 17 +; UPPER-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1 +; UPPER-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 +; UPPER-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]] +; UPPER: loop.prol.loopexit.unr-lcssa: +; UPPER-NEXT: [[IV_UNR_PH:%.*]] = phi i32 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ] +; UPPER-NEXT: [[PTR_UNR_PH:%.*]] = phi i8* [ [[PTR_NEXT_PROL]], [[LOOP_PROL]] ] +; UPPER-NEXT: br label [[LOOP_PROL_LOOPEXIT]] +; UPPER: loop.prol.loopexit: +; UPPER-NEXT: [[IV_UNR:%.*]] = phi i32 [ [[X]], [[LOOP_PREHEADER]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] +; UPPER-NEXT: [[PTR_UNR:%.*]] = phi i8* [ [[Y]], [[LOOP_PREHEADER]] ], [ [[PTR_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] +; UPPER-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 5 +; UPPER-NEXT: br i1 [[TMP7]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_PREHEADER_NEW:%.*]] +; UPPER: loop.preheader.new: +; UPPER-NEXT: br label [[LOOP:%.*]] +; UPPER: loop: +; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[IV_NEXT_5:%.*]], [[LOOP]] ] +; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[PTR_NEXT_5:%.*]], [[LOOP]] ] +; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 4 +; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT]], align 1 +; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4 +; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1 +; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4 +; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1 +; UPPER-NEXT: 
store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1 +; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4 +; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1 +; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4 +; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1 +; UPPER-NEXT: [[IV_NEXT_5]] = add nuw i32 [[IV_NEXT_4]], 4 +; UPPER-NEXT: [[PTR_NEXT_5]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1 +; UPPER-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IV_NEXT_5]], 17 +; UPPER-NEXT: br i1 [[TMP8]], label [[LOOP]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]] +; UPPER: exit.loopexit.unr-lcssa: +; UPPER-NEXT: br label [[EXIT_LOOPEXIT]] +; UPPER: exit.loopexit: +; UPPER-NEXT: br label [[EXIT]] +; UPPER: exit: +; UPPER-NEXT: ret void +; entry: %x = load i32, i32* @global, align 4 %y = load i8*, i8** @global.1, align 4 From c2ae7934c809d739f7677f4422755c58e4ab8ea3 Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Thu, 2 Jul 2020 15:15:14 +0200 Subject: [PATCH 15/23] [SystemZ/ZOS]__(de)register_frame are not available on z/OS. The functions `__register_frame`/`__deregister_frame` are not available on z/OS, so add a guard to not use them. Reviewed By: lhames, abhina.sreeskantharajan Differential Revision: https://reviews.llvm.org/D84787 --- llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp | 4 ++-- llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index f1114e92c36062..54ba9ac39ea6a3 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -631,8 +631,8 @@ Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, // Determine whether we can register EH tables. #if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \ - !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \ - !defined(__USING_SJLJ_EXCEPTIONS__)) + !(defined(_AIX) && defined(__ibmxl__)) && !defined(__MVS__) && \ + !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)) #define HAVE_EHTABLE_SUPPORT 1 #else #define HAVE_EHTABLE_SUPPORT 0 diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp index 46604ff4000c11..6aa8f1cc6861da 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp @@ -33,8 +33,8 @@ RTDyldMemoryManager::~RTDyldMemoryManager() {} // Determine whether we can register EH tables. #if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \ - !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \ - !defined(__USING_SJLJ_EXCEPTIONS__)) + !(defined(_AIX) && defined(__ibmxl__)) && !defined(__MVS__) && \ + !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)) #define HAVE_EHTABLE_SUPPORT 1 #else #define HAVE_EHTABLE_SUPPORT 0 From f5f22f04481bec765ccaf6e400da24987a421c2e Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 17 Aug 2020 15:06:46 +0200 Subject: [PATCH 16/23] [lldb] Skip TestSimulatorPlatform with sanitized builds The test executable crashes when ran on a simulator. 
Skipping until this is fixed. rdar://67238668 --- .../API/macosx/simulator/TestSimulatorPlatform.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py index cd3c999b5356dc..41924fc62a1073 100644 --- a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py +++ b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py @@ -52,6 +52,7 @@ def run_with(self, arch, os, vers, env, expected_load_command): self.expect('image list -b -t', patterns=['a\.out '+triple_re]) self.check_debugserver(log, os+env, vers) + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('iphone') @@ -62,6 +63,7 @@ def test_ios(self): os='ios', vers='', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('appletv') @@ -72,6 +74,7 @@ def test_tvos(self): os='tvos', vers='', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('watch') @@ -84,6 +87,7 @@ def test_watchos_i386(self): os='watchos', vers='', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('watch') @@ -114,6 +118,7 @@ def test_lc_version_min_macosx(self): self.run_with(arch=self.getArchitecture(), os='macosx', vers='10.9', env='', expected_load_command='LC_VERSION_MIN_MACOSX') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('iphone') @@ -126,6 +131,7 @@ def test_lc_version_min_iphoneos(self): os='ios', vers='11.0', env='simulator', expected_load_command='LC_VERSION_MIN_IPHONEOS') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('iphone') @@ -138,6 +144,7 @@ def test_ios_backdeploy_x86(self): os='ios', vers='13.0', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('iphone') @@ -149,6 +156,7 @@ def test_ios_backdeploy_apple_silicon(self): os='ios', vers='11.0', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('appletv') @@ -161,6 +169,7 @@ def test_lc_version_min_tvos(self): os='tvos', vers='11.0', env='simulator', expected_load_command='LC_VERSION_MIN_TVOS') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('appletv') @@ -172,6 +181,7 @@ def test_tvos_backdeploy_apple_silicon(self): os='tvos', vers='11.0', env='simulator', expected_load_command='LC_BUILD_VERSION') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('watch') @@ -185,6 +195,7 @@ def test_lc_version_min_watchos(self): os='watchos', vers='4.0', env='simulator', expected_load_command='LC_VERSION_MIN_WATCHOS') + @skipIfAsan @skipUnlessDarwin @skipIfDarwinEmbedded @apple_simulator_test('watch') From 874aef875d0cd04b33f25bb71b534cbb0d6220ae Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 17 Aug 2020 13:34:07 +0200 Subject: [PATCH 17/23] [llvm] support graceful failure of DataLayout parsing Existing implementation always aborts on syntax errors in a DataLayout description. While this is meaningful for consuming textual IR modules, it is inconvenient for users that may need fine-grained control over the layout from, e.g., command-line options. 
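(Illustrative sketch, not part of this patch: with the failable entry point added below, a caller holding a user-supplied layout string could recover from a malformed descriptor instead of aborting. The helper name, the way the string is obtained, and the include paths are assumptions; DataLayout::parse and the Error utilities are the APIs this patch adds or already provides.)

  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"

  // Returns true and fills DL on success; reports the problem otherwise.
  static bool applyUserLayout(llvm::StringRef Desc, llvm::DataLayout &DL) {
    llvm::Expected<llvm::DataLayout> MaybeDL = llvm::DataLayout::parse(Desc);
    if (!MaybeDL) {
      // Report the malformed descriptor instead of killing the process.
      llvm::errs() << "invalid data layout: "
                   << llvm::toString(MaybeDL.takeError()) << "\n";
      return false;
    }
    DL = *MaybeDL;
    return true;
  }
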
Propagate errors through the parsing functions and only abort in the top-level parsing function instead. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D85650 --- llvm/include/llvm/IR/DataLayout.h | 24 ++- llvm/lib/IR/DataLayout.cpp | 235 +++++++++++++++++++----------- 2 files changed, 166 insertions(+), 93 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 17297bb8b30935..579275ab1f8222 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -174,19 +174,25 @@ class DataLayout { /// well-defined bitwise representation. SmallVector NonIntegralAddressSpaces; - void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align, - uint32_t bit_width); + /// Attempts to set the alignment of the given type. Returns an error + /// description on failure. + Error setAlignment(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width); + Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, bool ABIAlign, Type *Ty) const; - void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, - uint32_t TypeByteWidth, uint32_t IndexWidth); + + /// Attempts to set the alignment of a pointer in the given address space. + /// Returns an error description on failure. + Error setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, + uint32_t TypeByteWidth, uint32_t IndexWidth); /// Internal helper method that returns requested alignment for type. Align getAlignment(Type *Ty, bool abi_or_pref) const; - /// Parses a target data specification string. Assert if the string is - /// malformed. - void parseSpecifier(StringRef LayoutDescription); + /// Attempts to parse a target data specification string and reports an error + /// if the string is malformed. + Error parseSpecifier(StringRef Desc); // Free all internal data structures. void clear(); @@ -229,6 +235,10 @@ class DataLayout { /// Parse a data layout string (with fallback to default values). void reset(StringRef LayoutDescription); + /// Parse a data layout string and return the layout. Return an error + /// description on failure. + static Expected parse(StringRef LayoutDescription); + /// Layout endianness... 
bool isLittleEndian() const { return !BigEndian; } bool isBigEndian() const { return BigEndian; } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index c44737c5bfc248..31b227d4a68207 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TypeSize.h" @@ -188,57 +189,80 @@ void DataLayout::reset(StringRef Desc) { // Default alignments for (const LayoutAlignElem &E : DefaultAlignments) { - setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, - E.TypeBitWidth); + if (Error Err = setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, + E.PrefAlign, E.TypeBitWidth)) + return report_fatal_error(std::move(Err)); } - setPointerAlignment(0, Align(8), Align(8), 8, 8); + if (Error Err = setPointerAlignment(0, Align(8), Align(8), 8, 8)) + return report_fatal_error(std::move(Err)); - parseSpecifier(Desc); + if (Error Err = parseSpecifier(Desc)) + return report_fatal_error(std::move(Err)); +} + +Expected DataLayout::parse(StringRef LayoutDescription) { + DataLayout Layout(""); + if (Error Err = Layout.parseSpecifier(LayoutDescription)) + return std::move(Err); + return Layout; +} + +static Error reportError(const Twine &Message) { + return createStringError(inconvertibleErrorCode(), Message); } /// Checked version of split, to ensure mandatory subparts. -static std::pair split(StringRef Str, char Separator) { +static Error split(StringRef Str, char Separator, + std::pair &Split) { assert(!Str.empty() && "parse error, string can't be empty here"); - std::pair Split = Str.split(Separator); + Split = Str.split(Separator); if (Split.second.empty() && Split.first != Str) - report_fatal_error("Trailing separator in datalayout string"); + return reportError("Trailing separator in datalayout string"); if (!Split.second.empty() && Split.first.empty()) - report_fatal_error("Expected token before separator in datalayout string"); - return Split; + return reportError("Expected token before separator in datalayout string"); + return Error::success(); } /// Get an unsigned integer, including error checks. -static unsigned getInt(StringRef R) { - unsigned Result; +template static Error getInt(StringRef R, IntTy &Result) { bool error = R.getAsInteger(10, Result); (void)error; if (error) - report_fatal_error("not a number, or does not fit in an unsigned int"); - return Result; + return reportError("not a number, or does not fit in an unsigned int"); + return Error::success(); } -/// Convert bits into bytes. Assert if not a byte width multiple. -static unsigned inBytes(unsigned Bits) { - if (Bits % 8) - report_fatal_error("number of bits must be a byte width multiple"); - return Bits / 8; +/// Get an unsigned integer representing the number of bits and convert it into +/// bytes. Error out of not a byte width multiple. 
+template +static Error getIntInBytes(StringRef R, IntTy &Result) { + if (Error Err = getInt(R, Result)) + return Err; + if (Result % 8) + return reportError("number of bits must be a byte width multiple"); + Result /= 8; + return Error::success(); } -static unsigned getAddrSpace(StringRef R) { - unsigned AddrSpace = getInt(R); +static Error getAddrSpace(StringRef R, unsigned &AddrSpace) { + if (Error Err = getInt(R, AddrSpace)) + return Err; if (!isUInt<24>(AddrSpace)) - report_fatal_error("Invalid address space, must be a 24-bit integer"); - return AddrSpace; + return reportError("Invalid address space, must be a 24-bit integer"); + return Error::success(); } -void DataLayout::parseSpecifier(StringRef Desc) { +Error DataLayout::parseSpecifier(StringRef Desc) { StringRepresentation = std::string(Desc); while (!Desc.empty()) { // Split at '-'. - std::pair Split = split(Desc, '-'); + std::pair Split; + if (Error Err = split(Desc, '-', Split)) + return Err; Desc = Split.second; // Split at ':'. - Split = split(Split.first, ':'); + if (Error Err = split(Split.first, ':', Split)) + return Err; // Aliases used below. StringRef &Tok = Split.first; // Current token. @@ -246,11 +270,14 @@ void DataLayout::parseSpecifier(StringRef Desc) { if (Tok == "ni") { do { - Split = split(Rest, ':'); + if (Error Err = split(Rest, ':', Split)) + return Err; Rest = Split.second; - unsigned AS = getInt(Split.first); + unsigned AS; + if (Error Err = getInt(Split.first, AS)) + return Err; if (AS == 0) - report_fatal_error("Address space 0 can never be non-integral"); + return reportError("Address space 0 can never be non-integral"); NonIntegralAddressSpaces.push_back(AS); } while (!Rest.empty()); @@ -273,28 +300,36 @@ void DataLayout::parseSpecifier(StringRef Desc) { break; case 'p': { // Address space. - unsigned AddrSpace = Tok.empty() ? 0 : getInt(Tok); + unsigned AddrSpace = 0; + if (!Tok.empty()) + if (Error Err = getInt(Tok, AddrSpace)) + return Err; if (!isUInt<24>(AddrSpace)) - report_fatal_error("Invalid address space, must be a 24bit integer"); + return reportError("Invalid address space, must be a 24bit integer"); // Size. if (Rest.empty()) - report_fatal_error( + return reportError( "Missing size specification for pointer in datalayout string"); - Split = split(Rest, ':'); - unsigned PointerMemSize = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + unsigned PointerMemSize; + if (Error Err = getIntInBytes(Tok, PointerMemSize)) + return Err; if (!PointerMemSize) - report_fatal_error("Invalid pointer size of 0 bytes"); + return reportError("Invalid pointer size of 0 bytes"); // ABI alignment. if (Rest.empty()) - report_fatal_error( + return reportError( "Missing alignment specification for pointer in datalayout string"); - Split = split(Rest, ':'); - unsigned PointerABIAlign = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + unsigned PointerABIAlign; + if (Error Err = getIntInBytes(Tok, PointerABIAlign)) + return Err; if (!isPowerOf2_64(PointerABIAlign)) - report_fatal_error( - "Pointer ABI alignment must be a power of 2"); + return reportError("Pointer ABI alignment must be a power of 2"); // Size of index used in GEP for address calculation. // The parameter is optional. By default it is equal to size of pointer. @@ -303,23 +338,28 @@ void DataLayout::parseSpecifier(StringRef Desc) { // Preferred alignment. 
unsigned PointerPrefAlign = PointerABIAlign; if (!Rest.empty()) { - Split = split(Rest, ':'); - PointerPrefAlign = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + if (Error Err = getIntInBytes(Tok, PointerPrefAlign)) + return Err; if (!isPowerOf2_64(PointerPrefAlign)) - report_fatal_error( - "Pointer preferred alignment must be a power of 2"); + return reportError( + "Pointer preferred alignment must be a power of 2"); // Now read the index. It is the second optional parameter here. if (!Rest.empty()) { - Split = split(Rest, ':'); - IndexSize = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + if (Error Err = getIntInBytes(Tok, IndexSize)) + return Err; if (!IndexSize) - report_fatal_error("Invalid index size of 0 bytes"); + return reportError("Invalid index size of 0 bytes"); } } - setPointerAlignment(AddrSpace, assumeAligned(PointerABIAlign), - assumeAligned(PointerPrefAlign), PointerMemSize, - IndexSize); + if (Error Err = setPointerAlignment( + AddrSpace, assumeAligned(PointerABIAlign), + assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize)) + return Err; break; } case 'i': @@ -336,61 +376,75 @@ void DataLayout::parseSpecifier(StringRef Desc) { } // Bit size. - unsigned Size = Tok.empty() ? 0 : getInt(Tok); + unsigned Size = 0; + if (!Tok.empty()) + if (Error Err = getInt(Tok, Size)) + return Err; if (AlignType == AGGREGATE_ALIGN && Size != 0) - report_fatal_error( + return reportError( "Sized aggregate specification in datalayout string"); // ABI alignment. if (Rest.empty()) - report_fatal_error( + return reportError( "Missing alignment specification in datalayout string"); - Split = split(Rest, ':'); - const unsigned ABIAlign = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + unsigned ABIAlign; + if (Error Err = getIntInBytes(Tok, ABIAlign)) + return Err; if (AlignType != AGGREGATE_ALIGN && !ABIAlign) - report_fatal_error( + return reportError( "ABI alignment specification must be >0 for non-aggregate types"); if (!isUInt<16>(ABIAlign)) - report_fatal_error("Invalid ABI alignment, must be a 16bit integer"); + return reportError("Invalid ABI alignment, must be a 16bit integer"); if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) - report_fatal_error("Invalid ABI alignment, must be a power of 2"); + return reportError("Invalid ABI alignment, must be a power of 2"); // Preferred alignment. unsigned PrefAlign = ABIAlign; if (!Rest.empty()) { - Split = split(Rest, ':'); - PrefAlign = inBytes(getInt(Tok)); + if (Error Err = split(Rest, ':', Split)) + return Err; + if (Error Err = getIntInBytes(Tok, PrefAlign)) + return Err; } if (!isUInt<16>(PrefAlign)) - report_fatal_error( + return reportError( "Invalid preferred alignment, must be a 16bit integer"); if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) - report_fatal_error("Invalid preferred alignment, must be a power of 2"); + return reportError("Invalid preferred alignment, must be a power of 2"); - setAlignment(AlignType, assumeAligned(ABIAlign), assumeAligned(PrefAlign), - Size); + if (Error Err = setAlignment(AlignType, assumeAligned(ABIAlign), + assumeAligned(PrefAlign), Size)) + return Err; break; } case 'n': // Native integer types. 
while (true) { - unsigned Width = getInt(Tok); + unsigned Width; + if (Error Err = getInt(Tok, Width)) + return Err; if (Width == 0) - report_fatal_error( + return reportError( "Zero width native integer type in datalayout string"); LegalIntWidths.push_back(Width); if (Rest.empty()) break; - Split = split(Rest, ':'); + if (Error Err = split(Rest, ':', Split)) + return Err; } break; case 'S': { // Stack natural alignment. - uint64_t Alignment = inBytes(getInt(Tok)); + uint64_t Alignment; + if (Error Err = getIntInBytes(Tok, Alignment)) + return Err; if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) - report_fatal_error("Alignment is neither 0 nor a power of 2"); + return reportError("Alignment is neither 0 nor a power of 2"); StackNaturalAlign = MaybeAlign(Alignment); break; } @@ -403,34 +457,39 @@ void DataLayout::parseSpecifier(StringRef Desc) { TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign; break; default: - report_fatal_error("Unknown function pointer alignment type in " + return reportError("Unknown function pointer alignment type in " "datalayout string"); } Tok = Tok.substr(1); - uint64_t Alignment = inBytes(getInt(Tok)); + uint64_t Alignment; + if (Error Err = getIntInBytes(Tok, Alignment)) + return Err; if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) - report_fatal_error("Alignment is neither 0 nor a power of 2"); + return reportError("Alignment is neither 0 nor a power of 2"); FunctionPtrAlign = MaybeAlign(Alignment); break; } case 'P': { // Function address space. - ProgramAddrSpace = getAddrSpace(Tok); + if (Error Err = getAddrSpace(Tok, ProgramAddrSpace)) + return Err; break; } case 'A': { // Default stack/alloca address space. - AllocaAddrSpace = getAddrSpace(Tok); + if (Error Err = getAddrSpace(Tok, AllocaAddrSpace)) + return Err; break; } case 'm': if (!Tok.empty()) - report_fatal_error("Unexpected trailing characters after mangling specifier in datalayout string"); + return reportError("Unexpected trailing characters after mangling " + "specifier in datalayout string"); if (Rest.empty()) - report_fatal_error("Expected mangling specifier in datalayout string"); + return reportError("Expected mangling specifier in datalayout string"); if (Rest.size() > 1) - report_fatal_error("Unknown mangling specifier in datalayout string"); + return reportError("Unknown mangling specifier in datalayout string"); switch(Rest[0]) { default: - report_fatal_error("Unknown mangling in datalayout string"); + return reportError("Unknown mangling in datalayout string"); case 'e': ManglingMode = MM_ELF; break; @@ -452,10 +511,12 @@ void DataLayout::parseSpecifier(StringRef Desc) { } break; default: - report_fatal_error("Unknown specifier in datalayout string"); + return reportError("Unknown specifier in datalayout string"); break; } } + + return Error::success(); } DataLayout::DataLayout(const Module *M) { @@ -487,17 +548,17 @@ DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType, }); } -void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align, - Align pref_align, uint32_t bit_width) { +Error DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width) { // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as // uint16_t, it is unclear if there are requirements for alignment to be less // than 2^16 other than storage. In the meantime we leave the restriction as // an assert. See D67400 for context. 
assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big"); if (!isUInt<24>(bit_width)) - report_fatal_error("Invalid bit width, must be a 24bit integer"); + return reportError("Invalid bit width, must be a 24bit integer"); if (pref_align < abi_align) - report_fatal_error( + return reportError( "Preferred alignment cannot be less than the ABI alignment"); AlignmentsTy::iterator I = findAlignmentLowerBound(align_type, bit_width); @@ -511,6 +572,7 @@ void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align, Alignments.insert(I, LayoutAlignElem::get(align_type, abi_align, pref_align, bit_width)); } + return Error::success(); } DataLayout::PointersTy::iterator @@ -521,11 +583,11 @@ DataLayout::findPointerLowerBound(uint32_t AddressSpace) { }); } -void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, - Align PrefAlign, uint32_t TypeByteWidth, - uint32_t IndexWidth) { +Error DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { if (PrefAlign < ABIAlign) - report_fatal_error( + return reportError( "Preferred alignment cannot be less than the ABI alignment"); PointersTy::iterator I = findPointerLowerBound(AddrSpace); @@ -538,6 +600,7 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, I->TypeByteWidth = TypeByteWidth; I->IndexWidth = IndexWidth; } + return Error::success(); } /// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or From 168213f91c571352c56f432573513cba3f9ba61b Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 17 Aug 2020 13:35:27 +0200 Subject: [PATCH 18/23] [mlir] Move data layout from LLVMDialect to module Op attributes Legacy implementation of the LLVM dialect in MLIR contained an instance of llvm::Module as it was required to parse LLVM IR types. The access to the data layout of this module was exposed to the users for convenience, but in practice this layout has always been the default one obtained by parsing an empty layout description string. Current implementation of the dialect no longer relies on wrapping LLVM IR types, but it kept an instance of DataLayout for compatibility. This effectively forces a single data layout to be used across all modules in a given MLIR context, which is not desirable. Remove DataLayout from the LLVM dialect and attach it as a module attribute instead. Since MLIR does not yet have support for data layouts, use the LLVM DataLayout in string form with verification inside MLIR. Introduce the layout when converting a module to the LLVM dialect and keep the default "" description for compatibility. This approach should be replaced with a proper MLIR-based data layout when it becomes available, but provides an immediate solution to compiling modules with different layouts, e.g. for GPUs. This removes the need for LLVMDialectImpl, which is also removed. 
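(Illustrative sketch, not part of this patch: a downstream pipeline that wants a non-default layout could validate a descriptor and attach it through the new module attribute. The helper name, call site, and include paths are assumptions; getDataLayoutAttrName, verifyDataLayoutString, and StringAttr::get are used as in the patch.)

  #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
  #include "mlir/IR/Module.h"
  #include "mlir/Support/LogicalResult.h"

  // Attach a validated data layout string to a module, or emit an error.
  static mlir::LogicalResult setModuleDataLayout(mlir::ModuleOp module,
                                                 llvm::StringRef desc) {
    if (mlir::failed(mlir::LLVM::LLVMDialect::verifyDataLayoutString(
            desc, [&](const llvm::Twine &msg) { module.emitError(msg); })))
      return mlir::failure();
    module.setAttr(mlir::LLVM::LLVMDialect::getDataLayoutAttrName(),
                   mlir::StringAttr::get(desc, module.getContext()));
    return mlir::success();
  }
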
Depends On D85650 Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D85652 --- mlir/include/mlir/Conversion/Passes.td | 4 ++ .../StandardToLLVM/ConvertStandardToLLVM.h | 3 ++ .../ConvertStandardToLLVMPass.h | 7 +++ .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 15 +++--- .../StandardToLLVM/StandardToLLVM.cpp | 23 +++++--- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 3 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 53 ++++++++++++------- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 8 +-- .../StandardToLLVM/convert-data-layout.mlir | 6 +++ mlir/test/Dialect/LLVMIR/invalid.mlir | 7 +++ mlir/test/Target/llvmir.mlir | 16 ++++++ 11 files changed, 105 insertions(+), 40 deletions(-) create mode 100644 mlir/test/Conversion/StandardToLLVM/convert-data-layout.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 4d4fe064a6bc20..4ff23d71a5c0bf 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -277,6 +277,10 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> { Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", "Bitwidth of the index type, 0 to use size of machine word">, + Option<"dataLayout", "data-layout", "std::string", + /*default=*/"\"\"", + "String description (LLVM format) of the data layout that is " + "expected on the produced module"> ]; } diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 2f8f87fa0e41e4..63ffd783738250 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -105,6 +105,9 @@ class LLVMTypeConverter : public TypeConverter { /// pointers to memref descriptors for arguments. LLVM::LLVMType convertFunctionTypeCWrapper(FunctionType type); + /// Returns the data layout to use during and after conversion. + const llvm::DataLayout &getDataLayout() { return options.dataLayout; } + /// Gets the LLVM representation of the index type. The returned type is an /// integer type with the size configured for this type converter. LLVM::LLVMType getIndexType(); diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h index 3d8312c6e7f51e..02fefc689bac31 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h @@ -9,6 +9,8 @@ #ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_ #define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVMPASS_H_ +#include "llvm/IR/DataLayout.h" + #include namespace mlir { @@ -31,6 +33,11 @@ struct LowerToLLVMOptions { /// Use aligned_alloc for heap allocations. bool useAlignedAlloc = false; + /// The data layout of the module to produce. This must be consistent with the + /// data layout used in the upper levels of the lowering pipeline. + // TODO: this should be replaced by MLIR data layout when one exists. + llvm::DataLayout dataLayout = llvm::DataLayout(""); + /// Get a statically allocated copy of the default LowerToLLVMOptions. 
static const LowerToLLVMOptions &getDefaultOptions() { static LowerToLLVMOptions options; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index d21f5bc0b49b35..e824f97bc28544 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -20,16 +20,15 @@ def LLVM_Dialect : Dialect { let name = "llvm"; let cppNamespace = "LLVM"; let hasRegionArgAttrVerify = 1; + let hasOperationAttrVerify = 1; let extraClassDeclaration = [{ - ~LLVMDialect(); - const llvm::DataLayout &getDataLayout(); + /// Name of the data layout attributes. + static StringRef getDataLayoutAttrName() { return "llvm.data_layout"; } - private: - friend LLVMType; - - // This can't be a unique_ptr because the ctor is generated inline - // in the class definition at the moment. - detail::LLVMDialectImpl *impl; + /// Verifies if the given string is a well-formed data layout descriptor. + /// Uses `reportError` to report errors. + static LogicalResult verifyDataLayoutString( + StringRef descr, llvm::function_ref reportError); }]; } diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index efe4a3c958d7b9..4a061963fce3aa 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -129,8 +129,7 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, options(options) { assert(llvmDialect && "LLVM IR dialect is not registered"); if (options.indexBitwidth == kDeriveIndexBitwidthFromDataLayout) - this->options.indexBitwidth = - llvmDialect->getDataLayout().getPointerSizeInBits(); + this->options.indexBitwidth = options.dataLayout.getPointerSizeInBits(); // Register conversions for the standard types. addConversion([&](ComplexType type) { return convertComplexType(type); }); @@ -198,7 +197,7 @@ LLVM::LLVMType LLVMTypeConverter::getIndexType() { } unsigned LLVMTypeConverter::getPointerBitwidth(unsigned addressSpace) { - return llvmDialect->getDataLayout().getPointerSizeInBits(addressSpace); + return options.dataLayout.getPointerSizeInBits(addressSpace); } Type LLVMTypeConverter::convertIndexType(IndexType type) { @@ -3427,11 +3426,13 @@ namespace { struct LLVMLoweringPass : public ConvertStandardToLLVMBase { LLVMLoweringPass() = default; LLVMLoweringPass(bool useBarePtrCallConv, bool emitCWrappers, - unsigned indexBitwidth, bool useAlignedAlloc) { + unsigned indexBitwidth, bool useAlignedAlloc, + const llvm::DataLayout &dataLayout) { this->useBarePtrCallConv = useBarePtrCallConv; this->emitCWrappers = emitCWrappers; this->indexBitwidth = indexBitwidth; this->useAlignedAlloc = useAlignedAlloc; + this->dataLayout = dataLayout.getStringRepresentation(); } /// Run the dialect converter on the module. 
@@ -3443,11 +3444,19 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase { signalPassFailure(); return; } + if (failed(LLVM::LLVMDialect::verifyDataLayoutString( + this->dataLayout, [this](const Twine &message) { + getOperation().emitError() << message.str(); + }))) { + signalPassFailure(); + return; + } ModuleOp m = getOperation(); LowerToLLVMOptions options = {useBarePtrCallConv, emitCWrappers, - indexBitwidth, useAlignedAlloc}; + indexBitwidth, useAlignedAlloc, + llvm::DataLayout(this->dataLayout)}; LLVMTypeConverter typeConverter(&getContext(), options); OwningRewritePatternList patterns; @@ -3456,6 +3465,8 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase { LLVMConversionTarget target(getContext()); if (failed(applyPartialConversion(m, target, patterns))) signalPassFailure(); + m.setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), + StringAttr::get(this->dataLayout, m.getContext())); } }; } // end namespace @@ -3471,5 +3482,5 @@ std::unique_ptr> mlir::createLowerToLLVMPass(const LowerToLLVMOptions &options) { return std::make_unique( options.useBarePtrCallConv, options.emitCWrappers, options.indexBitwidth, - options.useAlignedAlloc); + options.useAlignedAlloc, options.dataLayout); } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a68f28fa12894c..45f6f4d1cf31b7 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -128,11 +128,10 @@ LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op, // TODO: this should use the MLIR data layout when it becomes available and // stop depending on translation. - LLVM::LLVMDialect *dialect = typeConverter.getDialect(); llvm::LLVMContext llvmContext; align = LLVM::TypeToLLVMIRTranslator(llvmContext) .getPreferredAlignment(elementTy.cast(), - dialect->getDataLayout()); + typeConverter.getDataLayout()); return success(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index f1299c065d3674..6c5f207b57a654 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1668,23 +1668,7 @@ static LogicalResult verify(FenceOp &op) { // LLVMDialect initialization, type parsing, and registration. //===----------------------------------------------------------------------===// -namespace mlir { -namespace LLVM { -namespace detail { -struct LLVMDialectImpl { - LLVMDialectImpl() : layout("") {} - - /// Default data layout to use. - // TODO: this should be moved to some Op equivalent to LLVM module and - // eventually replaced with a proper MLIR data layout. - llvm::DataLayout layout; -}; -} // end namespace detail -} // end namespace LLVM -} // end namespace mlir - void LLVMDialect::initialize() { - impl = new detail::LLVMDialectImpl(); // clang-format off addTypeslayout; } - /// Parse a type registered to this dialect. 
Type LLVMDialect::parseType(DialectAsmParser &parser) const { return detail::parseType(parser); @@ -1732,6 +1712,39 @@ void LLVMDialect::printType(Type type, DialectAsmPrinter &os) const { return detail::printType(type.cast(), os); } +LogicalResult LLVMDialect::verifyDataLayoutString( + StringRef descr, llvm::function_ref reportError) { + llvm::Expected maybeDataLayout = + llvm::DataLayout::parse(descr); + if (maybeDataLayout) + return success(); + + std::string message; + llvm::raw_string_ostream messageStream(message); + llvm::logAllUnhandledErrors(maybeDataLayout.takeError(), messageStream); + reportError("invalid data layout descriptor: " + messageStream.str()); + return failure(); +} + +/// Verify LLVM dialect attributes. +LogicalResult LLVMDialect::verifyOperationAttribute(Operation *op, + NamedAttribute attr) { + // If the data layout attribute is present, it must use the LLVM data layout + // syntax. Try parsing it and report errors in case of failure. Users of this + // attribute may assume it is well-formed and can pass it to the (asserting) + // llvm::DataLayout constructor. + if (attr.first.strref() != LLVM::LLVMDialect::getDataLayoutAttrName()) + return success(); + if (auto stringAttr = attr.second.dyn_cast()) + return verifyDataLayoutString( + stringAttr.getValue(), + [op](const Twine &message) { op->emitOpError() << message.str(); }); + + return op->emitOpError() << "expected '" + << LLVM::LLVMDialect::getDataLayoutAttrName() + << "' to be a string attribute"; +} + /// Verify LLVMIR function argument attributes. LogicalResult LLVMDialect::verifyRegionArgAttribute(Operation *op, unsigned regionIdx, diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 215c1910f74439..f8277d154f2765 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -944,11 +944,11 @@ ModuleTranslation::lookupValues(ValueRange values) { std::unique_ptr ModuleTranslation::prepareLLVMModule( Operation *m, llvm::LLVMContext &llvmContext, StringRef name) { - auto *dialect = m->getContext()->getRegisteredDialect(); - assert(dialect && "LLVM dialect must be registered"); - auto llvmModule = std::make_unique(name, llvmContext); - llvmModule->setDataLayout(dialect->getDataLayout()); + + if (auto dataLayoutAttr = + m->getAttr(LLVM::LLVMDialect::getDataLayoutAttrName())) + llvmModule->setDataLayout(dataLayoutAttr.cast().getValue()); // Inject declarations for `malloc` and `free` functions that can be used in // memref allocation/deallocation coming from standard ops lowering. 
diff --git a/mlir/test/Conversion/StandardToLLVM/convert-data-layout.mlir b/mlir/test/Conversion/StandardToLLVM/convert-data-layout.mlir new file mode 100644 index 00000000000000..5086de2f5d05ce --- /dev/null +++ b/mlir/test/Conversion/StandardToLLVM/convert-data-layout.mlir @@ -0,0 +1,6 @@ +// RUN: mlir-opt -convert-std-to-llvm %s | FileCheck %s +// RUN-32: mlir-opt -convert-std-to-llvm='data-layout=p:32:32:32' %s | FileCheck %s + +// CHECK: module attributes {llvm.data_layout = ""} +// CHECK-32: module attributes {llvm.data_layout ="p:32:32:32"} +module {} diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index b4475df66fd1dc..737fa4ff8bf199 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -603,3 +603,10 @@ func @invalid_ordering_in_fence() { // expected-error @+1 {{can be given only acquire, release, acq_rel, and seq_cst orderings}} llvm.fence syncscope("agent") monotonic } + +// ----- + +// expected-error @+1 {{invalid data layout descriptor}} +module attributes {llvm.data_layout = "#vjkr32"} { + func @invalid_data_layout() +} diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir index 5e57f1c7c69863..b1abae64a2adf9 100644 --- a/mlir/test/Target/llvmir.mlir +++ b/mlir/test/Target/llvmir.mlir @@ -1295,3 +1295,19 @@ llvm.func @nontemoral_store_and_load() { } // CHECK: ![[NODE]] = !{i32 1} + +// ----- + +// Check that the translation does not crash in absence of a data layout. +module { + // CHECK: declare void @module_default_layout + llvm.func @module_default_layout() +} + +// ----- + +// CHECK: target datalayout = "E" +module attributes {llvm.data_layout = "E"} { + llvm.func @module_big_endian() +} + From e0375dbcb39bdd5fa9f8288f9149df256003e06b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Aug 2020 22:19:22 -0400 Subject: [PATCH 19/23] AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 
---
 llvm/lib/Target/AMDGPU/FLATInstructions.td    |  9 +++++++--
 .../CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7dd98461668087..dcb8d74a448792 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -783,6 +783,11 @@ class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;
 
+class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                            ValueType data_vt = vt> : GCNPat <
   (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
@@ -971,8 +976,8 @@
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
 
-def : FlatAtomicPatNoRtn ;
-def : FlatAtomicPatNoRtn ;
+def : FlatSignedAtomicPatNoRtn ;
+def : FlatSignedAtomicPatNoRtn ;
 
 } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
index 693b09dd0c7b76..b46e01373aad07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -54,6 +54,15 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
+; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+main_body:
+  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
 ; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off
 define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
@@ -71,6 +80,15 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
+; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+main_body:
+  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  ret void
+}
+
 ; Make sure this artificially selects with an incorrect subtarget, but
 ; the feature set.
 ; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:

From 66ffa0e91f1e602d11c5aad29b000b275d8be89a Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 15 Aug 2020 15:14:11 -0400
Subject: [PATCH 20/23] AMDGPU/GlobalISel: Fix using post-legal combiner without LegalizerInfo

---
 llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 8a48ea5bd30c57..c4fb1ad54910a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -258,7 +258,7 @@ class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
 bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
-  CombinerHelper Helper(Observer, B, KB, MDT);
+  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
 
   if (Generated.tryCombineAll(Observer, MI, B, Helper))

From 924f31bc3c2c765f51eedda14534623053ffd75a Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 15 Aug 2020 10:26:21 -0400
Subject: [PATCH 21/23] GlobalISel: Remove unnecessary check for copy type

COPY isn't allowed to change the type, but can mix no type with type.

---
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 5 ++---
 llvm/lib/CodeGen/GlobalISel/Utils.cpp        | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index c0ca4a841b3aaf..23dc05c4e15707 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -151,9 +151,8 @@ const ConstantFP* getConstantFPVRegVal(Register VReg,
 MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg,
                            const MachineRegisterInfo &MRI);
 
-/// Find the def instruction for \p Reg, folding away any trivial copies. Note
-/// it may still return a COPY, if it changes the type. May return nullptr if \p
-/// Reg is not a generic virtual register.
+/// Find the def instruction for \p Reg, folding away any trivial copies. May
+/// return nullptr if \p Reg is not a generic virtual register.
 MachineInstr *getDefIgnoringCopies(Register Reg,
                                    const MachineRegisterInfo &MRI);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cf5ff54e492d96..d598bf8b5a5350 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -365,7 +365,7 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
   while (DefMI->getOpcode() == TargetOpcode::COPY) {
     Register SrcReg = DefMI->getOperand(1).getReg();
     auto SrcTy = MRI.getType(SrcReg);
-    if (!SrcTy.isValid() || SrcTy != DstTy)
+    if (!SrcTy.isValid())
       break;
     DefMI = MRI.getVRegDef(SrcReg);
     DefSrcReg = SrcReg;

From 3b338e53e956d239efe2e8319d863838e5fdb857 Mon Sep 17 00:00:00 2001
From: David Truby
Date: Fri, 14 Aug 2020 13:22:00 +0100
Subject: [PATCH 22/23] [flang] Add preprocessor test for defines passed on the command line

This adds a test for D85862 to ensure that preprocessor definitions
passed on command lines don't regress in future.
Reviewed By: tskeith

Differential Revision: https://reviews.llvm.org/D85967
---
 flang/test/Preprocessing/defines.F90 | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 flang/test/Preprocessing/defines.F90

diff --git a/flang/test/Preprocessing/defines.F90 b/flang/test/Preprocessing/defines.F90
new file mode 100644
index 00000000000000..449c54cbb01fbd
--- /dev/null
+++ b/flang/test/Preprocessing/defines.F90
@@ -0,0 +1,8 @@
+! RUN: %f18 -E -DFOO=1 -DBAR=2 %s | FileCheck %s
+
+! CHECK: integer :: a = 1
+  integer :: a = FOO
+! CHECK: integer :: b = 2
+  integer :: b = BAR
+
+end program

From 9c4825ce282d30ea108e6371c15cb692060ff4f3 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Thu, 13 Aug 2020 14:46:31 +0200
Subject: [PATCH 23/23] [mlir] do not use llvm.cmpxchg with floats

According to the LLVM Language Reference, 'cmpxchg' accepts integer or
pointer types. Several MLIR tests were using it with floats as it appears
possible to programmatically construct and print such an instruction, but
it cannot be parsed back. Use integers instead.

Depends On D85899

Reviewed By: flaub, rriddle

Differential Revision: https://reviews.llvm.org/D85900
---
 .../StandardToLLVM/convert-to-llvmir.mlir | 32 +++++++++----------
 mlir/test/Dialect/LLVMIR/invalid.mlir     |  4 +--
 mlir/test/Dialect/LLVMIR/roundtrip.mlir   |  6 ++--
 mlir/test/Target/llvmir.mlir              | 14 ++++----
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
index e7935bc165f9b2..419ee17d8f0629 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
@@ -1216,27 +1216,27 @@ func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval :
 // -----
 
 // CHECK-LABEL: func @generic_atomic_rmw
-func @generic_atomic_rmw(%I : memref<10xf32>, %i : index) -> f32 {
-  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-  ^bb0(%old_value : f32):
-    %c1 = constant 1.0 : f32
-    atomic_yield %c1 : f32
+func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) -> i32 {
+  %x = generic_atomic_rmw %I[%i] : memref<10xi32> {
+  ^bb0(%old_value : i32):
+    %c1 = constant 1 : i32
+    atomic_yield %c1 : i32
   }
-  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<float>
-  // CHECK-NEXT: llvm.br ^bb1([[init]] : !llvm.float)
-  // CHECK-NEXT: ^bb1([[loaded:%.*]]: !llvm.float):
-  // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1.000000e+00 : f32)
+  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+  // CHECK-NEXT: llvm.br ^bb1([[init]] : !llvm.i32)
+  // CHECK-NEXT: ^bb1([[loaded:%.*]]: !llvm.i32):
+  // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1 : i32)
   // CHECK-NEXT: [[pair:%.*]] = llvm.cmpxchg %{{.*}}, [[loaded]], [[c1]]
-  // CHECK-SAME: acq_rel monotonic : !llvm.float
+  // CHECK-SAME: acq_rel monotonic : !llvm.i32
   // CHECK-NEXT: [[new:%.*]] = llvm.extractvalue [[pair]][0]
   // CHECK-NEXT: [[ok:%.*]] = llvm.extractvalue [[pair]][1]
-  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : !llvm.float)
+  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : !llvm.i32)
   // CHECK-NEXT: ^bb2:
-  %c2 = constant 2.0 : f32
-  %add = addf %c2, %x : f32
-  return %add : f32
+  %c2 = constant 2 : i32
+  %add = addi %c2, %x : i32
+  return %add : i32
-  // CHECK-NEXT: [[c2:%.*]] = llvm.mlir.constant(2.000000e+00 : f32)
-  // CHECK-NEXT: [[add:%.*]] = llvm.fadd [[c2]], [[new]] : !llvm.float
+  // CHECK-NEXT: [[c2:%.*]] = llvm.mlir.constant(2 : i32)
+  // CHECK-NEXT: [[add:%.*]] = llvm.add [[c2]], [[new]] : !llvm.i32
   // CHECK-NEXT: llvm.return [[add]]
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir
index 737fa4ff8bf199..4260d8a053e11b 100644
--- a/mlir/test/Dialect/LLVMIR/invalid.mlir
+++ b/mlir/test/Dialect/LLVMIR/invalid.mlir
@@ -463,9 +463,9 @@ func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr<float>, %f32 : !llvm.float) {
 // -----
 
 // CHECK-LABEL: @cmpxchg_mismatched_operands
-func @cmpxchg_mismatched_operands(%f32_ptr : !llvm.ptr<float>, %i32 : !llvm.i32) {
+func @cmpxchg_mismatched_operands(%i64_ptr : !llvm.ptr<i64>, %i32 : !llvm.i32) {
   // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}}
-  %0 = "llvm.cmpxchg"(%f32_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr<float>, !llvm.i32, !llvm.i32) -> !llvm.struct<(i32, i1)>
+  %0 = "llvm.cmpxchg"(%i64_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr<i64>, !llvm.i32, !llvm.i32) -> !llvm.struct<(i32, i1)>
   llvm.return
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index ef89d76387d774..6d5e07602316e0 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -256,9 +256,9 @@ func @atomicrmw(%ptr : !llvm.ptr<float>, %val : !llvm.float) {
 }
 
 // CHECK-LABEL: @cmpxchg
-func @cmpxchg(%ptr : !llvm.ptr<float>, %cmp : !llvm.float, %new : !llvm.float) {
-  // CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %{{.*}} acq_rel monotonic : !llvm.float
-  %0 = llvm.cmpxchg %ptr, %cmp, %new acq_rel monotonic : !llvm.float
+func @cmpxchg(%ptr : !llvm.ptr<i32>, %cmp : !llvm.i32, %new : !llvm.i32) {
+  // CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %{{.*}} acq_rel monotonic : !llvm.i32
+  %0 = llvm.cmpxchg %ptr, %cmp, %new acq_rel monotonic : !llvm.i32
   llvm.return
 }
 
diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir
index b1abae64a2adf9..24add42289ae7b 100644
--- a/mlir/test/Target/llvmir.mlir
+++ b/mlir/test/Target/llvmir.mlir
@@ -1139,13 +1139,13 @@ llvm.func @atomicrmw(
 }
 
 // CHECK-LABEL: @cmpxchg
-llvm.func @cmpxchg(%ptr : !llvm.ptr<float>, %cmp : !llvm.float, %val: !llvm.float) {
-  // CHECK: cmpxchg float* %{{.*}}, float %{{.*}}, float %{{.*}} acq_rel monotonic
-  %0 = llvm.cmpxchg %ptr, %cmp, %val acq_rel monotonic : !llvm.float
-  // CHECK: %{{[0-9]+}} = extractvalue { float, i1 } %{{[0-9]+}}, 0
-  %1 = llvm.extractvalue %0[0] : !llvm.struct<(float, i1)>
-  // CHECK: %{{[0-9]+}} = extractvalue { float, i1 } %{{[0-9]+}}, 1
-  %2 = llvm.extractvalue %0[1] : !llvm.struct<(float, i1)>
+llvm.func @cmpxchg(%ptr : !llvm.ptr<i32>, %cmp : !llvm.i32, %val: !llvm.i32) {
+  // CHECK: cmpxchg i32* %{{.*}}, i32 %{{.*}}, i32 %{{.*}} acq_rel monotonic
+  %0 = llvm.cmpxchg %ptr, %cmp, %val acq_rel monotonic : !llvm.i32
+  // CHECK: %{{[0-9]+}} = extractvalue { i32, i1 } %{{[0-9]+}}, 0
+  %1 = llvm.extractvalue %0[0] : !llvm.struct<(i32, i1)>
+  // CHECK: %{{[0-9]+}} = extractvalue { i32, i1 } %{{[0-9]+}}, 1
+  %2 = llvm.extractvalue %0[1] : !llvm.struct<(i32, i1)>
 
   llvm.return
 }
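
For reference, the constraint cited in the commit message comes from LLVM IR itself: cmpxchg is defined only for integer and pointer operands, which is why the MLIR tests above now use !llvm.i32 throughout. A short LLVM IR sketch of the form the updated tests translate to (illustrative only; the function name is made up):

; Illustrative only. cmpxchg operates on integer or pointer values; a float
; variant of this instruction does not parse, which is why the MLIR tests
; switched from !llvm.float to !llvm.i32.
define i32 @cmpxchg_i32(i32* %ptr, i32 %cmp, i32 %new) {
  %pair = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel monotonic
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}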