Skip to content

Commit

Permalink
Merged master:8825fec37e73 into amd-gfx:f2aa04fae003
Browse files Browse the repository at this point in the history
Local branch amd-gfx f2aa04f Merged master:e3de249a4c94 into amd-gfx:2bc9ee2e9a05
Remote branch master 8825fec [AArch64] Add CPU Cortex-R82
  • Loading branch information
Sw authored and Sw committed Oct 2, 2020
2 parents f2aa04f + 8825fec commit ae984c5
Show file tree
Hide file tree
Showing 23 changed files with 729 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class PopulateSwitch : public Tweak {
Expected<Effect> apply(const Selection &Sel) override;
std::string title() const override { return "Populate switch"; }
llvm::StringLiteral kind() const override {
return CodeAction::REFACTOR_KIND;
return CodeAction::QUICKFIX_KIND;
}

private:
Expand Down
3 changes: 2 additions & 1 deletion clang-tools-extra/clangd/test/document-link.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s
# %resource_dir actually points at builtin_include_dir, go up one directory.
# RUN: clangd -lit-test -resource-dir=%resource_dir/.. < %s | FileCheck -strict-whitespace %s
{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
---
{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"#include <stdint.h>\n#include <stddef.h>"}}}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
if (Feature == "+v8.6a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
if (Feature == "+v8r")
ArchKind = llvm::AArch64::ArchKind::ARMV8R;
if (Feature == "+fullfp16")
HasFullFP16 = true;
if (Feature == "+dotprod")
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/Arch/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,8 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
NoCrypto = true;
}

if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd) {
if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd ||
std::find(ItBegin, ItEnd, "+v8r") != ItEnd) {
if (HasCrypto && !NoCrypto) {
// Check if we have NOT disabled an algorithm with something like:
// +crypto, -algorithm
Expand Down
3 changes: 3 additions & 0 deletions clang/test/Driver/aarch64-cpus.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@
// RUN: %clang -target aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s
// CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78"

// RUN: %clang -target aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXR82 %s
// CORTEXR82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-r82"

// RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
// RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
// RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
Expand Down
1 change: 1 addition & 0 deletions clang/test/Driver/aarch64-dotprod.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
// RUN: %clang -### -target aarch64 -mcpu=cortex-a75 %s 2>&1 | FileCheck %s
// RUN: %clang -### -target aarch64 -mcpu=cortex-a76 %s 2>&1 | FileCheck %s
// RUN: %clang -### -target aarch64 -mcpu=cortex-a55 %s 2>&1 | FileCheck %s
// RUN: %clang -### -target aarch64 -mcpu=cortex-r82 %s 2>&1 | FileCheck %s
// CHECK: "+dotprod"
2 changes: 2 additions & 0 deletions clang/test/Preprocessor/aarch64-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
// RUN: %clang -target aarch64 -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A57 %s
// RUN: %clang -target aarch64 -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A72 %s
// RUN: %clang -target aarch64 -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CORTEX-A73 %s
// RUN: %clang -target aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CORTEX-R82 %s
// RUN: %clang -target aarch64 -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M1 %s
// RUN: %clang -target aarch64 -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s
// RUN: %clang -target aarch64 -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s
Expand All @@ -237,6 +238,7 @@
// CHECK-MCPU-A57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
// CHECK-MCPU-A72: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
// CHECK-MCPU-CORTEX-A73: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
// CHECK-MCPU-CORTEX-R82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8r" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+dotprod" "-target-feature" "+fp16fml" "-target-feature" "+ras" "-target-feature" "+rdm" "-target-feature" "+rcpc" "-target-feature" "+fullfp16" "-target-feature" "+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" "-target-feature" "+aes"
// CHECK-MCPU-M1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
// CHECK-MCPU-M4: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+dotprod" "-target-feature" "+fullfp16"
// CHECK-MCPU-KRYO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/Support/AArch64TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
// Table entry for the "armv8-r" architecture, identified as ARMV8R and
// carrying the strings "8-R" and "v8r" ("v8r" matches the "+v8r" feature
// emitted for this ArchKind by AArch64::getArchFeatures).
// NOTE(review): the trailing AEK_* bitmask is presumably the set of
// extensions enabled by default for this architecture -- confirm against the
// AARCH64_ARCH macro consumers, which are not visible in this hunk.
AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r",
ARMBuildAttrs::CPUArch::v8_R, FK_CRYPTO_NEON_FP_ARMV8,
(AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS |
AArch64::AEK_CRYPTO | AArch64::AEK_SM4 | AArch64::AEK_SHA3 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_DOTPROD |
AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_RAS | AArch64::AEK_RCPC |
AArch64::AEK_SB))
#undef AARCH64_ARCH

#ifndef AARCH64_ARCH_EXT_NAME
Expand Down Expand Up @@ -130,6 +138,8 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Support/AArch64TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
Features.push_back("+v8.5a");
if (AK == AArch64::ArchKind::ARMV8_6A)
Features.push_back("+v8.6a");
if(AK == AArch64::ArchKind::ARMV8R)
Features.push_back("+v8r");

return AK != ArchKind::INVALID;
}
Expand Down
33 changes: 30 additions & 3 deletions llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@ def FeatureLOR : SubtargetFeature<
"lor", "HasLOR", "true",
"Enables ARM v8.1 Limited Ordering Regions extension">;

def FeatureVH : SubtargetFeature<
"vh", "HasVH", "true",
"Enables ARM v8.1 Virtual Host extension">;
// Subtarget feature "CONTEXTIDREL2": sets the HasCONTEXTIDREL2 subtarget field
// to "true". Per its description it enables the read-write CONTEXTIDR_EL2
// operand, so that operand can be enabled on its own.
def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2",
"true", "Enable RW operand CONTEXTIDR_EL2" >;

def FeatureVH : SubtargetFeature<"vh", "HasVH", "true",
"Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >;

def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
Expand Down Expand Up @@ -441,6 +443,22 @@ def HasV8_6aOps : SubtargetFeature<
[HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>;

// Subtarget feature "v8r": sets the HasV8_0rOps subtarget field to "true" and
// implies every feature in the list below. The list is grouped by the inline
// comments naming the architecture revision (v8.1 through v8.5) each group of
// features is filed under.
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
//v8.2
FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
//v8.3
FeatureComplxNum, FeatureCCIDX, FeatureJS,
FeaturePA, FeatureRCPC,
//v8.4
FeatureDotProd, FeatureFP16FML, FeatureRASv8_4, FeatureTRACEV8_4,
FeatureTLB_RMI, FeatureFMI, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
//v8.5
FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -506,6 +524,7 @@ def PAUnsupported : AArch64Unsupported {
}

include "AArch64SchedA53.td"
include "AArch64SchedA55.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
Expand Down Expand Up @@ -652,6 +671,13 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
FeatureSSBS,
FeatureDotProd]>;

// Processor feature "cortex-r82": sets ARMProcFamily to CortexR82. The only
// feature it implies directly is HasV8_0rOps; everything else the core
// supports is pulled in through that feature's own implied list.
def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
"Cortex-R82 ARM Processors", [
// All features are implied by v8_0r ops:
HasV8_0rOps,
]>;

def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
HasV8_2aOps,
Expand Down Expand Up @@ -1013,6 +1039,7 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ void AArch64Subtarget::initializeProperties() {
case CortexA76:
case CortexA77:
case CortexA78:
case CortexR82:
case CortexX1:
PrefFunctionLogAlignment = 4;
break;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
CortexA76,
CortexA77,
CortexA78,
CortexR82,
CortexX1,
ExynosM3,
Falkor,
Expand Down Expand Up @@ -84,6 +85,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;

bool HasV8_0rOps = false;
bool HasCONTEXTIDREL2 = false;

bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
Expand Down Expand Up @@ -306,6 +310,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_0rOps() const { return HasV8_0rOps; }

bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }

Expand Down Expand Up @@ -343,6 +348,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool hasSHA3() const { return HasSHA3; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SystemOperands.td
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
AssemblerPredicate<(all_of FeaturePAN_RWV),
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;

// Predicate that holds when Subtarget->hasCONTEXTIDREL2() is true. On the
// assembler side it requires FeatureCONTEXTIDREL2, with the string below as
// the predicate's description.
def HasCONTEXTIDREL2
: Predicate<"Subtarget->hasCONTEXTIDREL2()">,
AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
"Target contains CONTEXTIDR_EL2 RW operand">;

//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1220,7 +1225,6 @@ def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeatureVH} }] in {
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>;
def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>;
Expand All @@ -1246,6 +1250,9 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
}
}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5251,6 +5251,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
Expand Down
144 changes: 144 additions & 0 deletions llvm/test/Transforms/GVN/loadpre-context.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s

; The load of %arg may be speculated: a context search finds the null check in
; the entry block, so the address is known to be non-null on the way into the
; loop.
; There is a critical edge (entry -> header): entry has two successors and
; header has two predecessors.
; The assertions below expect the load to stay in the header block.
define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
; CHECK-LABEL: @loadpre_critical_edge(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[SUM]]
; CHECK: null_exit:
; CHECK-NEXT: ret i32 0
;
; Null guard: leave through null_exit when %arg is null, otherwise branch
; straight into the loop header (no preheader block).
entry:
%cmp = icmp eq i32* %arg, null
br i1 %cmp, label %null_exit, label %header

; Loop body: the call to @foo precedes the load, and the store at the end of
; the iteration writes the value the next iteration's load reads.
header:
%iv = phi i32 [0, %entry], [%iv.next, %header]
%new_v = call i32 @foo(i32 %iv)
%v = load i32, i32* %arg
%sum = add i32 %new_v, %v
store i32 %sum, i32* %arg
%iv.next = add i32 %iv, 1
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %header

exit:
ret i32 %sum

null_exit:
ret i32 0
}

; The load of %arg may be speculated: a context search finds the null check in
; the entry block, so the address is known to be non-null on the way into the
; loop.
; The loop has a dedicated preheader block, so the edge into the loop is not
; critical.
; The assertions below expect the load to stay in the header block.
define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
; CHECK-LABEL: @loadpre_basic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[SUM]]
; CHECK: null_exit:
; CHECK-NEXT: ret i32 0
;
; Null guard: leave through null_exit when %arg is null, otherwise continue to
; the preheader.
entry:
%cmp = icmp eq i32* %arg, null
br i1 %cmp, label %null_exit, label %preheader

; Reached only from the non-null side of the guard in entry.
preheader:
br label %header

; Loop body: the call to @foo precedes the load, and the store at the end of
; the iteration writes the value the next iteration's load reads.
header:
%iv = phi i32 [0, %preheader], [%iv.next, %header]
%new_v = call i32 @foo(i32 %iv)
%v = load i32, i32* %arg
%sum = add i32 %new_v, %v
store i32 %sum, i32* %arg
%iv.next = add i32 %iv, 1
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %header

exit:
ret i32 %sum

null_exit:
ret i32 0
}

; The load cannot be speculated: the "address is not null" check does not
; dominate the loop. The preheader is also reachable directly from entry
; (when %c is false), bypassing null_check, so %arg may still be null there.
; The assertions below expect the load to stay in the header block.
define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
; CHECK-LABEL: @loadpre_maybe_null(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[NULL_CHECK:%.*]], label [[PREHEADER:%.*]]
; CHECK: null_check:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER]]
; CHECK: preheader:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[SUM]]
; CHECK: null_exit:
; CHECK-NEXT: ret i32 0
;
; %c decides whether the null check runs at all; the false edge skips it and
; goes straight to the preheader.
entry:
br i1 %c, label %null_check, label %preheader

; Only this path tests %arg against null.
null_check:
%cmp = icmp eq i32* %arg, null
br i1 %cmp, label %null_exit, label %preheader

; Two predecessors: entry (unchecked) and null_check (checked non-null).
preheader:
br label %header

; Loop body: the call to @foo precedes the load, and the store at the end of
; the iteration writes the value the next iteration's load reads.
header:
%iv = phi i32 [0, %preheader], [%iv.next, %header]
%new_v = call i32 @foo(i32 %iv)
%v = load i32, i32* %arg
%sum = add i32 %new_v, %v
store i32 %sum, i32* %arg
%iv.next = add i32 %iv, 1
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %header

exit:
ret i32 %sum

null_exit:
ret i32 0
}

; @foo is not guaranteed to return (it is declared readnone but not willreturn).
declare i32 @foo(i32) readnone
Loading

0 comments on commit ae984c5

Please sign in to comment.