-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[SelectionDAG][RISCV] Teach computeKnownBits to use range metadata for atomic_load. #137119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…r atomic_load. And teach SelectionDAGBuilder to get the range metadata in visitAtomicLoad. This allows us to recognize that sign extending a byte load of a boolean value from memory will produces zeros for the extended bits. This allow us to remove an AND on RISC-V. Tests copied from llvm#136502 with range metadata added to i1 cases. Some of the test effects overlap with llvm#136502, but that patch can't handle the acquire or seq_cst cases with the Zalasr extension. We only have sign extending versions of those loads.
@llvm/pr-subscribers-llvm-selectiondag Author: Craig Topper (topperc) ChangesAnd teach SelectionDAGBuilder to get the range metadata in This allows us to recognize that sign extending a byte load of a Tests copied from #136502 with range metadata added to i1 cases. Patch is 50.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137119.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cf88c1f4ae937..092dbf926eec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4382,6 +4382,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
break;
}
+ case ISD::ATOMIC_LOAD: {
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ auto *AT = cast<AtomicSDNode>(Op);
+ unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
+ KnownBits KnownScalarMemory(ScalarMemorySize);
+ if (const MDNode *MD = AT->getRanges())
+ computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);
+
+ switch (AT->getExtensionType()) {
+ case ISD::ZEXTLOAD:
+ Known = KnownScalarMemory.zext(BitWidth);
+ break;
+ case ISD::SEXTLOAD:
+ Known = KnownScalarMemory.sext(BitWidth);
+ break;
+ case ISD::EXTLOAD:
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ Known = KnownScalarMemory.zext(BitWidth);
+ else if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ Known = KnownScalarMemory.sext(BitWidth);
+ else
+ Known = KnownScalarMemory.anyext(BitWidth);
+ break;
+ case ISD::NON_EXTLOAD:
+ Known = KnownScalarMemory;
+ break;
+ }
+ assert(Known.getBitWidth() == BitWidth);
+ }
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() == 1) {
// The boolean result conforms to getBooleanContents.
@@ -4407,21 +4439,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD: {
+ case ISD::ATOMIC_LOAD_UMAX: {
// If we are looking at the loaded value.
if (Op.getResNo() == 0) {
auto *AT = cast<AtomicSDNode>(Op);
unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();
- // For atomic_load, prefer to use the extension type.
- if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
- if (AT->getExtensionType() == ISD::ZEXTLOAD)
- Known.Zero.setBitsFrom(MemBits);
- else if (AT->getExtensionType() != ISD::SEXTLOAD &&
- TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
- Known.Zero.setBitsFrom(MemBits);
- } else if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
}
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 66bd78af2939c..4b05d89417d2c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5150,9 +5150,10 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
+ const MDNode *Ranges = getRangeMetadata(I);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
+ I.getAlign(), AAMDNodes(), Ranges, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..8097179443791
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1362 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT...
[truncated]
|
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) | ||
Known = KnownScalarMemory.zext(BitWidth); | ||
else if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND) | ||
Known = KnownScalarMemory.sext(BitWidth); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid repeated getExtendForAtomicOps, switch over it? We should get rid of it eventually
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -4382,6 +4382,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, | |||
Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits()); | |||
break; | |||
} | |||
case ISD::ATOMIC_LOAD: { | |||
// If we are looking at the loaded value. | |||
if (Op.getResNo() == 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(minor) Possibly could be an assertion, chain result is not a part of data flow
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was keeping consistency with ISD::LOAD. Maybe we should change all of them at once?
And teach SelectionDAGBuilder to get the range metadata in
visitAtomicLoad.
This allows us to recognize that sign extending a byte load of a
boolean value from memory will produce zeros for the extended bits.
This allow us to remove an AND on RISC-V.
Tests copied from #136502 with range metadata added to i1 cases.
Some of the test effects overlap with #136502, but that patch can't
handle the acquire or seq_cst cases with the Zalasr extension. We
only have sign extending versions of those loads.