From d95a1d60836150b807b59dce0a4927c488062c7a Mon Sep 17 00:00:00 2001
From: Ronen Ulanovsky
Date: Sun, 16 Jul 2023 04:58:04 +0300
Subject: [PATCH] [Xtensa] Implement CTLZ/CTTZ with NSAU

Close https://github.com/espressif/llvm-project/pull/77
---
NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Blank lines were placed using the hunk line counts; the leading
indentation of context lines is assumed to follow LLVM formatting and should be
confirmed against the tree (`git am --ignore-whitespace` tolerates mismatches
in context-line whitespace).

 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 22 ++++--
 llvm/lib/Target/Xtensa/XtensaISelLowering.h   |  5 ++
 llvm/lib/Target/Xtensa/XtensaInstrInfo.td     |  3 +-
 llvm/test/CodeGen/Xtensa/ctlz-cttz.ll         | 67 +++++++++++++++++++
 llvm/test/MC/Xtensa/xtensa-valid-nsa.s        | 12 ++++
 5 files changed, 104 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/Xtensa/ctlz-cttz.ll
 create mode 100644 llvm/test/MC/Xtensa/xtensa-valid-nsa.s

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 28b47e142d84ea..f3c0e92f179b9d 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -201,10 +201,12 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
   setOperationAction(ISD::ROTR, MVT::i32, Expand);
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
-  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
-  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
-  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Expand);
+  if (Subtarget.hasNSA())
+    setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+  else
+    setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Expand);
+
   setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
                      Subtarget.hasMINMAX() ? Legal : Expand);
 
@@ -409,6 +411,18 @@ bool XtensaTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
   return false;
 }
 
+bool XtensaTargetLowering::isCheapToSpeculateCtlz(Type *) const {
+  return Subtarget.hasNSA();
+}
+
+bool XtensaTargetLowering::isCheapToSpeculateCttz(Type *) const {
+  return Subtarget.hasNSA();
+}
+
+bool XtensaTargetLowering::isCtlzFast() const {
+  return Subtarget.hasNSA();
+}
+
 /// If a physical register, this returns the register that receives the
 /// exception address on entry to an EH pad.
 Register XtensaTargetLowering::getExceptionPointerRegister(
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index 0d472ccdce5767..3f170d57e16b9b 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -121,6 +121,11 @@ class XtensaTargetLowering : public TargetLowering {
                                   EVT VT) const override;
 
   bool isFNegFree(EVT VT) const override;
+  bool isCheapToSpeculateCtlz(Type *Ty) const override;
+
+  bool isCheapToSpeculateCttz(Type *Ty) const override;
+
+  bool isCtlzFast() const override;
 
   /// If a physical register, this returns the register that receives the
   /// exception address on entry to an EH pad.
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index 20b36155eb9906..51b0dd9a0cba58 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -1362,7 +1362,8 @@ def NSA : RRR_Inst<0x00, 0x00, 0x04, (outs AR:$t), (ins AR:$s),
 }
 
 def NSAU : RRR_Inst<0x00, 0x00, 0x04, (outs AR:$t), (ins AR:$s),
-                    "nsau\t$t, $s", []>, Requires<[HasNSA]> {
+                    "nsau\t$t, $s",
+                    [(set AR:$t, (ctlz AR:$s))]>, Requires<[HasNSA]> {
   let r = 0xF;
 }
 
diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz.ll
new file mode 100644
index 00000000000000..8008ba354e6ab8
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=xtensa -mcpu=esp32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @test1_ctlz(i32 %v) {
+; XTENSA-LABEL: test1_ctlz:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: nsau a2, a2
+; XTENSA-NEXT: retw.n
+  %1 = tail call i32 @llvm.ctlz.i32(i32 %v, i1 false)
+  ret i32 %1
+}
+
+define i32 @test2_ctlz(i32 %v) {
+; XTENSA-LABEL: test2_ctlz:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: nsau a2, a2
+; XTENSA-NEXT: retw.n
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @test1_cttz(i32 %v) {
+; XTENSA-LABEL: test1_cttz:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: movi.n a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi.n a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: nsau a8, a8
+; XTENSA-NEXT: movi.n a9, 32
+; XTENSA-NEXT: sub a2, a9, a8
+; XTENSA-NEXT: retw.n
+  %1 = tail call i32 @llvm.cttz.i32(i32 %v, i1 false)
+  ret i32 %1
+}
+
+define i32 @test2_cttz(i32 %v) {
+; XTENSA-LABEL: test2_cttz:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: movi.n a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi.n a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: nsau a8, a8
+; XTENSA-NEXT: movi.n a9, 32
+; XTENSA-NEXT: sub a2, a9, a8
+; XTENSA-NEXT: retw.n
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
diff --git a/llvm/test/MC/Xtensa/xtensa-valid-nsa.s b/llvm/test/MC/Xtensa/xtensa-valid-nsa.s
new file mode 100644
index 00000000000000..150818b2dbaa99
--- /dev/null
+++ b/llvm/test/MC/Xtensa/xtensa-valid-nsa.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc %s -triple=xtensa -mattr=+nsa -show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s
+
+# Instruction format RRR
+# CHECK-INST: nsa a3, a2
+# CHECK: encoding: [0x30,0xe2,0x40]
+nsa a3, a2
+
+# Instruction format RRR
+# CHECK-INST: nsau a3, a2
+# CHECK: encoding: [0x30,0xf2,0x40]
+nsau a3, a2