Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

[SystemZ] Support LRVH and STRVH opcodes #14

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);

// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
Expand Down Expand Up @@ -4601,6 +4602,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
OPCODE(LRV);
OPCODE(STRV);
OPCODE(PREFETCH);
}
return nullptr;
Expand Down Expand Up @@ -4897,6 +4900,74 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}

// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
// These loads are allowed to access memory multiple times, and so we must check
// that the loads are not volatile before performing the combine.
if (Opcode == ISD::BSWAP &&
ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64) &&
!cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);

// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(N->getValueType(0)) // VT
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());

// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
if (N->getValueType(0) == MVT::i16)
ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

// First, combine the bswap away. This makes the value produced by the
// load dead.
DCI.CombineTo(N, ResVal);

// Next, combine the load away, we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}

// Combine STORE (BSWAP) into STRVH/STRV/STRVG
// See comment above about volatile accesses.
if (Opcode == ISD::STORE &&
!cast<StoreSDNode>(N)->isVolatile() &&
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i16 ||
N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i64)) {

SDValue BSwapOp = N->getOperand(1).getOperand(0);

if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

SDValue Ops[] = {
N->getOperand(0), BSwapOp, N->getOperand(2),
DAG.getValueType(N->getOperand(1).getValueType())
};

return
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}

return SDValue();
}

Expand Down
13 changes: 13 additions & 0 deletions lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,19 @@ enum NodeType : unsigned {
// Operand 5: the width of the field in bits (8 or 16)
ATOMIC_CMP_SWAPW,

// Byte swapping load.
//
// Operand 0: the address to load from
// Operand 1: the type of load (i16, i32, i64)
LRV,

// Byte swapping store.
//
// Operand 0: the value to store
// Operand 1: the address to store to
// Operand 2: the type of store (i16, i32, i64)
STRV,

// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
Expand Down
11 changes: 6 additions & 5 deletions lib/Target/SystemZ/SystemZInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -563,13 +563,14 @@ let hasSideEffects = 0 in {

// Byte-swapping loads. Unlike normal loads, these instructions are
// allowed to access storage more than once.
def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;

// Likewise byte-swapping stores.
def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
GR64, 8>;
def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;

//===----------------------------------------------------------------------===//
// Load address instructions
Expand Down
24 changes: 24 additions & 0 deletions lib/Target/SystemZ/SystemZOperators.td
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
// Type profile for a byte-swapping load: one integer result and two
// operands, namely the address (pointer type) and a value-type operand
// (OtherVT) that names the width of the access.
def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
[SDTCisInt<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
// Type profile for a byte-swapping store: no results and three operands,
// namely the integer value to store, the address (pointer type) and a
// value-type operand (OtherVT) that names the width of the access.
def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
[SDTCisInt<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<0, 2,
[SDTCisPtrTy<0>,
SDTCisVT<1, i32>]>;
Expand Down Expand Up @@ -191,6 +199,11 @@ def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;

// Byte-swapping load and store nodes (SystemZISD::LRV / SystemZISD::STRV).
// Both carry a chain and a memory operand; the load is flagged as possibly
// reading memory and the store as possibly writing it.
def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

// Defined because the index is an i32 rather than a pointer.
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
SDT_ZInsertVectorElt>;
Expand Down Expand Up @@ -331,6 +344,17 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//

// Byte-swapping loads, distinguished by the value-type operand of
// z_loadbswap: i16 (z_lrvh), i32 (z_lrv) and i64 (z_lrvg).
def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;

// Byte-swapping stores, distinguished likewise by the value-type operand of
// z_storebswap: i16 (z_strvh), i32 (z_strv) and i64 (z_strvg).
def z_strvh : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i16)>;
def z_strv : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i32)>;
def z_strvg : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i64)>;

// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
Expand Down
99 changes: 99 additions & 0 deletions test/CodeGen/SystemZ/bswap-06.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
; Test 16-bit byteswaps from memory to registers.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i16 @llvm.bswap.i16(i16 %a)

; Check LRVH with no displacement.
define i16 @f1(i16 *%src) {
; CHECK-LABEL: f1:
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%a = load i16 , i16 *%src
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check the high end of the aligned LRVH range.
define i16 @f2(i16 *%src) {
; CHECK-LABEL: f2:
; CHECK: lrvh %r2, 524286(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 262143
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define i16 @f3(i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: agfi %r2, 524288
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 262144
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check the high end of the negative aligned LRVH range.
define i16 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: lrvh %r2, -2(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -1
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check the low end of the LRVH range.
define i16 @f5(i16 *%src) {
; CHECK-LABEL: f5:
; CHECK: lrvh %r2, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -262144
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define i16 @f6(i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: agfi %r2, -524290
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -262145
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check that LRVH allows an index.
define i16 @f7(i64 %src, i64 %index) {
; CHECK-LABEL: f7:
; CHECK: lrvh %r2, 524287({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

; Check that volatile accesses do not use LRVH, which might access the
; storage multiple times.
define i16 @f8(i16 *%src) {
; CHECK-LABEL: f8:
; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
; CHECK: lrvr %r2, [[REG]]
; CHECK: br %r14
%a = load volatile i16 , i16 *%src
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
100 changes: 100 additions & 0 deletions test/CodeGen/SystemZ/bswap-07.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
; Test 16-bit byteswaps from registers to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i16 @llvm.bswap.i16(i16 %a)

; Check STRVH with no displacement.
define void @f1(i16 *%dst, i16 %a) {
; CHECK-LABEL: f1:
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%dst
ret void
}

; Check the high end of the aligned STRVH range.
define void @f2(i16 *%dst, i16 %a) {
; CHECK-LABEL: f2:
; CHECK: strvh %r3, 524286(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 262143
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f3(i16 *%dst, i16 %a) {
; CHECK-LABEL: f3:
; CHECK: agfi %r2, 524288
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 262144
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check the high end of the negative aligned STRVH range.
define void @f4(i16 *%dst, i16 %a) {
; CHECK-LABEL: f4:
; CHECK: strvh %r3, -2(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -1
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check the low end of the STRVH range.
define void @f5(i16 *%dst, i16 %a) {
; CHECK-LABEL: f5:
; CHECK: strvh %r3, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -262144
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f6(i16 *%dst, i16 %a) {
; CHECK-LABEL: f6:
; CHECK: agfi %r2, -524290
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -262145
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check that STRVH allows an index.
define void @f7(i64 %src, i64 %index, i16 %a) {
; CHECK-LABEL: f7:
; CHECK: strvh %r4, 524287({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}

; Check that volatile stores do not use STRVH, which might access the
; storage multiple times.
define void @f8(i16 *%dst, i16 %a) {
; CHECK-LABEL: f8:
; CHECK: lrvr [[REG:%r[0-5]]], %r3
; CHECK: srl [[REG]], 16
; CHECK: sth [[REG]], 0(%r2)
; CHECK: br %r14
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store volatile i16 %swapped, i16 *%dst
ret void
}
Loading