Skip to content

Commit

Permalink
[SystemZ] Don't create PERMUTE nodes with an undef operand.
Browse files Browse the repository at this point in the history
It's better to reuse the first source value than to use an undef second
operand, because that will make more of the resulting VPERMs have identical
operands and therefore make MachineCSE more successful.

Review: Ulrich Weigand
  • Loading branch information
JonPsson committed May 18, 2020
1 parent 691980e commit 31ecef7
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 4 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
(!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}

namespace {
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/SystemZ/vec-perm-14.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
;
; Test that only one vperm of the vector compare is needed for both extracts.

; A single vector compare result (%tmp1) has two of its elements extracted,
; one in the entry block and one in a later block, each feeding a branch.
define void @fun() {
; The directives below require one vperm in the output for this function
; and reject any further vperm after it.
; CHECK-LABEL: fun
; CHECK: vperm
; CHECK-NOT: vperm
bb:
; Load a <4 x i8> through an undef pointer and compare every element with zero.
%tmp = load <4 x i8>, <4 x i8>* undef
%tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
; First extract: element 0 of the compare result decides the branch.
%tmp2 = extractelement <4 x i1> %tmp1, i32 0
br i1 %tmp2, label %bb1, label %bb2

bb1:
unreachable

bb2:
; Second extract: element 1 of the same compare result, in a different block.
%tmp3 = extractelement <4 x i1> %tmp1, i32 1
br i1 %tmp3, label %bb3, label %bb4

bb3:
unreachable

bb4:
unreachable
}
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v1, 0(%r2), 4
; SZ13-NEXT: ld %f0, 16(%r2)
; SZ13-NEXT: vledb %v1, %v1, 0, 0
; SZ13-NEXT: larl %r1, .LCPI97_0
; SZ13-NEXT: ld %f0, 16(%r2)
; SZ13-NEXT: vl %v2, 0(%r1), 3
; SZ13-NEXT: vperm %v1, %v1, %v0, %v2
; SZ13-NEXT: ledbra %f0, 0, %f0, 0
; SZ13-NEXT: vl %v2, 0(%r1), 3
; SZ13-NEXT: vperm %v1, %v1, %v1, %v2
; SZ13-NEXT: ste %f0, 8(%r3)
; SZ13-NEXT: vsteg %v1, 0(%r3), 0
; SZ13-NEXT: br %r14
Expand Down

0 comments on commit 31ecef7

Please sign in to comment.