Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 467016e

Browse files
committed
Fix 256-bit PALIGNR comment decoding to understand that it works on independent 256-bit lanes.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173674 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6ab4cbc commit 467016e

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

lib/Target/X86/Utils/X86ShuffleDecode.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,17 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm,
6666
unsigned NumElts = VT.getVectorNumElements();
6767
unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
6868

69-
for (unsigned i = 0; i != NumElts; ++i)
70-
ShuffleMask.push_back((i + Offset) % (NumElts * 2));
69+
unsigned NumLanes = VT.getSizeInBits() / 128;
70+
unsigned NumLaneElts = NumElts / NumLanes;
71+
72+
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
73+
for (unsigned i = 0; i != NumLaneElts; ++i) {
74+
unsigned Base = i + Offset;
75+
// If i+Offset is outside this lane, then we actually need the element from the other source.
76+
if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
77+
ShuffleMask.push_back(Base + l);
78+
}
79+
}
7180
}
7281

7382
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.

test/MC/X86/shuffle-comments.s

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,18 @@ vpalignr $0, %xmm0, %xmm1, %xmm2
2929
# CHECK: xmm2 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
3030
vpalignr $0, (%rax), %xmm1, %xmm2
3131
# CHECK: xmm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
32+
33+
vpalignr $8, %ymm0, %ymm1, %ymm2
34+
# CHECK: ymm2 = ymm0[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
35+
vpalignr $8, (%rax), %ymm1, %ymm2
36+
# CHECK: ymm2 = mem[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],mem[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
37+
38+
vpalignr $16, %ymm0, %ymm1, %ymm2
39+
# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
40+
vpalignr $16, (%rax), %ymm1, %ymm2
41+
# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
42+
43+
vpalignr $0, %ymm0, %ymm1, %ymm2
44+
# CHECK: ymm2 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
45+
vpalignr $0, (%rax), %ymm1, %ymm2
46+
# CHECK: ymm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]

0 commit comments

Comments
 (0)