Skip to content

Commit 2ec70db

Browse files
author
Igor Breger
committed
AVX512: Fix scalar mem operands.
Differential Revision: http://reviews.llvm.org/D17500
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261520 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8659296 commit 2ec70db

File tree

2 files changed

+71
-68
lines changed

2 files changed

+71
-68
lines changed

lib/Target/X86/X86InstrAVX512.td

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1414,7 +1414,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
14141414
let mayLoad = 1 in
14151415
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
14161416
(outs _.KRC:$dst),
1417-
(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
1417+
(ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
14181418
"vcmp${cc}"#_.Suffix,
14191419
"$src2, $src1", "$src1, $src2",
14201420
(OpNode (_.VT _.RC:$src1),
@@ -1439,7 +1439,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
14391439
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
14401440
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
14411441
(outs _.KRC:$dst),
1442-
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
1442+
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
14431443
"vcmp"#_.Suffix,
14441444
"$cc, $src2, $src1", "$src1, $src2, $cc">,
14451445
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
@@ -3616,7 +3616,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
36163616
itins.rr, IsCommutable>;
36173617

36183618
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
3619-
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
3619+
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
36203620
"$src2, $src1", "$src1, $src2",
36213621
(VecNode (_.VT _.RC:$src1),
36223622
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
@@ -3821,9 +3821,11 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
38213821
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
38223822
let mayLoad = 1 in {
38233823
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
3824-
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
3824+
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
38253825
"$src2, $src1", "$src1, $src2",
3826-
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>;
3826+
(OpNode _.RC:$src1,
3827+
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
3828+
(i32 FROUND_CURRENT))>;
38273829
}//let mayLoad = 1
38283830
}
38293831

@@ -4691,7 +4693,7 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
46914693

46924694
let mayLoad = 1 in
46934695
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4694-
(ins _.RC:$src2, _.MemOp:$src3), OpcodeStr,
4696+
(ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
46954697
"$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
46964698

46974699
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5035,7 +5037,7 @@ let Predicates = [HasAVX512] in {
50355037
def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
50365038
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
50375039
[]>, EVEX, EVEX_B;
5038-
def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
5040+
def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
50395041
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
50405042
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
50415043
EVEX;
@@ -5109,7 +5111,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
51095111
(_Src.VT _Src.RC:$src2)))>,
51105112
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
51115113
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5112-
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5114+
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
51135115
"$src2, $src1", "$src1, $src2",
51145116
(_.VT (OpNode (_Src.VT _Src.RC:$src1),
51155117
(_Src.VT (scalar_to_vector
@@ -5206,7 +5208,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
52065208
(bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
52075209

52085210
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5209-
(ins _Src.MemOp:$src), OpcodeStr,
5211+
(ins _Src.ScalarMemOp:$src), OpcodeStr,
52105212
"${src}"##Broadcast, "${src}"##Broadcast,
52115213
(_.VT (OpNode (_Src.VT
52125214
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
@@ -5739,7 +5741,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
57395741
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
57405742
let mayLoad = 1 in {
57415743
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5742-
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5744+
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
57435745
"$src2, $src1", "$src1, $src2",
57445746
(OpNode (_.VT _.RC:$src1),
57455747
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
@@ -5819,7 +5821,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
58195821
(i32 FROUND_NO_EXC))>, EVEX_B;
58205822

58215823
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5822-
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5824+
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
58235825
"$src2, $src1", "$src1, $src2",
58245826
(OpNode (_.VT _.RC:$src1),
58255827
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
@@ -5855,7 +5857,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
58555857
(i32 FROUND_CURRENT))>;
58565858

58575859
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5858-
(ins _.MemOp:$src), OpcodeStr,
5860+
(ins _.ScalarMemOp:$src), OpcodeStr,
58595861
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
58605862
(OpNode (_.FloatVT
58615863
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
@@ -5973,7 +5975,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
59735975
(i32 FROUND_CURRENT))>;
59745976
let mayLoad = 1 in
59755977
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5976-
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5978+
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
59775979
"$src2, $src1", "$src1, $src2",
59785980
(OpNodeRnd (_.VT _.RC:$src1),
59795981
(_.VT (scalar_to_vector
@@ -6051,7 +6053,8 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
60516053

60526054
let mayLoad = 1 in
60536055
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6054-
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
6056+
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
6057+
OpcodeStr,
60556058
"$src3, $src2, $src1", "$src1, $src2, $src3",
60566059
(_.VT (X86RndScales (_.VT _.RC:$src1),
60576060
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),

test/MC/X86/avx512-encodings.s

Lines changed: 54 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -6416,29 +6416,29 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
64166416
// CHECK: encoding: [0x62,0xb1,0x56,0x78,0x58,0xfb]
64176417
vaddss {rz-sae}, %xmm19, %xmm5, %xmm7
64186418

6419-
// CHECK: vaddss (%rcx), %xmm5, %xmm7
6420-
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x39]
6421-
vaddss (%rcx), %xmm5, %xmm7
6419+
// CHECK: vaddss (%rcx), %xmm25, %xmm7
6420+
// CHECK: encoding: [0x62,0xf1,0x36,0x00,0x58,0x39]
6421+
vaddss (%rcx), %xmm25, %xmm7
64226422

6423-
// CHECK: vaddss 291(%rax,%r14,8), %xmm5, %xmm7
6424-
// CHECK: encoding: [0x62,0xb1,0x56,0x08,0x58,0xbc,0xf0,0x23,0x01,0x00,0x00]
6425-
vaddss 291(%rax,%r14,8), %xmm5, %xmm7
6423+
// CHECK: vaddss 291(%rax,%r14,8), %xmm25, %xmm7
6424+
// CHECK: encoding: [0x62,0xb1,0x36,0x00,0x58,0xbc,0xf0,0x23,0x01,0x00,0x00]
6425+
vaddss 291(%rax,%r14,8), %xmm25, %xmm7
64266426

6427-
// CHECK: vaddss 508(%rdx), %xmm5, %xmm7
6428-
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x7a,0x7f]
6429-
vaddss 508(%rdx), %xmm5, %xmm7
6427+
// CHECK: vaddss 508(%rdx), %xmm25, %xmm7
6428+
// CHECK: encoding: [0x62,0xf1,0x36,0x00,0x58,0x7a,0x7f]
6429+
vaddss 508(%rdx), %xmm25, %xmm7
64306430

6431-
// CHECK: vaddss 512(%rdx), %xmm5, %xmm7
6432-
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0xba,0x00,0x02,0x00,0x00]
6433-
vaddss 512(%rdx), %xmm5, %xmm7
6431+
// CHECK: vaddss 512(%rdx), %xmm25, %xmm7
6432+
// CHECK: encoding: [0x62,0xf1,0x36,0x00,0x58,0xba,0x00,0x02,0x00,0x00]
6433+
vaddss 512(%rdx), %xmm25, %xmm7
64346434

6435-
// CHECK: vaddss -512(%rdx), %xmm5, %xmm7
6436-
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x7a,0x80]
6437-
vaddss -512(%rdx), %xmm5, %xmm7
6435+
// CHECK: vaddss -512(%rdx), %xmm25, %xmm7
6436+
// CHECK: encoding: [0x62,0xf1,0x36,0x00,0x58,0x7a,0x80]
6437+
vaddss -512(%rdx), %xmm25, %xmm7
64386438

6439-
// CHECK: vaddss -516(%rdx), %xmm5, %xmm7
6440-
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0xba,0xfc,0xfd,0xff,0xff]
6441-
vaddss -516(%rdx), %xmm5, %xmm7
6439+
// CHECK: vaddss -516(%rdx), %xmm25, %xmm7
6440+
// CHECK: encoding: [0x62,0xf1,0x36,0x00,0x58,0xba,0xfc,0xfd,0xff,0xff]
6441+
vaddss -516(%rdx), %xmm25, %xmm7
64426442

64436443
// CHECK: vdivpd {rn-sae}, %zmm11, %zmm6, %zmm18
64446444
// CHECK: encoding: [0x62,0xc1,0xcd,0x18,0x5e,0xd3]
@@ -6640,29 +6640,29 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
66406640
// CHECK: encoding: [0x62,0x71,0x5e,0x18,0x5f,0xc6]
66416641
vmaxss {sae}, %xmm6, %xmm4, %xmm8
66426642

6643-
// CHECK: vmaxss (%rcx), %xmm4, %xmm8
6644-
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x01]
6645-
vmaxss (%rcx), %xmm4, %xmm8
6643+
// CHECK: vmaxss (%rcx), %xmm4, %xmm18
6644+
// CHECK: encoding: [0x62,0xe1,0x5e,0x08,0x5f,0x11]
6645+
vmaxss (%rcx), %xmm4, %xmm18
66466646

6647-
// CHECK: vmaxss 291(%rax,%r14,8), %xmm4, %xmm8
6648-
// CHECK: encoding: [0x62,0x31,0x5e,0x08,0x5f,0x84,0xf0,0x23,0x01,0x00,0x00]
6649-
vmaxss 291(%rax,%r14,8), %xmm4, %xmm8
6647+
// CHECK: vmaxss 291(%rax,%r14,8), %xmm4, %xmm18
6648+
// CHECK: encoding: [0x62,0xa1,0x5e,0x08,0x5f,0x94,0xf0,0x23,0x01,0x00,0x00]
6649+
vmaxss 291(%rax,%r14,8), %xmm4, %xmm18
66506650

6651-
// CHECK: vmaxss 508(%rdx), %xmm4, %xmm8
6652-
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x42,0x7f]
6653-
vmaxss 508(%rdx), %xmm4, %xmm8
6651+
// CHECK: vmaxss 508(%rdx), %xmm4, %xmm18
6652+
// CHECK: encoding: [0x62,0xe1,0x5e,0x08,0x5f,0x52,0x7f]
6653+
vmaxss 508(%rdx), %xmm4, %xmm18
66546654

6655-
// CHECK: vmaxss 512(%rdx), %xmm4, %xmm8
6656-
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x82,0x00,0x02,0x00,0x00]
6657-
vmaxss 512(%rdx), %xmm4, %xmm8
6655+
// CHECK: vmaxss 512(%rdx), %xmm4, %xmm18
6656+
// CHECK: encoding: [0x62,0xe1,0x5e,0x08,0x5f,0x92,0x00,0x02,0x00,0x00]
6657+
vmaxss 512(%rdx), %xmm4, %xmm18
66586658

6659-
// CHECK: vmaxss -512(%rdx), %xmm4, %xmm8
6660-
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x42,0x80]
6661-
vmaxss -512(%rdx), %xmm4, %xmm8
6659+
// CHECK: vmaxss -512(%rdx), %xmm4, %xmm18
6660+
// CHECK: encoding: [0x62,0xe1,0x5e,0x08,0x5f,0x52,0x80]
6661+
vmaxss -512(%rdx), %xmm4, %xmm18
66626662

6663-
// CHECK: vmaxss -516(%rdx), %xmm4, %xmm8
6664-
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x82,0xfc,0xfd,0xff,0xff]
6665-
vmaxss -516(%rdx), %xmm4, %xmm8
6663+
// CHECK: vmaxss -516(%rdx), %xmm4, %xmm18
6664+
// CHECK: encoding: [0x62,0xe1,0x5e,0x08,0x5f,0x92,0xfc,0xfd,0xff,0xff]
6665+
vmaxss -516(%rdx), %xmm4, %xmm18
66666666

66676667
// CHECK: vminpd {sae}, %zmm22, %zmm6, %zmm6
66686668
// CHECK: encoding: [0x62,0xb1,0xcd,0x18,0x5d,0xf6]
@@ -6812,29 +6812,29 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
68126812
// CHECK: encoding: [0x62,0x31,0xdf,0x78,0x59,0xea]
68136813
vmulsd {rz-sae}, %xmm18, %xmm4, %xmm13
68146814

6815-
// CHECK: vmulsd (%rcx), %xmm4, %xmm13
6816-
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x29]
6817-
vmulsd (%rcx), %xmm4, %xmm13
6815+
// CHECK: vmulsd (%rcx), %xmm4, %xmm23
6816+
// CHECK: encoding: [0x62,0xe1,0xdf,0x08,0x59,0x39]
6817+
vmulsd (%rcx), %xmm4, %xmm23
68186818

6819-
// CHECK: vmulsd 291(%rax,%r14,8), %xmm4, %xmm13
6820-
// CHECK: encoding: [0x62,0x31,0xdf,0x08,0x59,0xac,0xf0,0x23,0x01,0x00,0x00]
6821-
vmulsd 291(%rax,%r14,8), %xmm4, %xmm13
6819+
// CHECK: vmulsd 291(%rax,%r14,8), %xmm4, %xmm23
6820+
// CHECK: encoding: [0x62,0xa1,0xdf,0x08,0x59,0xbc,0xf0,0x23,0x01,0x00,0x00]
6821+
vmulsd 291(%rax,%r14,8), %xmm4, %xmm23
68226822

6823-
// CHECK: vmulsd 1016(%rdx), %xmm4, %xmm13
6824-
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x6a,0x7f]
6825-
vmulsd 1016(%rdx), %xmm4, %xmm13
6823+
// CHECK: vmulsd 1016(%rdx), %xmm4, %xmm23
6824+
// CHECK: encoding: [0x62,0xe1,0xdf,0x08,0x59,0x7a,0x7f]
6825+
vmulsd 1016(%rdx), %xmm4, %xmm23
68266826

6827-
// CHECK: vmulsd 1024(%rdx), %xmm4, %xmm13
6828-
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0xaa,0x00,0x04,0x00,0x00]
6829-
vmulsd 1024(%rdx), %xmm4, %xmm13
6827+
// CHECK: vmulsd 1024(%rdx), %xmm4, %xmm23
6828+
// CHECK: encoding: [0x62,0xe1,0xdf,0x08,0x59,0xba,0x00,0x04,0x00,0x00]
6829+
vmulsd 1024(%rdx), %xmm4, %xmm23
68306830

6831-
// CHECK: vmulsd -1024(%rdx), %xmm4, %xmm13
6832-
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x6a,0x80]
6833-
vmulsd -1024(%rdx), %xmm4, %xmm13
6831+
// CHECK: vmulsd -1024(%rdx), %xmm4, %xmm23
6832+
// CHECK: encoding: [0x62,0xe1,0xdf,0x08,0x59,0x7a,0x80]
6833+
vmulsd -1024(%rdx), %xmm4, %xmm23
68346834

6835-
// CHECK: vmulsd -1032(%rdx), %xmm4, %xmm13
6836-
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0xaa,0xf8,0xfb,0xff,0xff]
6837-
vmulsd -1032(%rdx), %xmm4, %xmm13
6835+
// CHECK: vmulsd -1032(%rdx), %xmm4, %xmm23
6836+
// CHECK: encoding: [0x62,0xe1,0xdf,0x08,0x59,0xba,0xf8,0xfb,0xff,0xff]
6837+
vmulsd -1032(%rdx), %xmm4, %xmm23
68386838

68396839
// CHECK: vmulss %xmm14, %xmm10, %xmm22
68406840
// CHECK: encoding: [0x62,0xc1,0x2e,0x08,0x59,0xf6]

0 commit comments

Comments
 (0)