Skip to content

Commit

Permalink
[X86] avx512fp16: add missing instruction selection patterns for "i16…
Browse files Browse the repository at this point in the history
…" `VMOVSH`

All other patterns consistently have both integer (I) and floating-point (F)
variants; let's not diverge here.

Fixes #59628
  • Loading branch information
LebedevRI committed Dec 21, 2022
1 parent b2abbd1 commit 1cbcd8a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -4705,16 +4705,28 @@ let Predicates = [HasAVX512] in {
let Predicates = [HasFP16] in {
// Select X86vzmovl on a VR128X vector of eight 16-bit elements to VMOVSHZrr,
// passing an AVX512_128_SET0 vector as the first operand and the source
// register as the second. The pattern is provided for both the f16 and the
// i16 element type so that the two variants do not diverge.
def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
(VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
(VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

// X86vzmovl on a VR256X vector of sixteen 16-bit elements (f16 and i16
// variants). The source's sub_xmm subregister is extracted, fed to VMOVSHZrr
// together with an AVX512_128_SET0 first operand, and the 8-element result is
// placed back at sub_xmm of the wide result via SUBREG_TO_REG.
// FIXME we need better canonicalization in dag combine
def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
(v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
(v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

// X86vzmovl on a VR512 vector of thirty-two 16-bit elements (f16 and i16
// variants). The source's sub_xmm subregister is extracted, fed to VMOVSHZrr
// together with an AVX512_128_SET0 first operand, and the 8-element result is
// placed back at sub_xmm of the wide result via SUBREG_TO_REG.
// FIXME we need better canonicalization in dag combine
def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
(v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
(v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

// Select a v8f16 X86vzload16 of a memory address directly to the memory form
// VMOVSHZrm, passing the address through unchanged.
def : Pat<(v8f16 (X86vzload16 addr:$src)),
(VMOVSHZrm addr:$src)>;
Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/X86/avx512fp16-mov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2058,3 +2058,27 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
%3 = and <16 x i32> %2, <i32 65535, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 65535>
ret <16 x i32> %3
}

; Regression test for issue #59628 (see the function name). The IR inserts %arg
; into element 0 of a zero <8 x i16> vector and then inserts the constant 0 at
; the variable element index %arg. The check lines for both run configurations
; expect a vmovsh instruction in the generated code.
define <8 x i16> @pr59628_xmm(i16 %arg) {
; X64-LABEL: pr59628_xmm:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vpbroadcastw %edi, %xmm1
; X64-NEXT: vmovsh %xmm1, %xmm0, %xmm0
; X64-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; X64-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
;
; X86-LABEL: pr59628_xmm:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: vpbroadcastw %eax, %xmm1
; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0
; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
%I1 = insertelement <8 x i16> zeroinitializer, i16 %arg, i16 0
%I2 = insertelement <8 x i16> %I1, i16 0, i16 %arg
ret <8 x i16> %I2
}

0 comments on commit 1cbcd8a

Please sign in to comment.