@@ -1707,6 +1707,11 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use)
1707
1707
return DecomposeHWIntrinsicGetElement (use, hwintrinsicTree);
1708
1708
}
1709
1709
1710
+ case NI_EVEX_MoveMask:
1711
+ {
1712
+ return DecomposeHWIntrinsicMoveMask (use, hwintrinsicTree);
1713
+ }
1714
+
1710
1715
default :
1711
1716
{
1712
1717
noway_assert (!" unexpected GT_HWINTRINSIC node in long decomposition" );
@@ -1830,6 +1835,106 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW
1830
1835
return FinalizeDecomposition (use, loResult, hiResult, hiResult);
1831
1836
}
1832
1837
1838
+ // ------------------------------------------------------------------------
1839
+ // DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_EVEX_MoveMask
1840
+ //
1841
+ // Decompose a MoveMask(x) node on Vector512<*>. For:
1842
+ //
1843
+ // GT_HWINTRINSIC{MoveMask}[*](simd_var)
1844
+ //
1845
+ // create:
1846
+ //
1847
+ // tmp_simd_var = simd_var
1848
+ // tmp_simd_lo = GT_HWINTRINSIC{GetLower}(tmp_simd_var)
1849
+ // lo_result = GT_HWINTRINSIC{MoveMask}(tmp_simd_lo)
1850
+ // tmp_simd_hi = GT_HWINTRINSIC{GetUpper}(tmp_simd_var)
1851
+ // hi_result = GT_HWINTRINSIC{MoveMask}(tmp_simd_hi)
1852
+ // return: GT_LONG(lo_result, hi_result)
1853
+ //
1854
+ // Noting that for all types except byte/sbyte, hi_result will be exclusively
1855
+ // zero and so we can actually optimize this a bit more directly
1856
+ //
1857
+ // Arguments:
1858
+ // use - the LIR::Use object for the def that needs to be decomposed.
1859
+ // node - the hwintrinsic node to decompose
1860
+ //
1861
+ // Return Value:
1862
+ // The next node to process.
1863
+ //
1864
+ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask (LIR::Use& use, GenTreeHWIntrinsic* node)
1865
+ {
1866
+ assert (node == use.Def ());
1867
+ assert (varTypeIsLong (node));
1868
+ assert (node->GetHWIntrinsicId () == NI_EVEX_MoveMask);
1869
+
1870
+ GenTree* op1 = node->Op (1 );
1871
+ CorInfoType simdBaseJitType = node->GetSimdBaseJitType ();
1872
+ var_types simdBaseType = node->GetSimdBaseType ();
1873
+ unsigned simdSize = node->GetSimdSize ();
1874
+
1875
+ assert (varTypeIsArithmetic (simdBaseType));
1876
+ assert (op1->TypeGet () == TYP_MASK);
1877
+ assert (simdSize == 64 );
1878
+
1879
+ GenTree* loResult = nullptr ;
1880
+ GenTree* hiResult = nullptr ;
1881
+
1882
+ if (varTypeIsByte (simdBaseType))
1883
+ {
1884
+ // Create:
1885
+ // simdTmpVar = op1
1886
+
1887
+ GenTree* simdTmpVar = RepresentOpAsLocalVar (op1, node, &node->Op (1 ));
1888
+ unsigned simdTmpVarNum = simdTmpVar->AsLclVarCommon ()->GetLclNum ();
1889
+ JITDUMP (" [DecomposeHWIntrinsicMoveMask]: Saving op1 tree to a temp var:\n " );
1890
+ DISPTREERANGE (Range (), simdTmpVar);
1891
+ Range ().Remove (simdTmpVar);
1892
+
1893
+ Range ().InsertBefore (node, simdTmpVar);
1894
+
1895
+ // Create:
1896
+ // loResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar)
1897
+
1898
+ loResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32 );
1899
+ Range ().InsertBefore (node, loResult);
1900
+
1901
+ simdTmpVar = m_compiler->gtNewLclLNode (simdTmpVarNum, simdTmpVar->TypeGet ());
1902
+ Range ().InsertBefore (node, simdTmpVar);
1903
+
1904
+ // Create:
1905
+ // simdTmpVar = GT_HWINTRINSIC{ShiftRightMask}(simdTmpVar, 32)
1906
+ // hiResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar)
1907
+
1908
+ GenTree* shiftIcon = m_compiler->gtNewIconNode (32 , TYP_INT);
1909
+ Range ().InsertBefore (node, shiftIcon);
1910
+
1911
+ simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, simdTmpVar, shiftIcon, NI_EVEX_ShiftRightMask,
1912
+ simdBaseJitType, 64 );
1913
+ Range ().InsertBefore (node, simdTmpVar);
1914
+
1915
+ hiResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32 );
1916
+ Range ().InsertBefore (node, hiResult);
1917
+ }
1918
+ else
1919
+ {
1920
+ // Create:
1921
+ // loResult = GT_HWINTRINSIC{MoveMask}(op1)
1922
+
1923
+ loResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize);
1924
+ Range ().InsertBefore (node, loResult);
1925
+
1926
+ // Create:
1927
+ // hiResult = GT_ICON(0)
1928
+
1929
+ hiResult = m_compiler->gtNewZeroConNode (TYP_INT);
1930
+ Range ().InsertBefore (node, hiResult);
1931
+ }
1932
+
1933
+ // Done with the original tree; remove it.
1934
+ Range ().Remove (node);
1935
+
1936
+ return FinalizeDecomposition (use, loResult, hiResult, hiResult);
1937
+ }
1833
1938
#endif // FEATURE_HW_INTRINSICS
1834
1939
1835
1940
// ------------------------------------------------------------------------
0 commit comments