[Clang][AArch64] Expose compatible SVE intrinsics with only +sme #95787
Conversation
This allows code with SVE intrinsics to be compiled with +sme,+nosve, assuming the encompassing function is in the correct mode (see llvm#93802)
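As a rough illustration of what this enables (file name and compile flags below are hypothetical, and it assumes a Clang build with this patch), a streaming function can use the streaming-compatible SVE intrinsics even when SVE itself is disabled:

// streaming_add.c -- minimal sketch, assuming a Clang with this patch applied.
// Compile with only SME available, e.g.:
//   clang --target=aarch64-linux-gnu -march=armv9-a+sme+nosve -O2 -c streaming_add.c
#include <arm_sve.h>

// The function is in streaming mode, so intrinsics that are marked
// VerifyRuntimeMode in arm_sve.td (such as svadd) are accepted without the
// +sve target feature.
svint32_t add_streaming(svbool_t pg, svint32_t a, svint32_t b) __arm_streaming {
  return svadd_s32_x(pg, a, b);
}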
@llvm/pr-subscribers-clang

Author: Sander de Smalen (sdesmalen-arm)

Changes: This allows code with SVE intrinsics to be compiled with +sme,+nosve, assuming the encompassing function is in the correct mode (see llvm#93802).

Patch is 2.45 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95787.diff

227 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index f5972b41e7b50..f7d64d0d35d35 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -41,6 +41,7 @@ def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad,
def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">;
def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">;
+let TargetGuard = "sve" in {
// Load one vector (vector base)
def SVLD1_GATHER_BASES_U : MInst<"svld1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">;
def SVLD1SB_GATHER_BASES_U : MInst<"svld1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl", [IsGatherLoad], MemEltTyInt8, "aarch64_sve_ld1_gather_scalar_offset">;
@@ -136,12 +137,14 @@ def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsL
def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">;
def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">;
def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">;
+} // let TargetGuard = "sve"
let TargetGuard = "sve,bf16" in {
def SVLDFF1_BF : MInst<"svldff1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
def SVLDFF1_VNUM_BF : MInst<"svldff1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
}
+let TargetGuard = "sve" in {
// First-faulting load one vector (vector base)
def SVLDFF1_GATHER_BASES_U : MInst<"svldff1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">;
def SVLDFF1SB_GATHER_BASES_U : MInst<"svldff1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl", [IsGatherLoad], MemEltTyInt8, "aarch64_sve_ldff1_gather_scalar_offset">;
@@ -236,6 +239,7 @@ def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsL
def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">;
def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">;
def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">;
+} // let TargetGuard = "sve"
let TargetGuard = "sve,bf16" in {
def SVLDNF1_BF : MInst<"svldnf1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
@@ -286,10 +290,13 @@ let TargetGuard = "sve,f64mm,bf16" in {
}
let TargetGuard = "sve,bf16" in {
+ def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone]>;
+}
+
+let TargetGuard = "(sve,bf16)|sme" in {
def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>;
- def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>;
@@ -356,6 +363,7 @@ let TargetGuard = "(sve,bf16)|sme" in {
def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
}
+let TargetGuard = "sve" in {
// Store one vector (vector base)
def SVST1_SCATTER_BASES_U : MInst<"svst1_scatter[_{2}base_{d}]", "vPud", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">;
def SVST1B_SCATTER_BASES_U : MInst<"svst1b_scatter[_{2}base_{d}]", "vPud", "ilUiUl", [IsScatterStore], MemEltTyInt8, "aarch64_sve_st1_scatter_scalar_offset">;
@@ -424,10 +432,11 @@ def SVST1H_SCATTER_32B_INDICES_UU : MInst<"svst1h_scatter_[{3}]index[_{d}]", "vP
def SVST1_SCATTER_INDEX_S : MInst<"svst1_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">;
def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_st1_scatter_scalar_offset">;
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
+} // let TargetGuard = "sve"
multiclass StructStore<string name, string proto, string i> {
def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
- let TargetGuard = "sve,bf16" in {
+ let TargetGuard = "(sve,bf16)|sme" in {
def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
}
}
@@ -499,6 +508,7 @@ def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch, VerifyRuntimeM
def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_prf">;
def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch, VerifyRuntimeMode], MemEltTyInt64, "aarch64_sve_prf">;
+let TargetGuard = "sve" in {
// Prefetch (Vector bases)
def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">;
def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
@@ -531,14 +541,17 @@ def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ"
def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+} // let TargetGuard = "sve"
////////////////////////////////////////////////////////////////////////////////
// Address calculations
+let TargetGuard = "sve" in {
def SVADRB : SInst<"svadrb[_{0}base]_[{2}]offset", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrb">;
def SVADRH : SInst<"svadrh[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrh">;
def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">;
def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">;
+} // let TargetGuard = "sve"
////////////////////////////////////////////////////////////////////////////////
// Scalar to vector
@@ -826,10 +839,12 @@ defm SVRINTX : SInstZPZ<"svrintx", "hfd", "aarch64_sve_frintx">;
defm SVRINTZ : SInstZPZ<"svrintz", "hfd", "aarch64_sve_frintz">;
defm SVSQRT : SInstZPZ<"svsqrt", "hfd", "aarch64_sve_fsqrt">;
+let TargetGuard = "sve" in {
def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x">;
def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">;
def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">;
+}
def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale", [VerifyRuntimeMode]>;
def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale", [VerifyRuntimeMode]>;
@@ -992,7 +1007,7 @@ defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
@@ -1012,7 +1027,7 @@ def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6
multiclass SVEPerm<string name, string proto, string i> {
def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [VerifyRuntimeMode]>;
- let TargetGuard = "sve,bf16" in {
+ let TargetGuard = "(sve,bf16)|sme" in {
def: SInst<name, proto, "b", MergeNone, i, [VerifyRuntimeMode]>;
}
}
@@ -1022,19 +1037,22 @@ defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">;
defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;
+let TargetGuard = "sve" in {
def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;
+}
+
// Note: svdup_lane is implemented using the intrinsic for TBL to represent a
// splat of any possible lane. It is upto LLVM to pick a more efficient
// instruction such as DUP (indexed) if the lane index fits the range of the
// instruction's immediate.
def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
def SVDUP_LANE_BF16 :
SInst<"svdup_lane[_{d}]", "ddL", "b", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
}
def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>;
}
def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
@@ -1045,7 +1063,7 @@ def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNo
def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
}
@@ -1158,10 +1176,12 @@ def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_
////////////////////////////////////////////////////////////////////////////////
// FFR manipulation
+let TargetGuard = "sve" in {
def SVRDFFR : SInst<"svrdffr", "Pv", "Pc", MergeNone, "", [IsOverloadNone]>;
def SVRDFFR_Z : SInst<"svrdffr_z", "PP", "Pc", MergeNone, "", [IsOverloadNone]>;
def SVSETFFR : SInst<"svsetffr", "vv", "", MergeNone, "", [IsOverloadNone]>;
def SVWRFFR : SInst<"svwrffr", "vP", "Pc", MergeNone, "", [IsOverloadNone]>;
+}
////////////////////////////////////////////////////////////////////////////////
// Counting elements
@@ -1179,7 +1199,7 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendS
def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>;
def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone, "", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone, "", [VerifyRuntimeMode]>;
}
@@ -1249,7 +1269,9 @@ let TargetGuard = "sve,i8mm" in {
def SVMLLA_S32 : SInst<"svmmla[_s32]", "ddqq","i", MergeNone, "aarch64_sve_smmla">;
def SVMLLA_U32 : SInst<"svmmla[_u32]", "ddqq","Ui", MergeNone, "aarch64_sve_ummla">;
def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_usmmla">;
+}
+let TargetGuard = "(sve|sme),i8mm" in {
def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>;
def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>;
def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>;
@@ -1265,21 +1287,21 @@ def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla
let TargetGuard = "sve,f64mm" in {
def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">;
-def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q", [VerifyRuntimeMode]>;
-def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q", [VerifyRuntimeMode]>;
-def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q", [VerifyRuntimeMode]>;
-def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q", [VerifyRuntimeMode]>;
-def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q", [VerifyRuntimeMode]>;
-def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q", [VerifyRuntimeMode]>;
+def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">;
+def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">;
+def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q">;
+def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">;
+def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">;
+def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">;
}
let TargetGuard = "sve,bf16,f64mm" in {
def SVTRN1Q_BF16 : SInst<"svtrn1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1q">;
def SVTRN2Q_BF16 : SInst<"svtrn2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2q">;
-def SVUZP1Q_BF16 : SInst<"svuzp1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1q", [VerifyRuntimeMode]>;
-def SVUZP2Q_BF16 : SInst<"svuzp2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2q", [VerifyRuntimeMode]>;
-def SVZIP1Q_BF16 : SInst<"svzip1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1q", [VerifyRuntimeMode]>;
-def SVZIP2Q_BF16 : SInst<"svzip2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2q", [VerifyRuntimeMode]>;
+def SVUZP1Q_BF16 : SInst<"svuzp1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1q">;
+def SVUZP2Q_BF16 : SInst<"svuzp2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2q">;
+def SVZIP1Q_BF16 : SInst<"svzip1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1q">;
+def SVZIP2Q_BF16 : SInst<"svzip2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2q">;
}
////////////////////////////////////////////////////////////////////////////////
@@ -1347,7 +1369,7 @@ let TargetGuard = "sve2p1|sme2" in {
}
////////////////////////////////////////////////////////////////////////////////
// SVE2 WhileGE/GT
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
@@ -1373,7 +1395,7 @@ let TargetGuard = "sve2p1|sme2" in {
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Uniform DSP operations
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
defm SVQADD_S : SInstZPZZ<"svqadd", "csli", "aarch64_sve_sqadd", "aarch64_sve_sqadd">;
defm SVQADD_U : SInstZPZZ<"svqadd", "UcUsUiUl", "aarch64_sve_uqadd", "aarch64_sve_uqadd">;
defm SVHADD_S : SInstZPZZ<"svhadd", "csli", "aarch64_sve_shadd", "aarch64_sve_shadd">;
@@ -1408,7 +1430,7 @@ multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, str
def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, intrinsic, flags>;
}
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", [VerifyRuntimeMode]>;
defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [VerifyRuntimeMode]>;
defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", [VerifyRuntimeMode]>;
@@ -1462,7 +1484,7 @@ multiclass SInstPairwise<string name, string types, string intrinsic, list<FlagT
def _X : SInst<name # "[_{d}]", "dPdd", types, MergeAny, intrinsic, flags>;
}
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp", [VerifyRuntimeMode]>;
defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp", [VerifyRuntimeMode]>;
defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp", [VerifyRuntimeMode]>;
@@ -1478,7 +1500,7 @@ defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Widening pairwise arithmetic
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
@@ -1492,7 +1514,7 @@ def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_s
// SVE2 - Bitwise ternary logical instructions
//
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
def SVBCAX : SInst<"svbcax[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [VerifyRuntimeMode]>;
def SVBSL : SInst<"svbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [VerifyRuntimeMode]>;
def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [VerifyRuntimeMode]>;
@@ -1512,7 +1534,7 @@ def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aar
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Large integer arithmetic
-let TargetGuard = "sve2" in {
+let Tar...
[truncated]
Not for this patch, but I do wonder if there's value in protecting non-bf16-instruction-backed builtins (e.g. loads, stores and shuffles) with the bf16 target guard. I figure we'll either error on the use of the svbfloat type or the code generation should just work, and thus there's no reason to artificially restrict user code.
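For instance (a hypothetical snippet to illustrate the point, not something this patch adds), a bf16 load lowers to an ordinary predicated 16-bit load, so the bf16 guard is really protecting the svbfloat16_t type rather than an instruction:

// Hypothetical illustration, assuming +sve+bf16 so the bfloat types exist:
// the builtin below is backed by a plain predicated 16-bit load; no
// bf16-specific instruction is involved.
#include <arm_sve.h>

svbfloat16_t load_bf16(svbool_t pg, const bfloat16_t *base) {
  return svld1_bf16(pg, base);
}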
clang/utils/TableGen/SveEmitter.cpp (Outdated)

assert((((Def->getGuard().contains("sve") +
          Def->getGuard().contains("sme")) <= 1) ||
        Def->isFlagSet(VerifyRuntimeMode)) &&
Given this is a build-time error that represents a bug in arm_sve.td, do you think it's worth using llvm_unreachable rather than an assert?
Yes that makes sense. Done.
clang/include/clang/Basic/arm_sve.td (Outdated)

def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone]>;
}

let TargetGuard = "(sve,bf16)|sme" in {
Looking at the specification suggests this should be (sve|sme),bf16?

Also, the closing } could do with a matching comment.
Looking at the specification suggests this should be (sve|sme),bf16?

+sme implies +bf16, so this is equivalent, although I guess that when modelling it with (sve,bf16)|sme, Clang would still accept the builtin with +sme,+nobf16, even though LLVM will fail to select.
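A hypothetical example of that corner case (compiler flags are an assumption; the accept-in-Sema/fail-in-backend behaviour is as described in the comment above, not verified here):

// Hypothetical illustration of the +sme,+nobf16 corner case, e.g.:
//   clang --target=aarch64-linux-gnu -march=armv9-a+sme+nobf16 -c bfdot.c
// With the guard written as "(sve,bf16)|sme" the frontend would accept this
// call even though the backend cannot select BFDOT without +bf16, whereas
// "(sve|sme),bf16" would reject it up front.
#include <arm_sve.h>

svfloat32_t dot(svfloat32_t acc, svbfloat16_t a, svbfloat16_t b) __arm_streaming {
  return svbfdot_f32(acc, a, b);
}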
clang/include/clang/Basic/arm_sve.td (Outdated)

@@ -2264,6 +2278,18 @@ let TargetGuard = "sve2p1" in {
defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
}

let TargetGuard = "sve2p1|sme2" in {
I think this is a sme2p1 feature?
Good spot!
With the change of default it's very hard to check everything, but we've already agreed there'll need to be a full audit once all the in-flight work has landed. I did spot one thing though:
Should the integer svclamp and svrevd builtins be protected by "sve2p1|sme" rather than "sve2p1|sme2"?
clang/include/clang/Basic/arm_sve.td (Outdated)

def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>;
def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>;
Not relevant to this patch, but there's a typo here: SVBFMLAL_N should be SVBFMLALB_N.
clang/include/clang/Basic/arm_sve.td (Outdated)

def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
}

let TargetGuard = "(sve2p1|sme2),bf16" in {
This should match the above and thus be "(sve2p1|sme2p1),bf16".
@@ -17,7 +25,7 @@
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
 // CPP-CHECK-NEXT: ret i64 [[TMP1]]
 //
-uint64_t test_svcntb()
+uint64_t test_svcntb(void) MODE_ATTR
Is there a problem we need to worry about with using the SME keywords with () functions?
I don't think so.
The ACLE specifies that the SME keyword attributes cannot be used with K&R-style unprototyped C functions: https://github.com/ARM-software/acle/blob/main/main/acle.md#sme-keyword-attributes
Clang gives a clear diagnostic describing the issue: https://godbolt.org/z/7GabEWro1
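A small hypothetical sketch of that rule (the exact diagnostic wording depends on the Clang version and language mode):

// Hypothetical sketch of the ACLE rule referenced above: the SME keyword
// attributes require a prototyped function.
void ok(void) __arm_streaming;   // accepted: "(void)" is a prototype

// In C17 and earlier, "()" declares an unprototyped (K&R-style) function, so
// Clang is expected to diagnose the keyword attribute here; in C23, "()" is
// equivalent to "(void)" and the declaration should be accepted.
void not_ok() __arm_streaming;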
Why is that restriction there? We often run into confusion when int foo() __arm_streaming; and the like is used.
clang/utils/TableGen/SveEmitter.cpp (Outdated)

if (!Def->isFlagSet(VerifyRuntimeMode) &&
    (Def->getGuard().contains("sve") + Def->getGuard().contains("sme")) ==
        2)
Surely this is just:

if (!Def->isFlagSet(VerifyRuntimeMode) && Def->getGuard().contains("sve") && Def->getGuard().contains("sme"))

isn't it?
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/573

Here is the relevant piece of the build log for the reference: