Skip to content

[SYCL] Refactor HW targeted extensions into the INTEL namespace #2246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Aug 17, 2020
Merged
4 changes: 2 additions & 2 deletions llvm/lib/SYCLLowerIR/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,7 @@ void SYCLLowerESIMDLegacyPass::collectGenXVolatileType(Module &M) {
if (!PTy)
continue;
auto GTy = dyn_cast<StructType>(PTy->getPointerElementType());
if (!GTy || !GTy->getName().endswith("cl::sycl::intel::gpu::simd"))
if (!GTy || !GTy->getName().endswith("cl::sycl::INTEL::gpu::simd"))
continue;
assert(GTy->getNumContainedTypes() == 1);
auto VTy = GTy->getContainedType(0);
Expand Down Expand Up @@ -1238,7 +1238,7 @@ PreservedAnalyses SYCLLowerESIMDPass::run(Function &F,

// process ESIMD builtins that go through special handling instead of
// the translation procedure
if (Name.startswith("N2cl4sycl5intel3gpu8slm_init")) {
if (Name.startswith("N2cl4sycl5INTEL3gpu8slm_init")) {
// tag the kernel with meta-data SLMSize, and remove this builtin
translateSLMInit(*CI);
ESIMDToErases.push_back(CI);
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/SYCLLowerIR/esimd_global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" = type { <16 x i32> }
%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" = type { <16 x i32> }

$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any

; CHECK: [[NEWGLOBAL:[@a-zA-Z0-9_]*]] = dso_local global <16 x i32> zeroinitializer, align 64 #0
@0 = dso_local global %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" zeroinitializer, align 64 #0
@0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer, align 64 #0

; Function Attrs: norecurse
define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(i32 addrspace(1)* %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
entry:
%vc.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", align 64
%agg.tmp.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", align 64
%vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%call.esimd.i.i.i.i.i = call <3 x i32> @llvm.genx.local.id.v3i32() #5
%local_id.y.i.i.i.i.i = extractelement <3 x i32> %call.esimd.i.i.i.i.i, i32 1
%local_id.y.cast.ty.i.i.i.i.i = zext i32 %local_id.y.i.i.i.i.i to i64
Expand All @@ -36,15 +36,15 @@ entry:
%group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64
%mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i, %wgsize.x.cast.ty.i.i.i.i.i
%add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i, %local_id.x.cast.ty.i.i.i.i.i
%0 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %agg.tmp.i to i8*
%0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to i8*
call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
%1 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %vc.i to i8*
%1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to i8*
call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %1) #5
%conv.i = trunc i64 %add.i5.i.i.i.i to i32
%2 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %vc.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)*
%2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%splat.splatinsert.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
%splat.splat.i.i = shufflevector <16 x i32> %splat.splatinsert.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
%M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)* %2, i64 0, i32 0
%M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %2, i64 0, i32 0
store <16 x i32> %splat.splat.i.i, <16 x i32> addrspace(4)* %M_data.i13.i, align 64, !tbaa !13
%conv3.i = trunc i64 %add.i.i.i.i.i to i32
%splat.splatinsert.i20.i = insertelement <8 x i32> undef, i32 %conv3.i, i32 0
Expand All @@ -56,17 +56,17 @@ entry:
%..i = select i1 %cmp.i, i64 %add.i5.i.i.i.i, i64 %add.i.i.i.i.i
%conv9.i = trunc i64 %..i to i32
; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
%mul.i = shl nsw i32 %conv9.i, 4
%idx.ext.i = sext i32 %mul.i to i64
%add.ptr.i16 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %idx.ext.i
%add.ptr.i = addrspacecast i32 addrspace(1)* %add.ptr.i16 to i32 addrspace(4)*
%3 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %agg.tmp.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)*
%3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5
%M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)* %3, i64 0, i32 0
%M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %3, i64 0, i32 0
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.esimd.i.i.i, <16 x i32> addrspace(4)* %M_data.i2.i.i) #5
call spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE(i32 addrspace(4)* %add.ptr.i, %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* nonnull %agg.tmp.i) #5
store <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %add.ptr.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull %agg.tmp.i) #5
store <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %1) #5
call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
ret void
Expand All @@ -79,17 +79,17 @@ declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2

; Function Attrs: noinline norecurse nounwind
define dso_local spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %v) local_unnamed_addr #3 !sycl_explicit_simd !12 {
define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 !sycl_explicit_simd !12 {
entry:
%agg.tmp = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", align 64
%0 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %v to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)*
%1 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* %agg.tmp to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)*
%M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)* %0, i64 0, i32 0
%agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %0, i64 0, i32 0
%call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i), !noalias !17
; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17
%call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)* addrspacecast (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd"* @0 to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)*), i64 0, i32 0)), !noalias !17
%call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0)), !noalias !17
%add.i = add <16 x i32> %call.esimd.i8.i, %call.esimd.i.i
%M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd", %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE.cl::sycl::intel::gpu::simd" addrspace(4)* %1, i64 0, i32 0
%M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %1, i64 0, i32 0
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %add.i, <16 x i32> addrspace(4)* %M_data.i.i.i)
%2 = ptrtoint i32 addrspace(4)* %C to i64
%call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i.i)
Expand Down Expand Up @@ -153,8 +153,8 @@ attributes #5 = { nounwind }
!15 = !{!"Simple C++ TBAA"}
!16 = !{i64 0, i64 64, !13}
!17 = !{!18}
!18 = distinct !{!18, !19, !"_ZNK2cl4sycl5intel3gpu4simdIiLi16EEplERKS4_: %agg.result"}
!19 = distinct !{!19, !"_ZNK2cl4sycl5intel3gpu4simdIiLi16EEplERKS4_"}
!18 = distinct !{!18, !19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_: %agg.result"}
!19 = distinct !{!19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_"}
!20 = !{i32 8275}
!21 = !{i32 8268}
!22 = !{i32 8269}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/SYCLLowerIR/esimd_lower_intrins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ define dso_local spir_func void @FUNC_29() !sycl_explicit_simd !1 {

define dso_local spir_kernel void @FUNC_30() !sycl_explicit_simd !1 {
; CHECK: define dso_local spir_kernel void @FUNC_30() !sycl_explicit_simd !1
call spir_func void @_ZN2cl4sycl5intel3gpu8slm_initEj(i32 1023)
call spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32 1023)
ret void
; CHECK-NEXT: ret void
}
Expand Down Expand Up @@ -210,7 +210,7 @@ declare dso_local spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14
declare dso_local spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 %0, %opencl.image2d_wo_t addrspace(1)* %1, i32 %2, i32 %3, i32 %4, i32 %5, <32 x i32> %6)
declare dso_local spir_func <32 x i32> @_Z13__esimd_vloadIiLi32EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<32 x i32> addrspace(4)* %0)
declare dso_local spir_func void @_Z14__esimd_vstoreIfLi16EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<16 x float> addrspace(4)* %0, <16 x float> %1)
declare dso_local spir_func void @_ZN2cl4sycl5intel3gpu8slm_initEj(i32)
declare dso_local spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32)

attributes #0 = { "genx_byte_offset"="192" "genx_volatile" }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

#pragma once

#include <CL/sycl/intel/esimd/esimd.hpp>
#include <CL/sycl/intel/esimd/esimd_math.hpp>
#include <CL/sycl/intel/esimd/esimd_memory.hpp>
#include <CL/sycl/intel/esimd/esimd_view.hpp>
#include <CL/sycl/INTEL/esimd/esimd.hpp>
#include <CL/sycl/INTEL/esimd/esimd_math.hpp>
#include <CL/sycl/INTEL/esimd/esimd_memory.hpp>
#include <CL/sycl/INTEL/esimd/esimd_view.hpp>

#ifdef __SYCL_DEVICE_ONLY__
#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
Expand Down
Loading