Skip to content

Commit 5b0e19a

Browse files
committed
[TLI][AArch64] Add mappings to vectorized functions from ArmPL
Arm Performance Libraries contain math library which provides vectorized versions of common math functions. This patch allows to use it with clang and llvm via -fveclib=ArmPL or -vector-library=ArmPL, so loops with such calls can be vectorized. The executable needs to be linked with the amath library. Arm Performance Libraries are available at: https://developer.arm.com/Tools%20and%20Software/Arm%20Performance%20Libraries Reviewed by: paulwalker-arm Differential Revision: https://reviews.llvm.org/D154508
1 parent 221c5fb commit 5b0e19a

File tree

12 files changed

+2908
-12
lines changed

12 files changed

+2908
-12
lines changed

clang/include/clang/Basic/CodeGenOptions.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,14 @@ class CodeGenOptions : public CodeGenOptionsBase {
5555
};
5656

5757
enum VectorLibrary {
58-
NoLibrary, // Don't use any vector library.
59-
Accelerate, // Use the Accelerate framework.
60-
LIBMVEC, // GLIBC vector math library.
61-
MASSV, // IBM MASS vector library.
62-
SVML, // Intel short vector math library.
63-
SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.
64-
Darwin_libsystem_m // Use Darwin's libsytem_m vector functions.
58+
NoLibrary, // Don't use any vector library.
59+
Accelerate, // Use the Accelerate framework.
60+
LIBMVEC, // GLIBC vector math library.
61+
MASSV, // IBM MASS vector library.
62+
SVML, // Intel short vector math library.
63+
SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.
64+
Darwin_libsystem_m, // Use Darwin's libsytem_m vector functions.
65+
ArmPL // Arm Performance Libraries.
6566
};
6667

6768
enum ObjCDispatchMethodKind {

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2621,10 +2621,10 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_
26212621
Alias<fno_global_isel>;
26222622
def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
26232623
HelpText<"Use the given vector functions library">,
2624-
Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,none">,
2624+
Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">,
26252625
NormalizedValuesScope<"CodeGenOptions">,
26262626
NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",
2627-
"Darwin_libsystem_m", "NoLibrary"]>,
2627+
"Darwin_libsystem_m", "ArmPL", "NoLibrary"]>,
26282628
MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">;
26292629
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
26302630
Alias<flax_vector_conversions_EQ>, AliasArgs<["none"]>;

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,10 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
284284
TLII->addVectorizableFunctionsFromVecLib(
285285
TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
286286
break;
287+
case CodeGenOptions::ArmPL:
288+
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
289+
TargetTriple);
290+
break;
287291
default:
288292
break;
289293
}

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5334,7 +5334,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
53345334
Triple.getArch() != llvm::Triple::x86_64)
53355335
D.Diag(diag::err_drv_unsupported_opt_for_target)
53365336
<< Name << Triple.getArchName();
5337-
} else if (Name == "SLEEF") {
5337+
} else if (Name == "SLEEF" || Name == "ArmPL") {
53385338
if (Triple.getArch() != llvm::Triple::aarch64 &&
53395339
Triple.getArch() != llvm::Triple::aarch64_be)
53405340
D.Diag(diag::err_drv_unsupported_opt_for_target)

clang/test/Driver/autocomplete.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
// FLTOALL-NEXT: thin
8181
// RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
8282
// FVECLIBALL: Accelerate
83+
// FVECLIBALL-NEXT: ArmPL
8384
// FVECLIBALL-NEXT: Darwin_libsystem_m
8485
// FVECLIBALL-NEXT: libmvec
8586
// FVECLIBALL-NEXT: MASSV

clang/test/Driver/fveclib.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s
55
// RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s
66
// RUN: %clang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-SLEEF %s
7+
// RUN: %clang -### -c --target=aarch64-none-none -fveclib=ArmPL %s 2>&1 | FileCheck -check-prefix CHECK-ARMPL %s
78
// RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s
89

910
// CHECK-NOLIB: "-fveclib=none"
@@ -12,10 +13,12 @@
1213
// CHECK-MASSV: "-fveclib=MASSV"
1314
// CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"
1415
// CHECK-SLEEF: "-fveclib=SLEEF"
16+
// CHECK-ARMPL: "-fveclib=ArmPL"
1517

1618
// CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'
1719

1820
// RUN: not %clang --target=x86-none-none -c -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
21+
// RUN: not %clang --target=x86-none-none -c -fveclib=ArmPL %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
1922
// RUN: not %clang --target=aarch64-none-none -c -fveclib=LIBMVEC-X86 %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
2023
// RUN: not %clang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
2124
// CHECK-ERROR: unsupported option {{.*}} for target

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ class TargetLibraryInfoImpl {
9696
LIBMVEC_X86, // GLIBC Vector Math library.
9797
MASSV, // IBM MASS vector library.
9898
SVML, // Intel short vector math library.
99-
SLEEFGNUABI // SLEEF - SIMD Library for Evaluating Elementary Functions.
99+
SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
100+
ArmPL // Arm Performance Libraries.
100101
};
101102

102103
TargetLibraryInfoImpl();

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,228 @@ TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED)
682682
TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED)
683683
TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED)
684684

685+
#elif defined(TLI_DEFINE_ARMPL_VECFUNCS)
686+
687+
TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK)
688+
TLI_DEFINE_VECFUNC("acosf", "armpl_vacosq_f32", FIXED(4), NOMASK)
689+
TLI_DEFINE_VECFUNC("acos", "armpl_svacos_f64_x", SCALABLE(2), MASKED)
690+
TLI_DEFINE_VECFUNC("acosf", "armpl_svacos_f32_x", SCALABLE(4), MASKED)
691+
692+
TLI_DEFINE_VECFUNC("acosh", "armpl_vacoshq_f64", FIXED(2), NOMASK)
693+
TLI_DEFINE_VECFUNC("acoshf", "armpl_vacoshq_f32", FIXED(4), NOMASK)
694+
TLI_DEFINE_VECFUNC("acosh", "armpl_svacosh_f64_x", SCALABLE(2), MASKED)
695+
TLI_DEFINE_VECFUNC("acoshf", "armpl_svacosh_f32_x", SCALABLE(4), MASKED)
696+
697+
TLI_DEFINE_VECFUNC("asin", "armpl_vasinq_f64", FIXED(2), NOMASK)
698+
TLI_DEFINE_VECFUNC("asinf", "armpl_vasinq_f32", FIXED(4), NOMASK)
699+
TLI_DEFINE_VECFUNC("asin", "armpl_svasin_f64_x", SCALABLE(2), MASKED)
700+
TLI_DEFINE_VECFUNC("asinf", "armpl_svasin_f32_x", SCALABLE(4), MASKED)
701+
702+
TLI_DEFINE_VECFUNC("asinh", "armpl_vasinhq_f64", FIXED(2), NOMASK)
703+
TLI_DEFINE_VECFUNC("asinhf", "armpl_vasinhq_f32", FIXED(4), NOMASK)
704+
TLI_DEFINE_VECFUNC("asinh", "armpl_svasinh_f64_x", SCALABLE(2), MASKED)
705+
TLI_DEFINE_VECFUNC("asinhf", "armpl_svasinh_f32_x", SCALABLE(4), MASKED)
706+
707+
TLI_DEFINE_VECFUNC("atan", "armpl_vatanq_f64", FIXED(2), NOMASK)
708+
TLI_DEFINE_VECFUNC("atanf", "armpl_vatanq_f32", FIXED(4), NOMASK)
709+
TLI_DEFINE_VECFUNC("atan", "armpl_svatan_f64_x", SCALABLE(2), MASKED)
710+
TLI_DEFINE_VECFUNC("atanf", "armpl_svatan_f32_x", SCALABLE(4), MASKED)
711+
712+
TLI_DEFINE_VECFUNC("atan2", "armpl_vatan2q_f64", FIXED(2), NOMASK)
713+
TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK)
714+
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED)
715+
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED)
716+
717+
TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK)
718+
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK)
719+
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED)
720+
TLI_DEFINE_VECFUNC("atanhf", "armpl_svatanh_f32_x", SCALABLE(4), MASKED)
721+
722+
TLI_DEFINE_VECFUNC("cbrt", "armpl_vcbrtq_f64", FIXED(2), NOMASK)
723+
TLI_DEFINE_VECFUNC("cbrtf", "armpl_vcbrtq_f32", FIXED(4), NOMASK)
724+
TLI_DEFINE_VECFUNC("cbrt", "armpl_svcbrt_f64_x", SCALABLE(2), MASKED)
725+
TLI_DEFINE_VECFUNC("cbrtf", "armpl_svcbrt_f32_x", SCALABLE(4), MASKED)
726+
727+
TLI_DEFINE_VECFUNC("copysign", "armpl_vcopysignq_f64", FIXED(2), NOMASK)
728+
TLI_DEFINE_VECFUNC("copysignf", "armpl_vcopysignq_f32", FIXED(4), NOMASK)
729+
TLI_DEFINE_VECFUNC("copysign", "armpl_svcopysign_f64_x", SCALABLE(2), MASKED)
730+
TLI_DEFINE_VECFUNC("copysignf", "armpl_svcopysign_f32_x", SCALABLE(4), MASKED)
731+
732+
TLI_DEFINE_VECFUNC("cos", "armpl_vcosq_f64", FIXED(2), NOMASK)
733+
TLI_DEFINE_VECFUNC("cosf", "armpl_vcosq_f32", FIXED(4), NOMASK)
734+
TLI_DEFINE_VECFUNC("cos", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
735+
TLI_DEFINE_VECFUNC("cosf", "armpl_svcos_f32_x", SCALABLE(4), MASKED)
736+
737+
TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_vcosq_f64", FIXED(2), NOMASK)
738+
TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_vcosq_f32", FIXED(4), NOMASK)
739+
TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
740+
TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_svcos_f32_x", SCALABLE(4), MASKED)
741+
742+
TLI_DEFINE_VECFUNC("cosh", "armpl_vcoshq_f64", FIXED(2), NOMASK)
743+
TLI_DEFINE_VECFUNC("coshf", "armpl_vcoshq_f32", FIXED(4), NOMASK)
744+
TLI_DEFINE_VECFUNC("cosh", "armpl_svcosh_f64_x", SCALABLE(2), MASKED)
745+
TLI_DEFINE_VECFUNC("coshf", "armpl_svcosh_f32_x", SCALABLE(4), MASKED)
746+
747+
TLI_DEFINE_VECFUNC("erf", "armpl_verfq_f64", FIXED(2), NOMASK)
748+
TLI_DEFINE_VECFUNC("erff", "armpl_verfq_f32", FIXED(4), NOMASK)
749+
TLI_DEFINE_VECFUNC("erf", "armpl_sverf_f64_x", SCALABLE(2), MASKED)
750+
TLI_DEFINE_VECFUNC("erff", "armpl_sverf_f32_x", SCALABLE(4), MASKED)
751+
752+
TLI_DEFINE_VECFUNC("erfc", "armpl_verfcq_f64", FIXED(2), NOMASK)
753+
TLI_DEFINE_VECFUNC("erfcf", "armpl_verfcq_f32", FIXED(4), NOMASK)
754+
TLI_DEFINE_VECFUNC("erfc", "armpl_sverfc_f64_x", SCALABLE(2), MASKED)
755+
TLI_DEFINE_VECFUNC("erfcf", "armpl_sverfc_f32_x", SCALABLE(4), MASKED)
756+
757+
TLI_DEFINE_VECFUNC("exp", "armpl_vexpq_f64", FIXED(2), NOMASK)
758+
TLI_DEFINE_VECFUNC("expf", "armpl_vexpq_f32", FIXED(4), NOMASK)
759+
TLI_DEFINE_VECFUNC("exp", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
760+
TLI_DEFINE_VECFUNC("expf", "armpl_svexp_f32_x", SCALABLE(4), MASKED)
761+
762+
TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_vexpq_f64", FIXED(2), NOMASK)
763+
TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK)
764+
TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
765+
TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED)
766+
767+
TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK)
768+
TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK)
769+
TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
770+
TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)
771+
772+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK)
773+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK)
774+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
775+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)
776+
777+
TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK)
778+
TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK)
779+
TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED)
780+
TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED)
781+
782+
TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK)
783+
TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK)
784+
TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED)
785+
TLI_DEFINE_VECFUNC("expm1f", "armpl_svexpm1_f32_x", SCALABLE(4), MASKED)
786+
787+
TLI_DEFINE_VECFUNC("fdim", "armpl_vfdimq_f64", FIXED(2), NOMASK)
788+
TLI_DEFINE_VECFUNC("fdimf", "armpl_vfdimq_f32", FIXED(4), NOMASK)
789+
TLI_DEFINE_VECFUNC("fdim", "armpl_svfdim_f64_x", SCALABLE(2), MASKED)
790+
TLI_DEFINE_VECFUNC("fdimf", "armpl_svfdim_f32_x", SCALABLE(4), MASKED)
791+
792+
TLI_DEFINE_VECFUNC("fma", "armpl_vfmaq_f64", FIXED(2), NOMASK)
793+
TLI_DEFINE_VECFUNC("fmaf", "armpl_vfmaq_f32", FIXED(4), NOMASK)
794+
TLI_DEFINE_VECFUNC("fma", "armpl_svfma_f64_x", SCALABLE(2), MASKED)
795+
TLI_DEFINE_VECFUNC("fmaf", "armpl_svfma_f32_x", SCALABLE(4), MASKED)
796+
797+
TLI_DEFINE_VECFUNC("fmin", "armpl_vfminq_f64", FIXED(2), NOMASK)
798+
TLI_DEFINE_VECFUNC("fminf", "armpl_vfminq_f32", FIXED(4), NOMASK)
799+
TLI_DEFINE_VECFUNC("fmin", "armpl_svfmin_f64_x", SCALABLE(2), MASKED)
800+
TLI_DEFINE_VECFUNC("fminf", "armpl_svfmin_f32_x", SCALABLE(4), MASKED)
801+
802+
TLI_DEFINE_VECFUNC("fmod", "armpl_vfmodq_f64", FIXED(2), NOMASK)
803+
TLI_DEFINE_VECFUNC("fmodf", "armpl_vfmodq_f32", FIXED(4), NOMASK)
804+
TLI_DEFINE_VECFUNC("fmod", "armpl_svfmod_f64_x", SCALABLE(2), MASKED)
805+
TLI_DEFINE_VECFUNC("fmodf", "armpl_svfmod_f32_x", SCALABLE(4), MASKED)
806+
807+
TLI_DEFINE_VECFUNC("hypot", "armpl_vhypotq_f64", FIXED(2), NOMASK)
808+
TLI_DEFINE_VECFUNC("hypotf", "armpl_vhypotq_f32", FIXED(4), NOMASK)
809+
TLI_DEFINE_VECFUNC("hypot", "armpl_svhypot_f64_x", SCALABLE(2), MASKED)
810+
TLI_DEFINE_VECFUNC("hypotf", "armpl_svhypot_f32_x", SCALABLE(4), MASKED)
811+
812+
TLI_DEFINE_VECFUNC("lgamma", "armpl_vlgammaq_f64", FIXED(2), NOMASK)
813+
TLI_DEFINE_VECFUNC("lgammaf", "armpl_vlgammaq_f32", FIXED(4), NOMASK)
814+
TLI_DEFINE_VECFUNC("lgamma", "armpl_svlgamma_f64_x", SCALABLE(2), MASKED)
815+
TLI_DEFINE_VECFUNC("lgammaf", "armpl_svlgamma_f32_x", SCALABLE(4), MASKED)
816+
817+
TLI_DEFINE_VECFUNC("log", "armpl_vlogq_f64", FIXED(2), NOMASK)
818+
TLI_DEFINE_VECFUNC("logf", "armpl_vlogq_f32", FIXED(4), NOMASK)
819+
TLI_DEFINE_VECFUNC("log", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
820+
TLI_DEFINE_VECFUNC("logf", "armpl_svlog_f32_x", SCALABLE(4), MASKED)
821+
822+
TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_vlogq_f64", FIXED(2), NOMASK)
823+
TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK)
824+
TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
825+
TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED)
826+
827+
TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK)
828+
TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK)
829+
TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED)
830+
TLI_DEFINE_VECFUNC("log1pf", "armpl_svlog1p_f32_x", SCALABLE(4), MASKED)
831+
832+
TLI_DEFINE_VECFUNC("log2", "armpl_vlog2q_f64", FIXED(2), NOMASK)
833+
TLI_DEFINE_VECFUNC("log2f", "armpl_vlog2q_f32", FIXED(4), NOMASK)
834+
TLI_DEFINE_VECFUNC("log2", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
835+
TLI_DEFINE_VECFUNC("log2f", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)
836+
837+
TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_vlog2q_f64", FIXED(2), NOMASK)
838+
TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK)
839+
TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
840+
TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)
841+
842+
TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK)
843+
TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK)
844+
TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
845+
TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)
846+
847+
TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK)
848+
TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK)
849+
TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
850+
TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)
851+
852+
TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK)
853+
TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK)
854+
TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED)
855+
TLI_DEFINE_VECFUNC("nextafterf", "armpl_svnextafter_f32_x", SCALABLE(4), MASKED)
856+
857+
TLI_DEFINE_VECFUNC("pow", "armpl_vpowq_f64", FIXED(2), NOMASK)
858+
TLI_DEFINE_VECFUNC("powf", "armpl_vpowq_f32", FIXED(4), NOMASK)
859+
TLI_DEFINE_VECFUNC("pow", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
860+
TLI_DEFINE_VECFUNC("powf", "armpl_svpow_f32_x", SCALABLE(4), MASKED)
861+
862+
TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_vpowq_f64", FIXED(2), NOMASK)
863+
TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_vpowq_f32", FIXED(4), NOMASK)
864+
TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
865+
TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_svpow_f32_x", SCALABLE(4), MASKED)
866+
867+
TLI_DEFINE_VECFUNC("sin", "armpl_vsinq_f64", FIXED(2), NOMASK)
868+
TLI_DEFINE_VECFUNC("sinf", "armpl_vsinq_f32", FIXED(4), NOMASK)
869+
TLI_DEFINE_VECFUNC("sin", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
870+
TLI_DEFINE_VECFUNC("sinf", "armpl_svsin_f32_x", SCALABLE(4), MASKED)
871+
872+
TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_vsinq_f64", FIXED(2), NOMASK)
873+
TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK)
874+
TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
875+
TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED)
876+
877+
TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK)
878+
TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK)
879+
TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED)
880+
TLI_DEFINE_VECFUNC("sinhf", "armpl_svsinh_f32_x", SCALABLE(4), MASKED)
881+
882+
TLI_DEFINE_VECFUNC("sinpi", "armpl_vsinpiq_f64", FIXED(2), NOMASK)
883+
TLI_DEFINE_VECFUNC("sinpif", "armpl_vsinpiq_f32", FIXED(4), NOMASK)
884+
TLI_DEFINE_VECFUNC("sinpi", "armpl_svsinpi_f64_x", SCALABLE(2), MASKED)
885+
TLI_DEFINE_VECFUNC("sinpif", "armpl_svsinpi_f32_x", SCALABLE(4), MASKED)
886+
887+
TLI_DEFINE_VECFUNC("sqrt", "armpl_vsqrtq_f64", FIXED(2), NOMASK)
888+
TLI_DEFINE_VECFUNC("sqrtf", "armpl_vsqrtq_f32", FIXED(4), NOMASK)
889+
TLI_DEFINE_VECFUNC("sqrt", "armpl_svsqrt_f64_x", SCALABLE(2), MASKED)
890+
TLI_DEFINE_VECFUNC("sqrtf", "armpl_svsqrt_f32_x", SCALABLE(4), MASKED)
891+
892+
TLI_DEFINE_VECFUNC("tan", "armpl_vtanq_f64", FIXED(2), NOMASK)
893+
TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK)
894+
TLI_DEFINE_VECFUNC("tan", "armpl_svtan_f64_x", SCALABLE(2), MASKED)
895+
TLI_DEFINE_VECFUNC("tanf", "armpl_svtan_f32_x", SCALABLE(4), MASKED)
896+
897+
TLI_DEFINE_VECFUNC("tanh", "armpl_vtanhq_f64", FIXED(2), NOMASK)
898+
TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK)
899+
TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED)
900+
TLI_DEFINE_VECFUNC("tanhf", "armpl_svtanh_f32_x", SCALABLE(4), MASKED)
901+
902+
TLI_DEFINE_VECFUNC("tgamma", "armpl_vtgammaq_f64", FIXED(2), NOMASK)
903+
TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK)
904+
TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED)
905+
TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED)
906+
685907
#else
686908
#error "Must choose which vector library functions are to be defined."
687909
#endif
@@ -701,3 +923,4 @@ TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED)
701923
#undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
702924
#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
703925
#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
926+
#undef TLI_DEFINE_ARMPL_VECFUNCS

llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
3333
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
3434
"Intel SVML library"),
3535
clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
36-
"SIMD Library for Evaluating Elementary Functions")));
36+
"SIMD Library for Evaluating Elementary Functions"),
37+
clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL",
38+
"Arm Performance Libraries")));
3739

3840
StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
3941
{
@@ -1215,6 +1217,23 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
12151217
}
12161218
break;
12171219
}
1220+
case ArmPL: {
1221+
const VecDesc VecFuncs[] = {
1222+
#define TLI_DEFINE_ARMPL_VECFUNCS
1223+
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
1224+
#include "llvm/Analysis/VecFuncs.def"
1225+
};
1226+
1227+
switch (TargetTriple.getArch()) {
1228+
default:
1229+
break;
1230+
case llvm::Triple::aarch64:
1231+
case llvm::Triple::aarch64_be:
1232+
addVectorizableFunctions(VecFuncs);
1233+
break;
1234+
}
1235+
break;
1236+
}
12181237
case NoLibrary:
12191238
break;
12201239
}

0 commit comments

Comments
 (0)