Skip to content

Commit ba0e9b9

Browse files
IanButterworthgbaraldiDilumAluthge
authored and
KristofferC
committed
Actually setup jit targets when compiling packageimages instead of targeting only one (#54471)
Co-authored-by: Gabriel Baraldi <baraldigabriel@gmail.com> Co-authored-by: Dilum Aluthge <dilum@aluthge.com>
1 parent 34a8c47 commit ba0e9b9

6 files changed

+162
-12
lines changed

src/codegen.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7260,8 +7260,11 @@ static Function* gen_cfun_wrapper(
72607260
ctx.builder.ClearInsertionPoint();
72617261

72627262
if (aliasname) {
7263-
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
7263+
auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
72647264
GlobalValue::ExternalLinkage, aliasname, cw, M);
7265+
if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
7266+
alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
7267+
}
72657268
}
72667269

72677270
if (nest) {

src/llvm-multiversioning.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
673673
trampoline->removeFnAttr("julia.mv.reloc");
674674
trampoline->removeFnAttr("julia.mv.clones");
675675
trampoline->addFnAttr("julia.mv.alias");
676+
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
676677
alias->eraseFromParent();
677678

678679
uint32_t id;

src/processor_arm.cpp

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1890,12 +1890,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
18901890
return res;
18911891
}
18921892

1893+
#ifndef __clang_gcanalyzer__
18931894
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
18941895
{
1895-
if (jit_targets.empty())
1896-
jl_error("JIT targets not initialized");
1896+
1897+
auto &cmdline = get_cmdline_targets();
1898+
check_cmdline(cmdline, true);
1899+
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1900+
for (auto &arg: cmdline) {
1901+
auto data = arg_target_data(arg, image_targets.empty());
1902+
image_targets.push_back(std::move(data));
1903+
}
1904+
auto ntargets = image_targets.size();
1905+
if (image_targets.empty())
1906+
jl_error("No targets specified");
18971907
llvm::SmallVector<jl_target_spec_t, 0> res;
1898-
for (auto &target: jit_targets) {
1908+
// Now decide the clone condition.
1909+
for (size_t i = 1; i < ntargets; i++) {
1910+
auto &t = image_targets[i];
1911+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1912+
continue;
1913+
auto &features0 = image_targets[t.base].en.features;
1914+
// Always clone when code checks CPU features
1915+
t.en.flags |= JL_TARGET_CLONE_CPU;
1916+
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
1917+
for (auto fe: clone_fp16) {
1918+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1919+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1920+
break;
1921+
}
1922+
}
1923+
// The most useful one in general...
1924+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1925+
#ifdef _CPU_ARM_
1926+
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
1927+
for (auto fe: clone_math) {
1928+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1929+
t.en.flags |= JL_TARGET_CLONE_MATH;
1930+
break;
1931+
}
1932+
}
1933+
static constexpr uint32_t clone_simd[] = {Feature::neon};
1934+
for (auto fe: clone_simd) {
1935+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1936+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1937+
break;
1938+
}
1939+
}
1940+
#endif
1941+
}
1942+
for (auto &target: image_targets) {
18991943
auto features_en = target.en.features;
19001944
auto features_dis = target.dis.features;
19011945
for (auto &fename: feature_names) {
@@ -1916,6 +1960,8 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
19161960
return res;
19171961
}
19181962

1963+
#endif
1964+
19191965
extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
19201966
{
19211967
if (feature >= 32 * feature_sz)

src/processor_fallback.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
144144
jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
145145
return res;
146146
}
147-
147+
#ifndef __clang_gcanalyzer__
148148
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
149149
{
150-
if (jit_targets.empty())
151-
jl_error("JIT targets not initialized");
150+
151+
auto &cmdline = get_cmdline_targets();
152+
check_cmdline(cmdline, true);
153+
llvm::SmallVector<TargetData<1>, 0> image_targets;
154+
for (auto &arg: cmdline) {
155+
auto data = arg_target_data(arg, image_targets.empty());
156+
image_targets.push_back(std::move(data));
157+
}
158+
auto ntargets = image_targets.size();
159+
// Now decide the clone condition.
160+
for (size_t i = 1; i < ntargets; i++) {
161+
auto &t = image_targets[i];
162+
t.en.flags |= JL_TARGET_CLONE_ALL;
163+
}
164+
if (image_targets.empty())
165+
jl_error("No image targets found");
152166
llvm::SmallVector<jl_target_spec_t, 0> res;
153-
for (auto &target: jit_targets) {
167+
for (auto &target: image_targets) {
154168
jl_target_spec_t ele;
155169
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
156170
ele.data = serialize_target_data(target.name, target.en.features,
@@ -161,6 +175,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
161175
}
162176
return res;
163177
}
178+
#endif
164179

165180
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
166181
{

src/processor_x86.cpp

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
910910
return match.best_idx;
911911
}
912912

913+
// This function serves as a fallback during bootstrapping; at that point we don't have a sysimage with native code,
914+
// so we won't call sysimg_init_cb; otherwise this function shouldn't do anything.
913915
static void ensure_jit_target(bool imaging)
914916
{
915917
auto &cmdline = get_cmdline_targets();
@@ -1102,13 +1104,82 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
11021104
{feature_masks, 0}, {{}, 0}, 0});
11031105
return res;
11041106
}
1105-
1107+
// This function parses the -C command-line option to figure out which targets to multiversion for.
1108+
#ifndef __clang_gcanalyzer__
11061109
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
11071110
{
1108-
if (jit_targets.empty())
1109-
jl_error("JIT targets not initialized");
1111+
auto &cmdline = get_cmdline_targets();
1112+
check_cmdline(cmdline, true);
1113+
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1114+
for (auto &arg: cmdline) {
1115+
auto data = arg_target_data(arg, image_targets.empty());
1116+
image_targets.push_back(std::move(data));
1117+
}
1118+
1119+
auto ntargets = image_targets.size();
1120+
// Now decide the clone condition.
1121+
for (size_t i = 1; i < ntargets; i++) {
1122+
auto &t = image_targets[i];
1123+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1124+
continue;
1125+
// Always clone when code checks CPU features
1126+
t.en.flags |= JL_TARGET_CLONE_CPU;
1127+
// The most useful one in general...
1128+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1129+
auto &features0 = image_targets[t.base].en.features;
1130+
// Special case for KNL/KNM since they're so different
1131+
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
1132+
if ((t.name == "knl" || t.name == "knm") &&
1133+
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
1134+
t.en.flags |= JL_TARGET_CLONE_ALL;
1135+
break;
1136+
}
1137+
}
1138+
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
1139+
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
1140+
Feature::sse41, Feature::sse42,
1141+
Feature::avx, Feature::avx2,
1142+
Feature::vaes, Feature::vpclmulqdq,
1143+
Feature::sse4a, Feature::avx512f,
1144+
Feature::avx512dq, Feature::avx512ifma,
1145+
Feature::avx512pf, Feature::avx512er,
1146+
Feature::avx512cd, Feature::avx512bw,
1147+
Feature::avx512vl, Feature::avx512vbmi,
1148+
Feature::avx512vpopcntdq, Feature::avxvnni,
1149+
Feature::avx512vbmi2, Feature::avx512vnni,
1150+
Feature::avx512bitalg, Feature::avx512bf16,
1151+
Feature::avx512vp2intersect, Feature::avx512fp16};
1152+
for (auto fe: clone_math) {
1153+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1154+
t.en.flags |= JL_TARGET_CLONE_MATH;
1155+
break;
1156+
}
1157+
}
1158+
for (auto fe: clone_simd) {
1159+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1160+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1161+
break;
1162+
}
1163+
}
1164+
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
1165+
for (auto fe: clone_fp16) {
1166+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1167+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1168+
break;
1169+
}
1170+
}
1171+
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
1172+
for (auto fe: clone_bf16) {
1173+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1174+
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
1175+
break;
1176+
}
1177+
}
1178+
}
1179+
if (image_targets.empty())
1180+
jl_error("No targets specified");
11101181
llvm::SmallVector<jl_target_spec_t, 0> res;
1111-
for (auto &target: jit_targets) {
1182+
for (auto &target: image_targets) {
11121183
auto features_en = target.en.features;
11131184
auto features_dis = target.dis.features;
11141185
for (auto &fename: feature_names) {
@@ -1128,6 +1199,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
11281199
}
11291200
return res;
11301201
}
1202+
#endif
11311203

11321204
extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
11331205
{

test/precompile.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,6 +2124,19 @@ precompile_test_harness("Test flags") do load_path
21242124
@test !Base.isprecompiled(id, ;flags=current_flags)
21252125
end
21262126

2127+
if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64)
2128+
@testset "Multiversioning" begin # This test isn't the most robust because it relies on being in CI,
2129+
pkg = Base.identify_package("Test") # but we need better target reflection to make a better one.
2130+
cachefiles = Base.find_all_in_cache_path(pkg)
2131+
pkgpath = Base.locate_package(pkg)
2132+
idx = findfirst(cachefiles) do cf
2133+
Base.stale_cachefile(pkgpath, cf) !== true
2134+
end
2135+
targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7])
2136+
@test length(targets) > 1
2137+
end
2138+
end
2139+
21272140
precompile_test_harness("Issue #52063") do load_path
21282141
fname = joinpath(load_path, "i_do_not_exist.jl")
21292142
@test try

0 commit comments

Comments
 (0)