Skip to content

Commit a386cd1

Browse files
authored
Fix multiversioning issues caused by the parallel llvm work (#52194)
So after struggling with this for a long while it seems there were two different issues. The first one we lacked coverage over, but the other was a very subtle issue when we sorted the fptrs. ~I still need to add test that does multiversioning where we call between multiversioned functions~ Fixes #52079
1 parent 187e8c2 commit a386cd1

File tree

4 files changed

+151
-3
lines changed

4 files changed

+151
-3
lines changed

src/llvm-multiversioning.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ void CloneCtx::clone_decls()
532532
new_F->setVisibility(F->getVisibility());
533533
new_F->setDSOLocal(true);
534534
auto base_func = F;
535-
if (specs[i].flags & JL_TARGET_CLONE_ALL)
535+
if (!(specs[i].flags & JL_TARGET_CLONE_ALL))
536536
base_func = static_cast<Group*>(linearized[specs[i].base])->base_func(F);
537537
(*linearized[i]->vmap)[base_func] = new_F;
538538
}
@@ -587,7 +587,7 @@ void CloneCtx::clone_bodies()
587587
}
588588
for (auto &target : groups[i].clones) {
589589
prepare_vmap(*target.vmap);
590-
auto target_F = cast_or_null<Function>(map_get(*target.vmap, F));
590+
auto target_F = cast_or_null<Function>(map_get(*target.vmap, group_F));
591591
if (target_F) {
592592
if (!F->isDeclaration()) {
593593
clone_function(group_F, target_F, *target.vmap);

src/processor.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,10 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
786786

787787
if (!clones.empty()) {
788788
assert(!fvars.empty());
789-
std::sort(clones.begin(), clones.end());
789+
std::sort(clones.begin(), clones.end(),
790+
[](const std::pair<uint32_t, const char *> &a, const std::pair<uint32_t, const char *> &b) {
791+
return (a.first & jl_sysimg_val_mask) < (b.first & jl_sysimg_val_mask);
792+
});
790793
auto clone_offsets = (int32_t *) malloc(sizeof(int32_t) * clones.size());
791794
auto clone_idxs = (uint32_t *) malloc(sizeof(uint32_t) * clones.size());
792795
for (size_t i = 0; i < clones.size(); i++) {

test/compiler/codegen.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,3 +860,16 @@ foo50964(1) # Shouldn't assert!
860860
# https://github.com/JuliaLang/julia/issues/51233
861861
obj51233 = (1,)
862862
@test_throws ErrorException obj51233.x
863+
864+
# Very specific test for multiversioning
865+
if Sys.ARCH === :x86_64
866+
foo52079() = Core.Intrinsics.have_fma(Float64)
867+
if foo52079() == true
868+
let io = IOBuffer()
869+
code_native(io,^,(Float64,Float64), dump_module=false)
870+
str = String(take!(io))
871+
@test !occursin("fma_emulated", str)
872+
@test occursin("vfmadd", str)
873+
end
874+
end
875+
end
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s
4+
5+
; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s
6+
7+
; COM: This test checks that multiversioning actually happens from start to finish
8+
; COM: We need the fvars for a proper test
9+
10+
11+
12+
; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 16
13+
; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16
14+
; TYPED: @simd_test.reloc_slot = hidden global i32 (<4 x i32>)* null
15+
; OPAQUE: @simd_test.reloc_slot = hidden global ptr null
16+
; TYPED: @jl_fvar_offsets = hidden constant [6 x i32] [i32 5, i32 0, i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32)]
17+
; OPAQUE: @jl_fvar_offsets = hidden constant [6 x i32] [i32 5, i32 0, i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call to i64), i64 ptrtoint (ptr @boring to i64)) to i32)]
18+
; CHECK: @jl_gvar_base = hidden constant i64 0
19+
; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer
20+
; TYPED: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)** @simd_test.reloc_slot to i64), i64 ptrtoint (i64* @jl_gvar_base to i64)) to i32)]
21+
; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_gvar_base to i64)) to i32)]
22+
; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4]
23+
; TYPED: @jl_clone_offsets = hidden constant [9 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @boring.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32)]
24+
; OPAQUE: @jl_clone_offsets = hidden constant [9 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @boring.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32)]
25+
; TYPED: @jl_fvar_base = hidden alias i64, bitcast (i32 (i32)* @boring to i64*)
26+
; OPAQUE: @jl_fvar_base = hidden alias i64, ptr @boring
27+
28+
29+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
30+
target triple = "x86_64-linux-gnu"
31+
32+
@jl_fvars = global [5 x i64*] [i64* bitcast (i32 (i32)* @boring to i64*),
33+
i64* bitcast (float (float, float)* @fastmath_test to i64*),
34+
i64* bitcast (i32 (i32)* @loop_test to i64*),
35+
i64* bitcast (i32 (<4 x i32>)* @simd_test to i64*),
36+
i64* bitcast (i32 (<4 x i32>)* @simd_test_call to i64*)
37+
], align 16
38+
@jl_gvars = global [0 x i64*] zeroinitializer, align 16
39+
@jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 16
40+
@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16
41+
42+
declare i1 @julia.cpu.have_fma.f32()
43+
44+
; CHECK: @boring{{.*}}#[[BORING_BASE:[0-9]+]]
45+
define noundef i32 @boring(i32 noundef %0) {
46+
ret i32 %0
47+
}
48+
49+
; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
50+
; CHECK: %3 = sitofp i1 false to float
51+
define noundef float @fastmath_test(float noundef %0, float noundef %1) {
52+
%3 = call i1 @julia.cpu.have_fma.f32()
53+
%4 = sitofp i1 %3 to float
54+
%5 = fadd fast float %0, %4
55+
ret float %5
56+
}
57+
58+
; CHECK: @loop_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
59+
define noundef i32 @loop_test(i32 noundef %0) {
60+
%2 = icmp sgt i32 %0, 0
61+
br i1 %2, label %5, label %3
62+
63+
3: ; preds = %5, %1
64+
%4 = phi i32 [ 0, %1 ], [ %9, %5 ]
65+
ret i32 %4
66+
67+
5: ; preds = %1, %5
68+
%6 = phi i32 [ %10, %5 ], [ 0, %1 ]
69+
%7 = phi i32 [ %9, %5 ], [ 0, %1 ]
70+
%8 = lshr i32 %6, 1
71+
%9 = add nuw nsw i32 %8, %7
72+
%10 = add nuw nsw i32 %6, 1
73+
%11 = icmp eq i32 %10, %0
74+
br i1 %11, label %3, label %5;, !llvm.loop -
75+
}
76+
77+
; CHECK: @simd_test{{.*}}#[[SIMD_BASE_RELOC:[0-9]+]]
78+
define noundef i32 @simd_test(<4 x i32> noundef %0) {
79+
%2 = extractelement <4 x i32> %0, i64 0
80+
ret i32 %2
81+
}
82+
83+
; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
84+
define noundef i32 @simd_test_call(<4 x i32> noundef %0) {
85+
%2 = call noundef i32 @simd_test(<4 x i32> noundef %0)
86+
ret i32 %2
87+
}
88+
89+
; CHECK: @boring{{.*}}#[[BORING_CLONE:[0-9]+]]
90+
91+
; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
92+
; CHECK: %3 = sitofp i1 false to float
93+
94+
; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
95+
; CHECK: %3 = sitofp i1 true to float
96+
97+
; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
98+
99+
; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
100+
101+
; CHECK: @simd_test{{.*}}#[[SIMD_CLONE1:[0-9]+]]
102+
103+
; CHECK: @simd_test{{.*}}#[[SIMD_CLONE2:[0-9]+]]
104+
105+
; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
106+
; TYPED: %2 = load i32 (<4 x i32>)*, i32 (<4 x i32>)** @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12
107+
; OPAQUE: %2 = load ptr, ptr @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12
108+
; CHECK: %3 = call noundef i32 %2(<4 x i32> noundef %0)
109+
110+
; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
111+
; CHECK: %2 = call noundef i32 @simd_test.2(<4 x i32> noundef %0)
112+
113+
; CHECK-DAG: attributes #[[BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
114+
; CHECK-DAG: attributes #[[NOT_BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
115+
; CHECK-DAG: attributes #[[SIMD_BASE_RELOC]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
116+
; CHECK-DAG: attributes #[[BORING_CLONE]] = { "julia.mv.clone"="1" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
117+
; CHECK-DAG: attributes #[[NOT_BORING_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
118+
; CHECK-DAG: attributes #[[NOT_BORING_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
119+
; CHECK-DAG: attributes #[[SIMD_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
120+
; CHECK-DAG: attributes #[[SIMD_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
121+
122+
123+
!llvm.module.flags = !{!0, !2}
124+
125+
126+
!0 = !{i32 1, !"julia.mv.enable", i32 1}
127+
!1 = !{!1}
128+
!2 = !{i32 1, !"julia.mv.specs", !3}
129+
!3 = !{!4, !5, !6}
130+
!4 = !{!"x86-64", !"+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 0}
131+
!5 = !{!"sandybridge", !"+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 2}
132+
!6 = !{!"haswell", !"+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 1, i32 284}

0 commit comments

Comments
 (0)