Skip to content

Commit 8bfef8f

Browse files
committed
codegen: take gc roots (and alloca alignment) more seriously
Due to limitations in the LLVM implementation, we are forced to emit fairly bad code here. But we need to make sure it is still correct with respect to GC rooting. The PR 50833c8 also changed the meaning of haspadding without changing all of the existing users to use the new equivalent flag (haspadding || !isbitsegal), incurring additional breakage here as well and needing more tests for that. Fixes #54720
1 parent e1e5a46 commit 8bfef8f

File tree

7 files changed

+83
-49
lines changed

7 files changed

+83
-49
lines changed

src/abi_aarch64.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout {
1616
Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
1717
{
1818
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
19-
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
19+
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0`
2020
if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
2121
return nullptr;
2222
size_t nfields = dt->layout->nfields;
@@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
6262
Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
6363
{
6464
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
65-
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
65+
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0`
6666
Type *lltype;
6767
// Check size first since it's cheaper.
6868
switch (jl_datatype_size(dt)) {
@@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
8888
Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
8989
{
9090
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
91-
if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding)
91+
if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
9292
return nullptr;
9393
return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
9494
}
@@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
184184
// uniquely addressable members.
185185
// Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
186186
size_t dsz = jl_datatype_size(dt);
187-
if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding)
187+
if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
188188
return NULL;
189189
nele = 0;
190190
ElementType eltype;

src/abi_arm.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
8282
if (jl_is_structtype(dt)) {
8383
// Fast path checks before descending the type hierarchy
8484
// (4 x 128b vector == 64B max size)
85-
if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding)
85+
if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
8686
return 0;
8787

8888
base = NULL;

src/abi_ppc64le.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout {
4444
// count the homogeneous floating aggregate size (saturating at max count of 8)
4545
unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
4646
{
47-
if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding)
47+
if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding)
4848
return 9;
4949

5050
size_t i, l = ty->layout->nfields;

src/cgutils.cpp

+61-36
Original file line numberDiff line numberDiff line change
@@ -2119,12 +2119,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
21192119
FailOrder = AtomicOrdering::Monotonic;
21202120
unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
21212121
AllocaInst *intcast = nullptr;
2122+
Type *intcast_eltyp = nullptr;
2123+
bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0;
21222124
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
2125+
intcast_eltyp = elty;
2126+
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
21232127
if (!issetfield) {
21242128
intcast = emit_static_alloca(ctx, elty);
21252129
setName(ctx.emission_context, intcast, "atomic_store_box");
21262130
}
2127-
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
21282131
}
21292132
Type *realelty = elty;
21302133
if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
@@ -2133,14 +2136,20 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
21332136
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
21342137
}
21352138
Value *r = nullptr;
2136-
if (issetfield || isswapfield || isreplacefield || issetfieldonce) {
2137-
if (isboxed)
2139+
if (issetfield || isswapfield || isreplacefield || issetfieldonce) { // e.g. !ismodifyfield
2140+
assert(isboxed || rhs.typ == jltype);
2141+
if (isboxed) {
21382142
r = boxed(ctx, rhs);
2139-
else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
2143+
}
2144+
else if (intcast) {
2145+
emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
2146+
r = ctx.builder.CreateLoad(realelty, intcast);
2147+
}
2148+
else if (aliasscope || Order != AtomicOrdering::NotAtomic || tracked_pointers) {
21402149
r = emit_unbox(ctx, realelty, rhs, jltype);
2141-
if (realelty != elty)
2142-
r = ctx.builder.CreateZExt(r, elty);
21432150
}
2151+
if (realelty != elty)
2152+
r = ctx.builder.CreateZExt(r, elty);
21442153
}
21452154
if (isboxed)
21462155
alignment = sizeof(void*);
@@ -2222,7 +2231,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
22222231
Current->addIncoming(instr, SkipBB);
22232232
ctx.builder.SetInsertPoint(BB);
22242233
}
2225-
Compare = emit_unbox(ctx, realelty, cmp, jltype);
2234+
cmp = update_julia_type(ctx, cmp, jltype);
2235+
if (intcast) {
2236+
emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
2237+
Compare = ctx.builder.CreateLoad(realelty, intcast);
2238+
}
2239+
else {
2240+
Compare = emit_unbox(ctx, realelty, cmp, jltype);
2241+
}
22262242
if (realelty != elty)
22272243
Compare = ctx.builder.CreateZExt(Compare, elty);
22282244
}
@@ -2269,28 +2285,36 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
22692285
if (realelty != elty)
22702286
realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
22712287
if (intcast) {
2288+
assert(!isboxed);
22722289
ctx.builder.CreateStore(realCompare, intcast);
2273-
if (maybe_null_if_boxed)
2274-
realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
2290+
if (tracked_pointers)
2291+
realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast);
22752292
}
2276-
if (maybe_null_if_boxed) {
2277-
Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
2278-
if (first_ptr)
2279-
null_load_check(ctx, first_ptr, mod, var);
2293+
if (maybe_null_if_boxed && tracked_pointers) {
2294+
Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare);
2295+
assert(first_ptr);
2296+
null_load_check(ctx, first_ptr, mod, var);
22802297
}
2281-
if (intcast)
2298+
if (intcast && !tracked_pointers)
22822299
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
22832300
else
22842301
oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
22852302
rhs = newval(oldval);
22862303
if (isboxed) {
22872304
r = boxed(ctx, rhs);
22882305
}
2289-
else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
2306+
else if (intcast) {
2307+
emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
2308+
r = ctx.builder.CreateLoad(realelty, intcast);
2309+
if (!tracked_pointers) // oldval is a slot, so put the oldval back
2310+
ctx.builder.CreateStore(realCompare, intcast);
2311+
}
2312+
else if (Order != AtomicOrdering::NotAtomic) {
2313+
assert(!tracked_pointers);
22902314
r = emit_unbox(ctx, realelty, rhs, jltype);
2291-
if (realelty != elty)
2292-
r = ctx.builder.CreateZExt(r, elty);
22932315
}
2316+
if (realelty != elty)
2317+
r = ctx.builder.CreateZExt(r, elty);
22942318
if (needlock)
22952319
emit_lockstate_value(ctx, needlock, true);
22962320
cmp = oldval;
@@ -2356,9 +2380,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
23562380
realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
23572381
if (intcast) {
23582382
ctx.builder.CreateStore(realinstr, intcast);
2383+
// n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root
23592384
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
23602385
if (maybe_null_if_boxed)
2361-
realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
2386+
realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
23622387
}
23632388
else {
23642389
oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
@@ -2398,20 +2423,23 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
23982423
ctx.builder.SetInsertPoint(DoneBB);
23992424
if (needlock)
24002425
emit_lockstate_value(ctx, needlock, false);
2401-
if (parent != NULL) {
2426+
if (parent != NULL && r && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) {
24022427
if (isreplacefield || issetfieldonce) {
2403-
// TODO: avoid this branch if we aren't making a write barrier
24042428
BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
24052429
DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
24062430
ctx.builder.CreateCondBr(Success, BB, DoneBB);
24072431
ctx.builder.SetInsertPoint(BB);
24082432
}
2409-
if (r) {
2410-
if (!isboxed)
2411-
emit_write_multibarrier(ctx, parent, r, rhs.typ);
2412-
else if (!type_is_permalloc(rhs.typ))
2413-
emit_write_barrier(ctx, parent, r);
2433+
if (realelty != elty)
2434+
r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty));
2435+
if (intcast) {
2436+
ctx.builder.CreateStore(r, intcast);
2437+
r = ctx.builder.CreateLoad(intcast_eltyp, intcast);
24142438
}
2439+
if (!isboxed)
2440+
emit_write_multibarrier(ctx, parent, r, rhs.typ);
2441+
else if (!type_is_permalloc(rhs.typ))
2442+
emit_write_barrier(ctx, parent, r);
24152443
if (isreplacefield || issetfieldonce) {
24162444
ctx.builder.CreateBr(DoneBB);
24172445
ctx.builder.SetInsertPoint(DoneBB);
@@ -2430,21 +2458,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
24302458
instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
24312459
if (intcast) {
24322460
ctx.builder.CreateStore(instr, intcast);
2433-
instr = nullptr;
2461+
if (tracked_pointers)
2462+
instr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
24342463
}
2435-
if (maybe_null_if_boxed) {
2436-
if (intcast)
2437-
instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
2464+
if (maybe_null_if_boxed && tracked_pointers) {
24382465
Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
2439-
if (first_ptr)
2440-
null_load_check(ctx, first_ptr, mod, var);
2441-
if (intcast && !first_ptr)
2442-
instr = nullptr;
2466+
assert(first_ptr);
2467+
null_load_check(ctx, first_ptr, mod, var);
24432468
}
2444-
if (instr)
2445-
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
2446-
else
2469+
if (intcast && !tracked_pointers)
24472470
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
2471+
else
2472+
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
24482473
if (isreplacefield) {
24492474
Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
24502475
const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};

src/codegen.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -3374,11 +3374,14 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty,
33743374
size_t padding_bytes = 0;
33753375
size_t nfields = jl_datatype_nfields(aty);
33763376
size_t total_size = jl_datatype_size(aty);
3377+
assert(aty->layout->flags.isbitsegal);
33773378
for (size_t i = 0; i < nfields; ++i) {
33783379
size_t offset = jl_field_offset(aty, i);
33793380
size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1);
33803381
size_t fsz = jl_field_size(aty, i);
33813382
jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i);
3383+
assert(jl_is_datatype(fty)); // union fields should never reach here
3384+
assert(fty->layout->flags.isbitsegal);
33823385
if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) {
33833386
// The field has no internal padding
33843387
data_bytes += fsz;

src/datatype.c

+7-7
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
12501250
}
12511251
else if (nb == 1) {
12521252
uint8_t *y8 = (uint8_t*)y;
1253-
assert(!dt->layout->flags.haspadding);
1253+
assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
12541254
if (dt == et) {
12551255
*y8 = *(uint8_t*)expected;
12561256
uint8_t z8 = *(uint8_t*)src;
@@ -1263,7 +1263,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
12631263
}
12641264
else if (nb == 2) {
12651265
uint16_t *y16 = (uint16_t*)y;
1266-
assert(!dt->layout->flags.haspadding);
1266+
assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
12671267
if (dt == et) {
12681268
*y16 = *(uint16_t*)expected;
12691269
uint16_t z16 = *(uint16_t*)src;
@@ -1281,7 +1281,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
12811281
uint32_t z32 = zext_read32(src, nb);
12821282
while (1) {
12831283
success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
1284-
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
1284+
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
12851285
break;
12861286
}
12871287
}
@@ -1298,7 +1298,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
12981298
uint64_t z64 = zext_read64(src, nb);
12991299
while (1) {
13001300
success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
1301-
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
1301+
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
13021302
break;
13031303
}
13041304
}
@@ -1316,7 +1316,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
13161316
jl_uint128_t z128 = zext_read128(src, nb);
13171317
while (1) {
13181318
success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
1319-
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
1319+
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
13201320
break;
13211321
}
13221322
}
@@ -2010,7 +2010,7 @@ inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_
20102010
else {
20112011
char *px = lock(p, parent, needlock, isatomic);
20122012
int success = memcmp(px, (char*)r, fsz) == 0;
2013-
if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
2013+
if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
20142014
success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty);
20152015
if (success) {
20162016
if (isunion) {
@@ -2125,7 +2125,7 @@ inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value
21252125
success = (rty == jl_typeof(expected));
21262126
if (success) {
21272127
success = memcmp((char*)r, (char*)expected, rsz) == 0;
2128-
if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
2128+
if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
21292129
success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
21302130
}
21312131
*((uint8_t*)r + fsz) = success ? 1 : 0;

test/atomics.jl

+6
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20))
129129
test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
130130
test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
131131
test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
132+
test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20))
132133
test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
133134
test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
134135
#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
@@ -317,6 +318,7 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
317318
test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
318319
test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
319320
test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
321+
test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
320322
test_field_orderings(10.0, 20.0)
321323
test_field_orderings(NaN, Inf)
322324

@@ -568,6 +570,7 @@ test_global_operators(Any)
568570
test_global_operators(Union{Nothing,Int})
569571
test_global_operators(Complex{Int32})
570572
test_global_operators(Complex{Int128})
573+
test_global_operators(Complex{Real})
571574
test_global_operators(PadIntA)
572575
test_global_operators(PadIntB)
573576
#FIXME: test_global_operators(Int24)
@@ -691,6 +694,7 @@ test_global_orderings(Any, true, false)
691694
test_global_orderings(Union{Nothing,Missing}, nothing, missing)
692695
test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
693696
test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
697+
test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40))
694698
test_global_orderings(Float64, 10.0, 20.0)
695699
test_global_orderings(Float64, NaN, Inf)
696700

@@ -844,6 +848,7 @@ test_memory_operators(Any)
844848
test_memory_operators(Union{Nothing,Int})
845849
test_memory_operators(Complex{Int32})
846850
test_memory_operators(Complex{Int128})
851+
test_memory_operators(Complex{Real})
847852
test_memory_operators(PadIntA)
848853
test_memory_operators(PadIntB)
849854
#FIXME: test_memory_operators(Int24)
@@ -1031,6 +1036,7 @@ test_memory_orderings(Any, true, false)
10311036
test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
10321037
test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
10331038
test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
1039+
test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
10341040
test_memory_orderings(10.0, 20.0)
10351041
test_memory_orderings(NaN, Inf)
10361042

0 commit comments

Comments
 (0)