Skip to content

Commit 7ccecf1

Browse files
committed
Add !noalias and !alias.scope metadata
The main idea here is that the TBAA domain is ill-equipped for reasoning about regions (and, in particular, suffers total precision loss when merging disparate types in a `memcpy`). Instead, `!noalias` should be used for region-based memory information and `!tbaa` should be used exclusively for layout. We use five regions corresponding to the top level of the TBAA tree: gcframe, stack, data, constant, and type_metadata. For now, this leaves the TBAA hierarchy intact and only adds additional `!noalias` metadata. `!tbaa` annotations should be the same as before.
1 parent b07484c commit 7ccecf1

File tree

2 files changed

+238
-17
lines changed

2 files changed

+238
-17
lines changed

src/cgutils.cpp

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -890,8 +890,8 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
890890
return data;
891891
}
892892

893-
static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src,
894-
uint64_t sz, unsigned align, bool is_volatile)
893+
static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
894+
jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align, bool is_volatile)
895895
{
896896
if (sz == 0)
897897
return;
@@ -933,44 +933,73 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
933933
src = emit_bitcast(ctx, src, dstty);
934934
}
935935
if (directel) {
936-
auto val = tbaa_decorate(tbaa_src, ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile));
937-
tbaa_decorate(tbaa_dst, ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile));
936+
auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile));
937+
dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile));
938938
++SkippedMemcpys;
939939
return;
940940
}
941941
}
942942
#endif
943+
++EmittedMemcpys;
944+
943945
// the memcpy intrinsic does not allow to specify different alias tags
944946
// for the load part (x.tbaa) and the store part (ctx.tbaa().tbaa_stack).
945947
// since the tbaa lattice has to be a tree we have unfortunately
946948
// x.tbaa ∪ ctx.tbaa().tbaa_stack = tbaa_root if x.tbaa != ctx.tbaa().tbaa_stack
947-
++EmittedMemcpys;
948-
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
949+
950+
// Now that we use scoped aliases to label disparate regions of memory, the TBAA
951+
// metadata should be revisited so that it only represents memory layouts. Once
952+
// that's done, we can expect that in most cases tbaa(src) == tbaa(dst) and the
953+
// above problem won't be as serious.
954+
955+
auto merged_ai = dst_ai.merge(src_ai);
956+
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
957+
merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
949958
}
950959

951-
static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src,
952-
Value *sz, unsigned align, bool is_volatile)
960+
static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
961+
jl_aliasinfo_t const &src_ai, Value *sz, unsigned align, bool is_volatile)
953962
{
954963
if (auto const_sz = dyn_cast<ConstantInt>(sz)) {
955-
emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, const_sz->getZExtValue(), align, is_volatile);
964+
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align, is_volatile);
956965
return;
957966
}
958967
++EmittedMemcpys;
959-
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
968+
969+
auto merged_ai = dst_ai.merge(src_ai);
970+
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
971+
merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
960972
}
961973

962974
template<typename T1>
963975
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src,
964976
T1 &&sz, unsigned align, bool is_volatile=false)
965977
{
966-
emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, sz, align, is_volatile);
978+
emit_memcpy_llvm(ctx, dst, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src,
979+
jl_aliasinfo_t::fromTBAA(ctx, tbaa_src), sz, align, is_volatile);
967980
}
968981

969982
template<typename T1>
970983
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, const jl_cgval_t &src,
971984
T1 &&sz, unsigned align, bool is_volatile=false)
972985
{
973-
emit_memcpy_llvm(ctx, dst, tbaa_dst, data_pointer(ctx, src), src.tbaa, sz, align, is_volatile);
986+
emit_memcpy_llvm(ctx, dst, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
987+
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), sz, align, is_volatile);
988+
}
989+
990+
template<typename T1>
991+
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
992+
jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align, bool is_volatile=false)
993+
{
994+
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align, is_volatile);
995+
}
996+
997+
template<typename T1>
998+
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src,
999+
T1 &&sz, unsigned align, bool is_volatile=false)
1000+
{
1001+
auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa);
1002+
emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align, is_volatile);
9741003
}
9751004

9761005
static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type)
@@ -2699,7 +2728,9 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo,
26992728
LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *)));
27002729
LI->setOrdering(AtomicOrdering::NotAtomic);
27012730
LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
2702-
tbaa_decorate(arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr, LI);
2731+
jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr);
2732+
aliasinfo.decorateInst(LI);
2733+
27032734
return LI;
27042735
}
27052736

src/codegen.cpp

Lines changed: 194 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,57 @@ struct jl_tbaacache_t {
356356
}
357357
};
358358

359+
struct jl_noaliascache_t {
360+
// Each domain operates completely independently.
361+
// "No aliasing" is inferred if it is implied by any domain.
362+
363+
// memory regions domain
364+
struct jl_regions_t {
365+
MDNode *gcframe; // GC frame
366+
MDNode *stack; // Stack slot
367+
MDNode *data; // Any user data that `pointerset/ref` are allowed to alias
368+
MDNode *type_metadata; // Non-user-accessible type metadata incl. size, union selectors, etc.
369+
MDNode *constant; // Memory that is immutable by the time LLVM can see it
370+
371+
jl_regions_t(): gcframe(nullptr), stack(nullptr), data(nullptr), type_metadata(nullptr), constant(nullptr) {}
372+
373+
void initialize(llvm::LLVMContext &context) {
374+
MDBuilder mbuilder(context);
375+
MDNode *domain = mbuilder.createAliasScopeDomain("jnoalias");
376+
377+
this->gcframe = mbuilder.createAliasScope("jnoalias_gcframe", domain);
378+
this->stack = mbuilder.createAliasScope("jnoalias_stack", domain);
379+
this->data = mbuilder.createAliasScope("jnoalias_data", domain);
380+
this->type_metadata = mbuilder.createAliasScope("jnoalias_typemd", domain);
381+
this->constant = mbuilder.createAliasScope("jnoalias_const", domain);
382+
}
383+
} regions;
384+
385+
// `@aliasscope` domain
386+
struct jl_aliasscope_t {
387+
MDNode *current;
388+
389+
jl_aliasscope_t(): current(nullptr) {}
390+
391+
// No init required, this->current is only used to store the currently active aliasscope
392+
void initialize(llvm::LLVMContext &context) {}
393+
} aliasscope;
394+
395+
bool initialized;
396+
397+
jl_noaliascache_t(): regions(), aliasscope(), initialized(false) {}
398+
399+
void initialize(llvm::LLVMContext &context) {
400+
if (initialized) {
401+
assert(&regions.constant->getContext() == &context);
402+
return;
403+
}
404+
initialized = true;
405+
regions.initialize(context);
406+
aliasscope.initialize(context);
407+
}
408+
};
409+
359410
struct jl_debugcache_t {
360411
// Basic DITypes
361412
DIDerivedType *jl_pvalue_dillvmt;
@@ -1276,6 +1327,69 @@ static bool deserves_sret(jl_value_t *dt, Type *T)
12761327
return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy();
12771328
}
12781329

1330+
// Alias Analysis Info (analogous to llvm::AAMDNodes)
1331+
struct jl_aliasinfo_t {
1332+
MDNode *tbaa = nullptr; // '!tbaa': Struct-path TBAA. TBAA graph forms a tree (indexed by offset).
1333+
// Two pointers do not alias if they are not transitive parents
1334+
// (effectively, subfields) of each other or equal.
1335+
MDNode *tbaa_struct = nullptr; // '!tbaa.struct': Describes memory layout of struct.
1336+
MDNode *scope = nullptr; // '!alias.scope': Generic "noalias" memory access sets.
1337+
// If alias.scope(inst_a) ⊆ noalias(inst_b) (in any "domain")
1338+
// => inst_a, inst_b do not alias.
1339+
MDNode *noalias = nullptr; // '!noalias': See '!alias.scope' above.
1340+
1341+
enum class Region { unknown, gcframe, stack, data, constant, type_metadata }; // See jl_regions_t
1342+
1343+
explicit jl_aliasinfo_t() = default;
1344+
explicit jl_aliasinfo_t(jl_codectx_t &ctx, Region r, MDNode *tbaa);
1345+
explicit jl_aliasinfo_t(MDNode *tbaa, MDNode *tbaa_struct, MDNode *scope, MDNode *noalias)
1346+
: tbaa(tbaa), tbaa_struct(tbaa_struct), scope(scope), noalias(noalias) {}
1347+
jl_aliasinfo_t(const jl_aliasinfo_t &) = default;
1348+
1349+
// Add !tbaa, !tbaa.struct, !alias.scope, !noalias annotations to an instruction.
1350+
//
1351+
// Also adds `invariant.load` to load instructions in the constant !noalias scope.
1352+
Instruction *decorateInst(Instruction *inst) const {
1353+
1354+
if (this->tbaa)
1355+
inst->setMetadata(LLVMContext::MD_tbaa, this->tbaa);
1356+
if (this->tbaa_struct)
1357+
inst->setMetadata(LLVMContext::MD_tbaa_struct, this->tbaa_struct);
1358+
if (this->scope)
1359+
inst->setMetadata(LLVMContext::MD_alias_scope, this->scope);
1360+
if (this->noalias)
1361+
inst->setMetadata(LLVMContext::MD_noalias, this->noalias);
1362+
1363+
if (this->scope && isa<LoadInst>(inst)) {
1364+
// If this is in the read-only region, mark the load with "!invariant.load"
1365+
if (this->scope->getNumOperands() == 1) {
1366+
MDNode *operand = cast<MDNode>(this->scope->getOperand(0));
1367+
auto scope_name = cast<MDString>(operand->getOperand(0))->getString();
1368+
if (scope_name == "jnoalias_const")
1369+
inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(inst->getContext(), None));
1370+
}
1371+
}
1372+
1373+
return inst;
1374+
}
1375+
1376+
// Merge two sets of alias information.
1377+
jl_aliasinfo_t merge(const jl_aliasinfo_t &other) const {
1378+
jl_aliasinfo_t result;
1379+
result.tbaa = MDNode::getMostGenericTBAA(this->tbaa, other.tbaa);
1380+
result.tbaa_struct = nullptr;
1381+
result.scope = MDNode::getMostGenericAliasScope(this->scope, other.scope);
1382+
result.noalias = MDNode::intersect(this->noalias, other.noalias);
1383+
return result;
1384+
}
1385+
1386+
// Create alias information based on the provided TBAA metadata.
1387+
//
1388+
// This function only exists to help transition to using !noalias to encode
1389+
// memory region non-aliasing. It should be deleted once the TBAA metadata
1390+
// is improved to encode only memory layout and *not* memory regions.
1391+
static jl_aliasinfo_t fromTBAA(jl_codectx_t &ctx, MDNode *tbaa);
1392+
};
12791393

12801394
// metadata tracking for a llvm Value* during codegen
12811395
struct jl_cgval_t {
@@ -1441,6 +1555,7 @@ class jl_codectx_t {
14411555
jl_module_t *module = NULL;
14421556
jl_typecache_t type_cache;
14431557
jl_tbaacache_t tbaa_cache;
1558+
jl_noaliascache_t aliasscope_cache;
14441559
jl_method_instance_t *linfo = NULL;
14451560
jl_value_t *rettype = NULL;
14461561
jl_code_info_t *source = NULL;
@@ -1452,7 +1567,6 @@ class jl_codectx_t {
14521567
Value *spvals_ptr = NULL;
14531568
Value *argArray = NULL;
14541569
Value *argCount = NULL;
1455-
MDNode *aliasscope = NULL;
14561570
std::string funcName;
14571571
int vaSlot = -1; // name of vararg argument
14581572
int nReqArgs = 0;
@@ -1491,6 +1605,11 @@ class jl_codectx_t {
14911605
return tbaa_cache;
14921606
}
14931607

1608+
jl_noaliascache_t &noalias() {
1609+
aliasscope_cache.initialize(builder.getContext());
1610+
return aliasscope_cache;
1611+
}
1612+
14941613
~jl_codectx_t() {
14951614
// Transfer local delayed calls to the global queue
14961615
for (auto call_target : call_targets)
@@ -1502,6 +1621,77 @@ GlobalVariable *JuliaVariable::realize(jl_codectx_t &ctx) {
15021621
return realize(jl_Module);
15031622
}
15041623

1624+
jl_aliasinfo_t::jl_aliasinfo_t(jl_codectx_t &ctx, Region r, MDNode *tbaa): tbaa(tbaa), tbaa_struct(nullptr) {
1625+
MDNode *alias_scope;
1626+
jl_noaliascache_t::jl_regions_t regions = ctx.noalias().regions;
1627+
switch (r) {
1628+
case Region::unknown:
1629+
alias_scope = nullptr;
1630+
break;
1631+
case Region::gcframe:
1632+
alias_scope = regions.gcframe;
1633+
break;
1634+
case Region::stack:
1635+
alias_scope = regions.stack;
1636+
break;
1637+
case Region::data:
1638+
alias_scope = regions.data;
1639+
break;
1640+
case Region::constant:
1641+
alias_scope = regions.constant;
1642+
break;
1643+
case Region::type_metadata:
1644+
alias_scope = regions.type_metadata;
1645+
break;
1646+
}
1647+
1648+
MDNode *all_scopes[5] = { regions.gcframe, regions.stack, regions.data, regions.type_metadata, regions.constant };
1649+
if (alias_scope) {
1650+
// The matching region is added to !alias.scope
1651+
// All other regions are added to !noalias
1652+
1653+
int i = 0;
1654+
Metadata *scopes[1] = { alias_scope };
1655+
Metadata *noaliases[4];
1656+
for (auto const &scope: all_scopes) {
1657+
if (scope == alias_scope) continue;
1658+
noaliases[i++] = scope;
1659+
}
1660+
1661+
this->scope = MDNode::get(ctx.builder.getContext(), ArrayRef<Metadata*>(scopes));
1662+
this->noalias = MDNode::get(ctx.builder.getContext(), ArrayRef<Metadata*>(noaliases));
1663+
}
1664+
}
1665+
1666+
jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
1667+
auto cache = ctx.tbaa();
1668+
1669+
// Each top-level TBAA node has a corresponding !alias.scope scope
1670+
MDNode *tbaa_srcs[5] = { cache.tbaa_gcframe, cache.tbaa_stack, cache.tbaa_data, cache.tbaa_array, cache.tbaa_const };
1671+
Region regions[5] = { Region::gcframe, Region::stack, Region::data, Region::type_metadata, Region::constant };
1672+
1673+
if (tbaa != nullptr) {
1674+
MDNode *node = cast<MDNode>(tbaa->getOperand(1));
1675+
if (cast<MDString>(node->getOperand(0))->getString() != "jtbaa") {
1676+
1677+
// Climb up to node just before root
1678+
MDNode *parent_node = cast<MDNode>(node->getOperand(1));
1679+
while (cast<MDString>(parent_node->getOperand(0))->getString() != "jtbaa") {
1680+
node = parent_node;
1681+
parent_node = cast<MDNode>(node->getOperand(1));
1682+
}
1683+
1684+
// Find the matching node's index
1685+
for (int i = 0; i < 5; i++) {
1686+
if (cast<MDNode>(tbaa_srcs[i]->getOperand(1)) == node)
1687+
return jl_aliasinfo_t(ctx, regions[i], tbaa);
1688+
}
1689+
}
1690+
}
1691+
1692+
return jl_aliasinfo_t(ctx, Region::unknown, tbaa);
1693+
}
1694+
15051695
static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
15061696
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
15071697
static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
@@ -3254,7 +3444,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
32543444
*ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte);
32553445
}
32563446
else {
3257-
MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.aliasscope : nullptr;
3447+
MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.noalias().aliasscope.current : nullptr;
32583448
*ret = typed_load(ctx,
32593449
emit_arrayptr(ctx, ary, ary_ex),
32603450
idx, ety,
@@ -3369,7 +3559,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
33693559
emit_arrayptr(ctx, ary, ary_ex, isboxed),
33703560
idx, val, jl_cgval_t(), ety,
33713561
isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
3372-
ctx.aliasscope,
3562+
ctx.noalias().aliasscope.current,
33733563
data_owner,
33743564
isboxed,
33753565
isboxed ? AtomicOrdering::Release : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
@@ -7679,7 +7869,7 @@ static jl_llvm_functions_t
76797869
ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc);
76807870
coverageVisitStmt(debuginfoloc);
76817871
}
7682-
ctx.aliasscope = aliasscopes[cursor];
7872+
ctx.noalias().aliasscope.current = aliasscopes[cursor];
76837873
jl_value_t *stmt = jl_array_ptr_ref(stmts, cursor);
76847874
jl_expr_t *expr = jl_is_expr(stmt) ? (jl_expr_t*)stmt : nullptr;
76857875
if (jl_is_returnnode(stmt)) {

0 commit comments

Comments
 (0)