Skip to content

Commit

Permalink
intrinsics: optimize several atomic intrinsics (#41563)
Browse files Browse the repository at this point in the history
(cherry picked from commit 430e5e0)
  • Loading branch information
vtjnash authored and KristofferC committed Jul 20, 2021
1 parent 687c214 commit 1bfd4dc
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 93 deletions.
195 changes: 177 additions & 18 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,25 +584,21 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
jl_value_t *ety = jl_tparam0(aty);
if (jl_is_typevar(ety))
return emit_runtime_pointerref(ctx, argv);
if (!jl_is_datatype(ety))
ety = (jl_value_t*)jl_any_type;
if (!is_valid_intrinsic_elptr(ety)) {
emit_error(ctx, "pointerref: invalid pointer type");
return jl_cgval_t();
}

Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(T_size, 1));

if (ety == (jl_value_t*)jl_any_type) {
Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
return mark_julia_type(
ctx,
ctx.builder.CreateAlignedLoad(ctx.builder.CreateInBoundsGEP(T_prjlvalue, thePtr, im1), Align(align_nb)),
true,
ety);
LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.builder.CreateInBoundsGEP(T_prjlvalue, thePtr, im1), Align(align_nb));
tbaa_decorate(tbaa_data, load);
return mark_julia_type(ctx, load, true, ety);
}
else if (!jl_isbits(ety)) {
if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
emit_error(ctx, "pointerref: invalid pointer type");
return jl_cgval_t();
}
assert(jl_is_datatype(ety));
uint64_t size = jl_datatype_size(ety);
Value *strct = emit_allocobj(ctx, size,
Expand Down Expand Up @@ -656,8 +652,8 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
return emit_runtime_pointerset(ctx, argv);
if (align.constant == NULL || !jl_is_long(align.constant))
return emit_runtime_pointerset(ctx, argv);
if (!jl_is_datatype(ety))
ety = (jl_value_t*)jl_any_type;
if (!is_valid_intrinsic_elptr(ety))
emit_error(ctx, "pointerset: invalid pointer type");
emit_typecheck(ctx, x, ety, "pointerset");

Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
Expand All @@ -673,10 +669,6 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
tbaa_decorate(tbaa_data, store);
}
else if (!jl_isbits(ety)) {
if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
emit_error(ctx, "pointerset: invalid pointer type");
return jl_cgval_t();
}
thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
uint64_t size = jl_datatype_size(ety);
im1 = ctx.builder.CreateMul(im1, ConstantInt::get(T_size,
Expand All @@ -696,6 +688,170 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
return e;
}

// Emit code for the `atomic_fence` intrinsic.
//
// argv[0] carries the requested memory ordering. If it is a compile-time
// constant symbol the fence is generated inline and a ghost `nothing` value is
// returned; otherwise the call is forwarded to the runtime implementation
// through emit_runtime_call.
static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
{
    const jl_cgval_t &ordering_arg = argv[0];

    // Ordering is not a constant symbol: let the runtime intrinsic handle it.
    if (!ordering_arg.constant || !jl_is_symbol(ordering_arg.constant))
        return emit_runtime_call(ctx, atomic_fence, argv, 1);

    enum jl_memory_order mem_order = jl_get_atomic_order((jl_sym_t*)ordering_arg.constant, false, false);
    if (mem_order == jl_memory_order_invalid) {
        emit_atomic_error(ctx, "invalid atomic ordering");
        return jl_cgval_t(); // unreachable
    }

    // A fence instruction is only emitted for orderings above monotonic.
    if (mem_order > jl_memory_order_monotonic)
        ctx.builder.CreateFence(get_llvm_atomic_order(mem_order));
    return ghostValue(jl_nothing_type);
}

// Emit code for the `atomic_pointerref` intrinsic: an atomic load through a raw pointer.
//
// argv[0] is the pointer value (`e`), argv[1] is the memory ordering (`ord`).
//
// The call is forwarded to the runtime intrinsic (emit_runtime_call) when the
// pointer's type fails jl_is_cpointer_type, when the ordering is not a
// compile-time constant symbol, or when the element type is still a type
// variable. Otherwise the load is emitted inline, specialised on the element type:
//   - Any:        an atomic load of a boxed value pointer
//   - non-isbits: the bits are loaded atomically as one integer of the type's
//                 size and stored into a freshly allocated object
//   - isbits:     typed_load, or for ghost types only a fence (if the ordering
//                 is above monotonic)
// An invalid ordering, an element type rejected by is_valid_intrinsic_elptr, or
// a size that is not a power of two / exceeds MAX_POINTERATOMIC_SIZE emits an
// error and returns an empty jl_cgval_t.
static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
{
    const jl_cgval_t &e = argv[0];
    const jl_cgval_t &ord = argv[1];
    jl_value_t *aty = e.typ;
    if (!jl_is_cpointer_type(aty) || !ord.constant || !jl_is_symbol(ord.constant))
        return emit_runtime_call(ctx, atomic_pointerref, argv, 2);
    jl_value_t *ety = jl_tparam0(aty);
    if (jl_is_typevar(ety))
        return emit_runtime_call(ctx, atomic_pointerref, argv, 2);
    enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
    if (order == jl_memory_order_invalid) {
        emit_atomic_error(ctx, "invalid atomic ordering");
        return jl_cgval_t(); // unreachable
    }
    AtomicOrdering llvm_order = get_llvm_atomic_order(order);

    if (ety == (jl_value_t*)jl_any_type) {
        Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
        // Pass the loaded type explicitly (as the struct branch below already
        // does) instead of relying on the deprecated overload that derives it
        // from the pointer's element type.
        LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, thePtr, Align(sizeof(jl_value_t*)));
        tbaa_decorate(tbaa_data, load);
        load->setOrdering(llvm_order);
        return mark_julia_type(ctx, load, true, ety);
    }

    if (!is_valid_intrinsic_elptr(ety)) {
        emit_error(ctx, "atomic_pointerref: invalid pointer type");
        return jl_cgval_t();
    }

    // Any was handled above, so a valid element type is a concrete datatype here.
    assert(jl_is_datatype(ety));
    size_t nb = jl_datatype_size(ety);
    // Reject sizes that are not a power of two or that exceed the supported maximum.
    if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
        emit_error(ctx, "atomic_pointerref: invalid pointer for atomic operation");
        return jl_cgval_t();
    }

    if (!jl_isbits(ety)) {
        // Allocate the result object, then move the bits over with a single
        // atomic integer load of width nb*8 followed by a plain store.
        Value *strct = emit_allocobj(ctx, nb,
                                     literal_pointer_val(ctx, ety));
        Value *thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
        Type *loadT = Type::getIntNTy(jl_LLVMContext, nb * 8);
        thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
        MDNode *tbaa = best_tbaa(ety);
        LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb));
        tbaa_decorate(tbaa, load);
        load->setOrdering(llvm_order);
        // Reuse thePtr for the destination: the new object viewed as loadT*.
        thePtr = emit_bitcast(ctx, strct, thePtr->getType());
        StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety)));
        tbaa_decorate(tbaa, store);
        return mark_julia_type(ctx, strct, true, ety);
    }
    else {
        bool isboxed;
        Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
        assert(!isboxed);
        if (!type_is_ghost(ptrty)) {
            Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
            return typed_load(ctx, thePtr, nullptr, ety, tbaa_data, nullptr, isboxed, llvm_order, true, nb);
        }
        else {
            // Ghost types carry no data: nothing is loaded, but the requested
            // ordering is still honoured with a fence when it is above monotonic.
            if (order > jl_memory_order_monotonic)
                ctx.builder.CreateFence(llvm_order);
            return ghostValue(ety);
        }
    }
}

// Shared code generation for the atomic pointer-write intrinsics. `f` selects
// the flavor:
//   *e = x        (set)      f == atomic_pointerset;     argv = { e, x, ord }
//   *e <= x       (swap)     neither set nor replace;    argv = { e, x, ord }
//   *e y => x     (replace)  f == atomic_pointerreplace; argv = { e, y, x, ord, failord }
//
// The call is forwarded to the runtime intrinsic when the pointer's type fails
// jl_is_cpointer_type, when an ordering argument is not a compile-time constant
// symbol, when the element type is a type variable, or when the element type
// is not isbits. For `set` the returned value is the pointer argument itself;
// otherwise it is whatever typed_store returns.
static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs)
{
    bool is_set = (f == atomic_pointerset);
    bool is_replace = (f == atomic_pointerreplace);
    const jl_cgval_t no_value;
    const jl_cgval_t &ptr = argv[0];
    const jl_cgval_t &newval = argv[is_replace ? 2 : 1];
    const jl_cgval_t &expected = is_replace ? argv[1] : no_value;
    const jl_cgval_t &success_ord = argv[is_replace ? 3 : 2];
    const jl_cgval_t &failure_ord = is_replace ? argv[4] : no_value;

    // Builds "<intrinsic name><suffix>", emits it as an error, and yields the
    // empty value returned on the error paths below.
    auto fail_with = [&](const char *suffix) {
        std::string msg(StringRef(jl_intrinsic_name((int)f)));
        msg += suffix;
        emit_error(ctx, msg);
        return jl_cgval_t();
    };

    // Anything not fully known at compile time goes to the runtime intrinsic.
    jl_value_t *ptr_type = ptr.typ;
    if (!jl_is_cpointer_type(ptr_type) || !success_ord.constant || !jl_is_symbol(success_ord.constant))
        return emit_runtime_call(ctx, f, argv, nargs);
    if (is_replace && (!failure_ord.constant || !jl_is_symbol(failure_ord.constant)))
        return emit_runtime_call(ctx, f, argv, nargs);
    jl_value_t *elty = jl_tparam0(ptr_type);
    if (jl_is_typevar(elty))
        return emit_runtime_call(ctx, f, argv, nargs);

    // Only `replace` has a separate failure ordering; the others reuse the
    // success ordering. The failure ordering may not exceed the success one.
    enum jl_memory_order success_order = jl_get_atomic_order((jl_sym_t*)success_ord.constant, !is_set, true);
    enum jl_memory_order failure_order = success_order;
    if (is_replace)
        failure_order = jl_get_atomic_order((jl_sym_t*)failure_ord.constant, true, false);
    if (success_order == jl_memory_order_invalid || failure_order == jl_memory_order_invalid || failure_order > success_order) {
        emit_atomic_error(ctx, "invalid atomic ordering");
        return jl_cgval_t(); // unreachable
    }
    AtomicOrdering llvm_success = get_llvm_atomic_order(success_order);
    AtomicOrdering llvm_failure = get_llvm_atomic_order(failure_order);

    if (elty == (jl_value_t*)jl_any_type) {
        // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
        // n.b.: the expected value must be rooted, but not the others
        Value *rawptr = emit_unbox(ctx, T_pprjlvalue, ptr, ptr.typ);
        bool boxed = true;
        jl_cgval_t result = typed_store(ctx, rawptr, nullptr, newval, expected, elty, tbaa_data, nullptr, nullptr, boxed,
                                        llvm_success, llvm_failure, sizeof(jl_value_t*), false, is_set, is_replace, false);
        if (is_set)
            return ptr;
        return result;
    }

    if (!is_valid_intrinsic_elptr(elty))
        return fail_with(": invalid pointer type");
    emit_typecheck(ctx, newval, elty, std::string(jl_intrinsic_name((int)f)));

    // Reject sizes that are not a power of two or that exceed the supported maximum.
    size_t nbytes = jl_datatype_size(elty);
    if ((nbytes & (nbytes - 1)) != 0 || nbytes > MAX_POINTERATOMIC_SIZE)
        return fail_with(": invalid pointer for atomic operation");

    // Non-isbits element types are not optimized yet.
    if (!jl_isbits(elty))
        return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations

    // NOTE(review): unlike emit_atomic_pointerref there is no type_is_ghost
    // check before getPointerTo() here -- confirm zero-size types are safe on this path.
    bool boxed;
    Type *llvm_elty = julia_type_to_llvm(ctx, elty, &boxed);
    assert(!boxed);
    Value *rawptr = emit_unbox(ctx, llvm_elty->getPointerTo(), ptr, ptr.typ);
    jl_cgval_t result = typed_store(ctx, rawptr, nullptr, newval, expected, elty, tbaa_data, nullptr, nullptr, boxed,
                                    llvm_success, llvm_failure, nbytes, false, is_set, is_replace, false);
    if (is_set)
        return ptr;
    return result;
}

static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den)
{
Type *t = den->getType();
Expand Down Expand Up @@ -924,11 +1080,14 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
case pointerset:
return emit_pointerset(ctx, argv);
case atomic_fence:
return emit_atomicfence(ctx, argv);
case atomic_pointerref:
return emit_atomic_pointerref(ctx, argv);
case atomic_pointerset:
case atomic_pointerswap:
case atomic_pointermodify:
case atomic_pointerreplace:
return emit_atomic_pointerset(ctx, f, argv, nargs);
case atomic_pointermodify:
return emit_runtime_call(ctx, f, argv, nargs);
case bitcast:
return generic_bitcast(ctx, argv);
Expand Down
4 changes: 4 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,10 @@ extern JL_DLLEXPORT jl_value_t *jl_segv_exception;
JL_DLLEXPORT const char *jl_intrinsic_name(int f) JL_NOTSAFEPOINT;
unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;

// Returns nonzero when `ety` is accepted as the element type of a pointer
// handed to the pointer intrinsics: either `Any` itself, or a concrete type
// whose layout is not opaque.
STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety)
{
    if (ety == (jl_value_t*)jl_any_type)
        return 1;
    // The layout is only inspected once the type is known to be concrete.
    if (!jl_is_concrete_type(ety))
        return 0;
    return !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout);
}
JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v);
JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align);
JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *align, jl_value_t *i);
Expand Down
Loading

0 comments on commit 1bfd4dc

Please sign in to comment.