Skip to content

Commit dfe6a13

Browse files
authored
precompile: don't waste memory on useless inferred code (#56749)
We never have a reason to reference this data again since we already have native code generated for it, so it is simply wasting memory and download space.

$ du -sh {old,new}/usr/share/julia/compiled
256M old
227M new
1 parent e0656ac commit dfe6a13

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

Compiler/src/effects.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly
335335

336336
is_consistent_overlay(effects::Effects) = effects.nonoverlayed === CONSISTENT_OVERLAY
337337

338+
# (sync this with codegen.cpp and staticdata.c effects_foldable functions)
338339
function encode_effects(e::Effects)
339340
return ((e.consistent % UInt32) << 0) |
340341
((e.effect_free % UInt32) << 3) |

src/codegen.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10124,10 +10124,10 @@ jl_llvm_functions_t jl_emit_codeinst(
1012410124
else if (jl_is_method(def) && // don't delete toplevel code
1012510125
def->source != NULL && // don't delete code from optimized opaque closures that can't be reconstructed
1012610126
inferred != jl_nothing && // and there is something to delete (test this before calling jl_ir_inlining_cost)
10127-
!effects_foldable(jl_atomic_load_relaxed(&codeinst->ipo_purity_bits)) && // don't delete code we may want for irinterp
10128-
((jl_ir_inlining_cost(inferred) == UINT16_MAX) || // don't delete inlineable code
10129-
jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) && // unless it is constant
10130-
!(params.imaging_mode || jl_options.incremental)) { // don't delete code when generating a precompile file
10127+
((!effects_foldable(jl_atomic_load_relaxed(&codeinst->ipo_purity_bits)) && // don't delete code we may want for irinterp
10128+
(jl_ir_inlining_cost(inferred) == UINT16_MAX) && // don't delete inlineable code
10129+
!jl_generating_output()) || // don't delete code when generating a precompile file, trading memory in the short term for avoiding likely duplicating inference work for aotcompile
10130+
jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)) { // unless it is constant (although this shouldn't have had code in the first place)
1013110131
// Never end up in a situation where the codeinst has no invoke, but also no source, so we never fall
1013210132
// through the cracks of SOURCE_MODE_ABI.
1013310133
jl_callptr_t expected = NULL;

src/staticdata.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,16 @@ static uintptr_t jl_fptr_id(void *fptr)
769769
return *(uintptr_t*)pbp;
770770
}
771771

772+
// Test a packed effects word (the visible caller passes a CodeInstance's `ipo_purity_bits`)
// for foldability. Returns nonzero only when all four checks below pass: bits 0-2, bits 3-4
// and bits 10-11 are all zero, and bit 6 is set; otherwise returns 0.
// The serialization code only drops inferred source on this basis when the result is 0,
// since foldable code may still be wanted for irinterp.
static int effects_foldable(uint32_t effects)
773+
{
774+
// N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true)
775+
return ((effects & 0x7) == 0) && // is_consistent(effects)
776+
(((effects >> 10) & 0x03) == 0) && // is_noub(effects)
777+
(((effects >> 3) & 0x03) == 0) && // is_effect_free(effects)
778+
((effects >> 6) & 0x01); // is_terminates(effects)
779+
}
780+
781+
772782
// `jl_queue_for_serialization` adds items to `serialization_order`
// (shorthand for `jl_queue_for_serialization_` with recursive=1 and immediate=0; `v` is cast to `jl_value_t*`)
773783
#define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
774784
static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
@@ -908,8 +918,24 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
908918
// TODO: if (ci in ci->defs->cache)
909919
record_field_change((jl_value_t**)&ci->next, NULL);
910920
}
911-
if (jl_atomic_load_relaxed(&ci->inferred) && !is_relocatable_ci(&relocatable_ext_cis, ci))
912-
record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
921+
jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred);
922+
if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
923+
if (!is_relocatable_ci(&relocatable_ext_cis, ci))
924+
record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
925+
else if (jl_is_method(ci->def->def.method) && // don't delete toplevel code
926+
ci->def->def.method->source) { // don't delete code from optimized opaque closures that can't be reconstructed (and builtins)
927+
if (jl_atomic_load_relaxed(&ci->max_world) != ~(size_t)0 || // delete all code that cannot run
928+
jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant
929+
record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
930+
}
931+
else if (native_functions && // don't delete any code if making a ji file
932+
!effects_foldable(jl_atomic_load_relaxed(&ci->ipo_purity_bits)) && // don't delete code we may want for irinterp
933+
jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code
934+
// delete the code now: if we thought it was worth keeping, it would have been converted to object code
935+
record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
936+
}
937+
}
938+
}
913939
}
914940

915941
if (immediate) // must be things that can be recursively handled, and valid as type parameters

0 commit comments

Comments
 (0)