Skip to content

Commit 5239727

Browse files
committed
staticdata: Some refactors for clarity
I was reading this code as part of looking into #52797. To recap, when we serialize objects for package images, we classify each method as either internal or external, depending on whether the method extends a function in the ambient set of modules ( system image, other previously loaded package images, etc) or whether it is private to the module we're currently serializing (in which case we call it internal). One of my primary confusions in reading this code was that the `new_specializations` object holds only external code instances in most of the code, but towards the end of loading, we used to also add any internal code instances that had dependencies on other external method instances in order to share the code instance validation code between the two cases. I don't think this design is that great. Conceptually, internal CodeInstances are already in the CI cache at the point that validation happens (their max_world is just set to 1, so they're not eligible to run). We do guard the cache insertion by a check whether a code instance already exists, which also catches this case, but I think it's cleaner to just not add any internal code instances to the list and instead simply re-activate the code instance in situ. Another issue with the old code that I didn't observe, but I think is possible in theory is that any CodeInstances that are not part of the cache hierarchy (e.g. because they were created by external abstract interpreters) should not accidentally get added to the cache hierarchy by this code path. I think this was possible in the old code, but I didn't observe it. With this change, `new_specializations` now always only holds external cis, so rename it appropriately. While we're at it, also do some other clarity changes.
1 parent 2afc20c commit 5239727

File tree

4 files changed

+97
-81
lines changed

4 files changed

+97
-81
lines changed

src/precompile_utils.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ static void *jl_precompile(int all)
279279
return native_code;
280280
}
281281

282-
static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations)
282+
static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_ext_cis)
283283
{
284284
if (!worklist)
285285
return NULL;
@@ -310,9 +310,9 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met
310310
}
311311
}
312312
}
313-
n = jl_array_nrows(new_specializations);
313+
n = jl_array_nrows(new_ext_cis);
314314
for (i = 0; i < n; i++) {
315-
jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i);
315+
jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i);
316316
precompile_enq_specialization_(ci->def, m);
317317
}
318318
void *native_code = jl_precompile_(m, 1);

src/staticdata.c

+50-48
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ External links:
8888
#include "valgrind.h"
8989
#include "julia_assert.h"
9090

91+
static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1;
92+
9193
#include "staticdata_utils.c"
9294
#include "precompile_utils.c"
9395

@@ -501,6 +503,8 @@ typedef struct {
501503
jl_array_t *link_ids_gvars;
502504
jl_array_t *link_ids_external_fnvars;
503505
jl_ptls_t ptls;
506+
// Set (implemented has a hasmap of MethodInstances to themselves) of which MethodInstances have (forward) edges
507+
// to other MethodInstances.
504508
htable_t callers_with_edges;
505509
jl_image_t *image;
506510
int8_t incremental;
@@ -1596,45 +1600,45 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
15961600
}
15971601
else if (jl_is_code_instance(v)) {
15981602
assert(f == s->s);
1603+
15991604
// Handle the native-code pointers
1600-
assert(f == s->s);
1601-
jl_code_instance_t *m = (jl_code_instance_t*)v;
1602-
jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset];
1605+
jl_code_instance_t *ci = (jl_code_instance_t*)v;
1606+
jl_code_instance_t *newci = (jl_code_instance_t*)&f->buf[reloc_offset];
16031607

16041608
if (s->incremental) {
16051609
arraylist_push(&s->fixup_objs, (void*)reloc_offset);
1606-
if (m->min_world > 1)
1607-
newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization
1608-
if (m->max_world != ~(size_t)0)
1609-
newm->max_world = 0;
1610+
if (ci->min_world > 1)
1611+
newci->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization
1612+
if (ci->max_world != ~(size_t)0)
1613+
newci->max_world = 0;
16101614
else {
1611-
if (jl_atomic_load_relaxed(&m->inferred) && ptrhash_has(&s->callers_with_edges, m->def))
1612-
newm->max_world = 1; // sentinel value indicating this will need validation
1613-
if (m->min_world > 0 && jl_atomic_load_relaxed(&m->inferred) ) {
1615+
if (jl_atomic_load_relaxed(&ci->inferred) && ptrhash_has(&s->callers_with_edges, ci->def))
1616+
newci->max_world = WORLD_AGE_REVALIDATION_SENTINEL;
1617+
if (ci->min_world > 0 && jl_atomic_load_relaxed(&ci->inferred) ) {
16141618
// TODO: also check if this object is part of the codeinst cache
16151619
// will check on deserialize if this cache entry is still valid
16161620
}
16171621
}
16181622
}
1619-
jl_atomic_store_relaxed(&newm->invoke, NULL);
1620-
jl_atomic_store_relaxed(&newm->specsigflags, 0);
1621-
jl_atomic_store_relaxed(&newm->specptr.fptr, NULL);
1623+
jl_atomic_store_relaxed(&newci->invoke, NULL);
1624+
jl_atomic_store_relaxed(&newci->specsigflags, 0);
1625+
jl_atomic_store_relaxed(&newci->specptr.fptr, NULL);
16221626
int8_t fptr_id = JL_API_NULL;
16231627
int8_t builtin_id = 0;
1624-
if (jl_atomic_load_relaxed(&m->invoke) == jl_fptr_const_return) {
1628+
if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
16251629
fptr_id = JL_API_CONST;
16261630
}
16271631
else {
1628-
if (jl_is_method(m->def->def.method)) {
1629-
builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&m->specptr.fptr));
1632+
if (jl_is_method(ci->def->def.method)) {
1633+
builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&ci->specptr.fptr));
16301634
if (builtin_id) { // found in the table of builtins
16311635
assert(builtin_id >= 2);
16321636
fptr_id = JL_API_BUILTIN;
16331637
}
16341638
else {
16351639
int32_t invokeptr_id = 0;
16361640
int32_t specfptr_id = 0;
1637-
jl_get_function_id(native_functions, m, &invokeptr_id, &specfptr_id); // see if we generated code for it
1641+
jl_get_function_id(native_functions, ci, &invokeptr_id, &specfptr_id); // see if we generated code for it
16381642
if (invokeptr_id) {
16391643
if (invokeptr_id == -1) {
16401644
fptr_id = JL_API_BOXED;
@@ -1666,7 +1670,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
16661670
}
16671671
}
16681672
}
1669-
jl_atomic_store_relaxed(&newm->invoke, NULL); // relocation offset
1673+
jl_atomic_store_relaxed(&newci->invoke, NULL); // relocation offset
16701674
if (fptr_id != JL_API_NULL) {
16711675
assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize");
16721676
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location
@@ -2514,7 +2518,7 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert)
25142518
}
25152519

25162520
static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key,
2517-
/* outputs */ jl_array_t **extext_methods, jl_array_t **new_specializations,
2521+
/* outputs */ jl_array_t **extext_methods, jl_array_t **new_ext_cis,
25182522
jl_array_t **method_roots_list, jl_array_t **ext_targets, jl_array_t **edges)
25192523
{
25202524
// extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist
@@ -2526,7 +2530,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
25262530
assert(edges_map == NULL);
25272531

25282532
// Save the inferred code from newly inferred, external methods
2529-
*new_specializations = queue_external_cis(newly_inferred);
2533+
*new_ext_cis = queue_external_cis(newly_inferred);
25302534

25312535
// Collect method extensions and edges data
25322536
JL_GC_PUSH1(&edges_map);
@@ -2552,9 +2556,9 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
25522556
*ext_targets = jl_alloc_vec_any(0);
25532557
*edges = jl_alloc_vec_any(0);
25542558
*method_roots_list = jl_alloc_vec_any(0);
2555-
// Collect the new method roots
2556-
jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key);
2557-
jl_collect_edges(*edges, *ext_targets, *new_specializations, world);
2559+
// Collect the new method roots for external specializations
2560+
jl_collect_new_roots(*method_roots_list, *new_ext_cis, worklist_key);
2561+
jl_collect_edges(*edges, *ext_targets, *new_ext_cis, world);
25582562
}
25592563
assert(edges_map == NULL); // jl_collect_edges clears this when done
25602564

@@ -2564,7 +2568,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
25642568
// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
25652569
static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
25662570
jl_array_t *worklist, jl_array_t *extext_methods,
2567-
jl_array_t *new_specializations, jl_array_t *method_roots_list,
2571+
jl_array_t *new_ext_cis, jl_array_t *method_roots_list,
25682572
jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
25692573
{
25702574
htable_new(&field_replace, 0);
@@ -2680,7 +2684,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
26802684
// Queue method extensions
26812685
jl_queue_for_serialization(&s, extext_methods);
26822686
// Queue the new specializations
2683-
jl_queue_for_serialization(&s, new_specializations);
2687+
jl_queue_for_serialization(&s, new_ext_cis);
26842688
// Queue the new roots
26852689
jl_queue_for_serialization(&s, method_roots_list);
26862690
// Queue the edges
@@ -2836,7 +2840,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
28362840
}
28372841
jl_write_value(&s, jl_module_init_order);
28382842
jl_write_value(&s, extext_methods);
2839-
jl_write_value(&s, new_specializations);
2843+
jl_write_value(&s, new_ext_cis);
28402844
jl_write_value(&s, method_roots_list);
28412845
jl_write_value(&s, ext_targets);
28422846
jl_write_value(&s, edges);
@@ -2924,24 +2928,24 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
29242928
ff = f;
29252929
}
29262930

2927-
jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL;
2931+
jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_ext_cis = NULL;
29282932
jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
29292933
int64_t checksumpos = 0;
29302934
int64_t checksumpos_ff = 0;
29312935
int64_t datastartpos = 0;
2932-
JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
2936+
JL_GC_PUSH6(&mod_array, &extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges);
29332937

29342938
if (worklist) {
29352939
mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array)
29362940
// Generate _native_data`
29372941
if (_native_data != NULL) {
29382942
jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
2939-
&extext_methods, &new_specializations, NULL, NULL, NULL);
2943+
&extext_methods, &new_ext_cis, NULL, NULL, NULL);
29402944
jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1);
2941-
*_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations);
2945+
*_native_data = jl_precompile_worklist(worklist, extext_methods, new_ext_cis);
29422946
jl_precompile_toplevel_module = NULL;
29432947
extext_methods = NULL;
2944-
new_specializations = NULL;
2948+
new_ext_cis = NULL;
29452949
}
29462950
jl_write_header_for_incremental(f, worklist, mod_array, udeps, srctextpos, &checksumpos);
29472951
if (emit_split) {
@@ -2964,7 +2968,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
29642968
ct->reentrant_timing |= 0b1000;
29652969
if (worklist) {
29662970
jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
2967-
&extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
2971+
&extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges);
29682972
if (!emit_split) {
29692973
write_int32(f, 0); // No clone_targets
29702974
write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f));
@@ -2976,7 +2980,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
29762980
}
29772981
if (_native_data != NULL)
29782982
native_functions = *_native_data;
2979-
jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
2983+
jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, method_roots_list, ext_targets, edges);
29802984
if (_native_data != NULL)
29812985
native_functions = NULL;
29822986
// make sure we don't run any Julia code concurrently before this point
@@ -3058,7 +3062,7 @@ extern void export_jl_small_typeof(void);
30583062
static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
30593063
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
30603064
jl_array_t **extext_methods,
3061-
jl_array_t **new_specializations, jl_array_t **method_roots_list,
3065+
jl_array_t **new_ext_cis, jl_array_t **method_roots_list,
30623066
jl_array_t **ext_targets, jl_array_t **edges,
30633067
char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
30643068
{
@@ -3120,7 +3124,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31203124
ios_seek(f, LLT_ALIGN(ios_pos(f), 8));
31213125
assert(!ios_eof(f));
31223126
s.s = f;
3123-
uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0;
3127+
uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_ext_cis = 0, offset_method_roots_list = 0;
31243128
uintptr_t offset_ext_targets = 0, offset_edges = 0;
31253129
if (!s.incremental) {
31263130
size_t i;
@@ -3152,7 +3156,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31523156
offset_restored = jl_read_offset(&s);
31533157
offset_init_order = jl_read_offset(&s);
31543158
offset_extext_methods = jl_read_offset(&s);
3155-
offset_new_specializations = jl_read_offset(&s);
3159+
offset_new_ext_cis = jl_read_offset(&s);
31563160
offset_method_roots_list = jl_read_offset(&s);
31573161
offset_ext_targets = jl_read_offset(&s);
31583162
offset_edges = jl_read_offset(&s);
@@ -3181,16 +3185,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31813185
uint32_t external_fns_begin = read_uint32(f);
31823186
jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
31833187
if (s.incremental) {
3184-
assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges);
3188+
assert(restored && init_order && extext_methods && new_ext_cis && method_roots_list && ext_targets && edges);
31853189
*restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored);
31863190
*init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order);
31873191
*extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods);
3188-
*new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations);
3192+
*new_ext_cis = (jl_array_t*)jl_delayed_reloc(&s, offset_new_ext_cis);
31893193
*method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list);
31903194
*ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets);
31913195
*edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges);
3192-
if (!*new_specializations)
3193-
*new_specializations = jl_alloc_vec_any(0);
31943196
}
31953197
s.s = NULL;
31963198

@@ -3454,8 +3456,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
34543456
jl_code_instance_t *ci = (jl_code_instance_t*)obj;
34553457
assert(s.incremental);
34563458
ci->min_world = world;
3457-
if (ci->max_world != 0)
3458-
jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci);
34593459
}
34603460
else if (jl_is_globalref(obj)) {
34613461
continue; // wait until all the module binding tables have been initialized
@@ -3601,11 +3601,11 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
36013601
assert(datastartpos > 0 && datastartpos < dataendpos);
36023602
needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc;
36033603
jl_value_t *restored = NULL;
3604-
jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
3604+
jl_array_t *init_order = NULL, *extext_methods = NULL, *new_ext_cis = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
36053605
jl_svec_t *cachesizes_sv = NULL;
36063606
char *base;
36073607
arraylist_t ccallable_list;
3608-
JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv);
3608+
JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges, &cachesizes_sv);
36093609

36103610
{ // make a permanent in-memory copy of f (excluding the header)
36113611
ios_bufmode(f, bm_none);
@@ -3629,17 +3629,18 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
36293629
ios_close(f);
36303630
ios_static_buffer(f, sysimg, len);
36313631
pkgcachesizes cachesizes;
3632-
jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes);
3632+
jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_ext_cis, &method_roots_list,
3633+
&ext_targets, &edges, &base, &ccallable_list, &cachesizes);
36333634
JL_SIGATOMIC_END();
36343635

36353636
// Insert method extensions
36363637
jl_insert_methods(extext_methods);
3637-
// No special processing of `new_specializations` is required because recaching handled it
3638+
// No special processing of `new_ext_cis` is required because recaching handled it
36383639
// Add roots to methods
36393640
jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored));
36403641
// Handle edges
36413642
size_t world = jl_atomic_load_acquire(&jl_world_counter);
3642-
jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last)
3643+
jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_ext_cis, world); // restore external backedges (needs to be last)
36433644
// reinit ccallables
36443645
jl_reinit_ccallable(&ccallable_list, base, pkgimage_handle);
36453646
arraylist_free(&ccallable_list);
@@ -3653,7 +3654,8 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
36533654
jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist));
36543655
jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist));
36553656
jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist));
3656-
restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list,
3657+
restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods,
3658+
new_ext_cis ? (jl_value_t*)new_ext_cis : jl_nothing, method_roots_list,
36573659
ext_targets, edges, cachesizes_sv);
36583660
}
36593661
else {

0 commit comments

Comments
 (0)