Skip to content

Commit b43d3e0

Browse files
committed
Encode link_id in tagged linkage
On 64-bit, we have enough space to encode (1) the tag, (2) the `depmods` index, and (3) the offset all in a single 64-bit pointer field. This means we don't need the external `link_id` arrays, which reduces the size of many pkgimages by ~5%. On 32-bit, we don't have enough bits to implement this strategy. However, most linkages seem to be against the sysimage, and so by giving that a separate tag we can achieve similar compression because the `link_id` lists will be much shorter.
1 parent 4dad6d3 commit b43d3e0

File tree

2 files changed

+145
-25
lines changed

2 files changed

+145
-25
lines changed

src/staticdata.c

Lines changed: 92 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,12 @@ typedef struct {
356356
arraylist_t fixup_types; // a list of locations of types requiring (re)caching
357357
arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching
358358
arraylist_t ccallable_list; // @ccallable entry points to install
359+
#ifdef _P64
360+
// On 64-bit, we can encode the "`depmods` index" in the reftag, so all we need is a mapping between
361+
// the buildid_idx & depmods_idx:
362+
jl_array_t *buildid_depmods_idxs;
363+
// On 64bit, all the `link_ids_*` arrays below will be NULL.
364+
#endif
359365
// record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg
360366
// conceptually, the base pointer for the jth externally-linked item is determined from
361367
// i = findfirst(==(link_ids[j]), jl_build_ids)
@@ -386,7 +392,8 @@ enum RefTags {
386392
SymbolRef, // symbols
387393
FunctionRef, // generic functions
388394
BuiltinFunctionRef, // builtin functions
389-
ExternalLinkage // items defined externally (used when serializing packages)
395+
SysimageLinkage, // reference to the sysimage (from pkgimage)
396+
ExternalLinkage // reference to some other pkgimage
390397
};
391398

392399
// calling conventions for internal entry points.
@@ -404,9 +411,11 @@ typedef enum {
404411

405412
#ifdef _P64
406413
#define RELOC_TAG_OFFSET 61
414+
#define DEPS_IDX_OFFSET 40 // only on 64-bit can we encode the dependency-index as part of the tagged reloc
407415
#else
408416
// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data.
409417
#define RELOC_TAG_OFFSET 29
418+
#define DEPS_IDX_OFFSET RELOC_TAG_OFFSET
410419
#endif
411420

412421
#if RELOC_TAG_OFFSET <= 32
@@ -758,7 +767,7 @@ done_fields: ;
758767
}
759768
arraylist_push(&serialization_queue, (void*) v);
760769
size_t idx = serialization_queue.len - 1;
761-
assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
770+
assert(serialization_queue.len < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "too many items to serialize");
762771

763772
*bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
764773
}
@@ -863,29 +872,40 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT
863872
static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) {
864873
size_t i = external_blob_index(v);
865874
if (i < n_linkage_blobs()) {
875+
// We found the sysimg/pkg that this item links against
876+
// Compute the relocation code
877+
size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
878+
offset /= sizeof(void*);
879+
assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");
880+
881+
if (i == 0)
882+
return ((uintptr_t)SysimageLinkage << RELOC_TAG_OFFSET) + offset; // sysimage
866883
assert(link_ids && jl_is_array(link_ids));
884+
#ifdef _P64
885+
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
886+
return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET)
887+
+ (((uintptr_t)link_id_data[i]) << DEPS_IDX_OFFSET) + offset; // on 64-bit, link_ids stores the mapping from build_id_idx -> deps_idx
888+
#else
889+
// On 32bit, we store the image key in `link_ids`
867890
assert(jl_build_ids && jl_is_array(jl_build_ids));
868891
uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
869-
// We found the sysimg/pkg that this item links against
870-
// Store the image key in `link_ids`
871892
jl_array_grow_end(link_ids, 1);
872-
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
893+
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); // wait until after the `grow`
873894
link_id_data[jl_array_len(link_ids)-1] = build_id_data[i];
874-
// Compute the relocation code
875-
size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
876-
offset /= sizeof(void*);
877-
assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
878-
// jl_printf(JL_STDOUT, "External link %ld against blob %d with key %ld at position 0x%lx with offset 0x%lx to \n", jl_array_len(link_ids), i, build_id_data[i>>1], ios_pos(s->s), offset);
879-
// jl_(v);
880895
return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
896+
#endif
881897
}
882898
return 0;
883899
}
884900

885901
// Return the integer `id` for `v`. Generically this is looked up in `serialization_order`,
886902
// but symbols, small integers, and a couple of special items (`nothing` and the root Task)
887903
// have special handling.
904+
#ifdef _P64
905+
#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), s->buildid_depmods_idxs)
906+
#else
888907
#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids)
908+
#endif
889909
static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT
890910
{
891911
assert(v != NULL && "cannot get backref to NULL object");
@@ -898,7 +918,7 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
898918
write_uint32(s->symbols, l);
899919
ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1);
900920
size_t offset = ++nsym_tag;
901-
assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols");
921+
assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "too many symbols");
902922
idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset);
903923
*pidx = idx;
904924
}
@@ -1143,7 +1163,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
11431163
// write data and relocations
11441164
newa->data = NULL; // relocation offset
11451165
data /= sizeof(void*);
1146-
assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
1166+
assert(data < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to constant data too large");
11471167
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
11481168
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
11491169
if (jl_is_cpointer_type(et)) {
@@ -1246,7 +1266,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
12461266
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 8);
12471267
write_padding(s->const_data, data - ios_pos(s->const_data));
12481268
data /= sizeof(void*);
1249-
assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
1269+
assert(data < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to constant data too large");
12501270
arraylist_push(&s->relocs_list, (void*)(reloc_offset + 8)); // relocation location
12511271
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
12521272
void *pdata = jl_unbox_voidpointer(jl_get_nth_field(v, 2));
@@ -1533,6 +1553,8 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
15331553
case FunctionRef:
15341554
assert(offset < JL_API_MAX && "unknown function pointer id");
15351555
break;
1556+
case SysimageLinkage:
1557+
break;
15361558
case ExternalLinkage:
15371559
break;
15381560
default:
@@ -1599,15 +1621,25 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
15991621
//default:
16001622
assert("corrupt relocation item id");
16011623
}
1624+
case SysimageLinkage:
1625+
assert(jl_linkage_blobs.len > 0);
1626+
return (uintptr_t)jl_linkage_blobs.items[0] + offset*sizeof(void*);
16021627
case ExternalLinkage:
16031628
assert(link_ids);
1604-
assert(link_index);
16051629
assert(jl_build_ids);
16061630
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
16071631
uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
1632+
#ifdef _P64
1633+
size_t depsidx = offset >> DEPS_IDX_OFFSET;
1634+
offset &= ((size_t)1 << DEPS_IDX_OFFSET) - 1;
1635+
assert(depsidx < jl_array_len(link_ids)); // on 64-bit, here link_ids is a build_id lookup by depmod index
1636+
uint64_t build_id = link_id_data[depsidx];
1637+
#else
1638+
assert(link_index);
16081639
assert(0 <= *link_index && *link_index < jl_array_len(link_ids));
16091640
uint64_t build_id = link_id_data[*link_index];
16101641
*link_index += 1;
1642+
#endif
16111643
size_t i = 0, nids = jl_array_len(jl_build_ids);
16121644
while (i < nids) {
16131645
if (build_id == build_id_data[i])
@@ -1799,8 +1831,12 @@ static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL
17991831
uintptr_t base = (uintptr_t)&s->s->buf[0];
18001832
size_t size = s->s->size;
18011833
int link_index = 0;
1834+
#ifdef _P64
1835+
jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->buildid_depmods_idxs, &link_index);
1836+
#else
18021837
jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->link_ids_relocs, &link_index);
18031838
assert(link_index < jl_array_len(s->link_ids_relocs));
1839+
#endif
18041840
return ret;
18051841
}
18061842

@@ -1890,13 +1926,24 @@ static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image, uint3
18901926
reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0];
18911927
int gvar_link_index = 0;
18921928
int external_fns_link_index = 0;
1929+
jl_array_t *link_ids = NULL;
18931930
for (i = 0; i < l; i++) {
18941931
uintptr_t offset = gvars[i];
18951932
uintptr_t v = 0;
18961933
if (i < external_fns_begin) {
1897-
v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &gvar_link_index);
1934+
#ifdef _P64
1935+
link_ids = s->buildid_depmods_idxs;
1936+
#else
1937+
link_ids = s->link_ids_gvars;
1938+
#endif
1939+
v = get_item_for_reloc(s, base, size, offset, link_ids, &gvar_link_index);
18981940
} else {
1899-
v = get_item_for_reloc(s, base, size, offset, s->link_ids_external_fnvars, &external_fns_link_index);
1941+
#ifdef _P64
1942+
link_ids = s->buildid_depmods_idxs;
1943+
#else
1944+
link_ids = s->link_ids_external_fnvars;
1945+
#endif
1946+
v = get_item_for_reloc(s, base, size, offset, link_ids, &external_fns_link_index);
19001947
}
19011948
uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i);
19021949
*gv = v;
@@ -2221,7 +2268,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
22212268
}
22222269

22232270
// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
2224-
static void jl_save_system_image_to_stream(ios_t *f,
2271+
static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *udeps,
22252272
jl_array_t *worklist, jl_array_t *extext_methods,
22262273
jl_array_t *new_specializations, jl_array_t *method_roots_list,
22272274
jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
@@ -2269,10 +2316,15 @@ static void jl_save_system_image_to_stream(ios_t *f,
22692316
arraylist_new(&s.fixup_types, 0);
22702317
arraylist_new(&s.fixup_objs, 0);
22712318
arraylist_new(&s.ccallable_list, 0);
2319+
#ifdef _P64
2320+
s.buildid_depmods_idxs = image_to_depmodidx(jl_build_ids, udeps);
2321+
s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = s.link_ids_external_fnvars = NULL;
2322+
#else
22722323
s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0);
22732324
s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0);
22742325
s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
22752326
s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
2327+
#endif
22762328
htable_new(&s.callers_with_edges, 0);
22772329
jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;
22782330

@@ -2371,21 +2423,21 @@ static void jl_save_system_image_to_stream(ios_t *f,
23712423
jl_write_relocations(&s);
23722424
}
23732425

2374-
if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
2426+
if (sysimg.size > ((uintptr_t)1 << DEPS_IDX_OFFSET)) {
23752427
jl_printf(
23762428
JL_STDERR,
23772429
"ERROR: system image too large: sysimg.size is %jd but the limit is %" PRIxPTR "\n",
23782430
(intmax_t)sysimg.size,
2379-
((uintptr_t)1 << RELOC_TAG_OFFSET)
2431+
((uintptr_t)1 << DEPS_IDX_OFFSET)
23802432
);
23812433
jl_exit(1);
23822434
}
2383-
if (const_data.size / sizeof(void*) > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
2435+
if (const_data.size / sizeof(void*) > ((uintptr_t)1 << DEPS_IDX_OFFSET)) {
23842436
jl_printf(
23852437
JL_STDERR,
23862438
"ERROR: system image too large: const_data.size is %jd but the limit is %" PRIxPTR "\n",
23872439
(intmax_t)const_data.size,
2388-
((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*)
2440+
((uintptr_t)1 << DEPS_IDX_OFFSET)*sizeof(void*)
23892441
);
23902442
jl_exit(1);
23912443
}
@@ -2461,6 +2513,7 @@ static void jl_save_system_image_to_stream(ios_t *f,
24612513
jl_write_value(&s, ext_targets);
24622514
jl_write_value(&s, edges);
24632515
}
2516+
#ifndef _P64
24642517
write_uint32(f, jl_array_len(s.link_ids_gctags));
24652518
ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t));
24662519
write_uint32(f, jl_array_len(s.link_ids_relocs));
@@ -2469,6 +2522,7 @@ static void jl_save_system_image_to_stream(ios_t *f,
24692522
ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t));
24702523
write_uint32(f, jl_array_len(s.link_ids_external_fnvars));
24712524
ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars)*sizeof(uint64_t));
2525+
#endif
24722526
write_uint32(f, external_fns_begin);
24732527
jl_write_arraylist(s.s, &s.ccallable_list);
24742528
}
@@ -2585,7 +2639,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
25852639
*_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
25862640
}
25872641
native_functions = *_native_data;
2588-
jl_save_system_image_to_stream(ff, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
2642+
jl_save_system_image_to_stream(ff, *udeps, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
25892643
native_functions = NULL;
25902644
if (worklist) {
25912645
// Re-enable running julia code for postoutput hooks, atexit, etc.
@@ -2758,6 +2812,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
27582812
offset_ext_targets = jl_read_offset(&s);
27592813
offset_edges = jl_read_offset(&s);
27602814
}
2815+
#ifdef _P64
2816+
s.buildid_depmods_idxs = depmod_to_imageidx(depmods);
2817+
#else
27612818
size_t nlinks_gctags = read_uint32(f);
27622819
if (nlinks_gctags > 0) {
27632820
s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags);
@@ -2778,6 +2835,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
27782835
s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_external_fnvars);
27792836
ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint64_t));
27802837
}
2838+
#endif
27812839
uint32_t external_fns_begin = read_uint32(f);
27822840
jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
27832841
if (s.incremental) {
@@ -2804,10 +2862,19 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
28042862
*base = image_base;
28052863

28062864
s.s = &sysimg;
2807-
jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags
2865+
#ifdef _P64
2866+
jl_array_t *link_ids = NULL;
2867+
#else
2868+
jl_array_t *link_ids = s.link_ids_gctags;
2869+
#endif
2870+
jl_read_reloclist(&s, link_ids, GC_OLD); // gctags
28082871
size_t sizeof_tags = ios_pos(&relocs);
28092872
(void)sizeof_tags;
2810-
jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs
2873+
#ifdef _P64
2874+
#else
2875+
link_ids = s.link_ids_relocs;
2876+
#endif
2877+
jl_read_reloclist(&s, link_ids, 0); // general relocs
28112878
// s.link_ids_gvars will be processed in `jl_update_all_gvars`
28122879
// s.link_ids_external_fns will be processed in `jl_update_all_gvars`
28132880
jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs

src/staticdata_utils.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,3 +1270,56 @@ JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s, uint8_t *pkgimage, int64_t
12701270
}
12711271
return checksum;
12721272
}
1273+
1274+
// Returns `depmodidxs` where `udeps[depmodidxs[i]]` corresponds to `build_ids[i]`
1275+
static jl_array_t *image_to_depmodidx(jl_array_t *build_ids, jl_array_t *udeps)
1276+
{
1277+
if (!udeps || !build_ids)
1278+
return NULL;
1279+
size_t j = 0, lbids = jl_array_len(build_ids), ldeps = jl_array_len(udeps);
1280+
uint64_t *bids = (uint64_t*)jl_array_data(build_ids);
1281+
jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids);
1282+
int32_t *dmidxs = (int32_t*)jl_array_data(depmodidxs);
1283+
for (size_t i = 0; i < lbids; i++) {
1284+
dmidxs[i] = -1;
1285+
uint64_t bid = bids[i];
1286+
j = 0; // sad that this is of O(M*N)
1287+
while (j < ldeps) {
1288+
jl_value_t *deptuple = jl_array_ptr_ref(udeps, j);
1289+
jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module
1290+
jl_module_t *depmod_top = depmod;
1291+
while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top)
1292+
depmod_top = depmod_top->parent;
1293+
if (depmod_top == jl_base_module) {
1294+
dmidxs[i] = 0;
1295+
break;
1296+
}
1297+
if (depmod_top->build_id.lo == bid) {
1298+
dmidxs[i] = j;
1299+
break;
1300+
}
1301+
j++;
1302+
}
1303+
assert(dmidxs[i] >= 0);
1304+
}
1305+
return depmodidxs;
1306+
}
1307+
1308+
// Returns `imageidxs` where `imageidxs[i]` is the blob corresponding to `depmods[i]`
1309+
static jl_array_t *depmod_to_imageidx(jl_array_t *depmods)
1310+
{
1311+
if (!depmods)
1312+
return NULL;
1313+
size_t ldeps = jl_array_len(depmods);
1314+
jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps);
1315+
int32_t *imgidxs = (int32_t*)jl_array_data(imageidxs);
1316+
for (size_t i = 0; i < ldeps; i++) {
1317+
imgidxs[i] = -1;
1318+
jl_value_t *depmod = jl_array_ptr_ref(depmods, i);
1319+
assert(jl_is_module(depmod));
1320+
size_t j = external_blob_index(depmod);
1321+
assert(j < 1<<31);
1322+
imgidxs[i] = (int32_t)j;
1323+
}
1324+
return imageidxs;
1325+
}

0 commit comments

Comments
 (0)