Skip to content

Commit d47cbf6

Browse files
authored
Allow for querying of build_id from objects (#53943)
For GPUCompiler we would like to support a native on-disk cache of LLVM IR. One of the longstanding issues has been the cache invalidation of such an on-disk cache. With #52233 we now have an integrated cache for the inference results and we can rely on `CodeInstance` to be stable across sessions. Due to #52119 we can also rely on the `objectid` to be stable. My initial thought was to key the native disk cache in GPUCompiler on the objectid of the corresponding CodeInstance (+ some compilation parameters). While discussing this with @rayegun yesterday we noted that having a CodeInstance with the same objectid might not be enough provenance. E.g., we are not guaranteed that the CodeInstance is from the same build artifact and the same precise source code. For the package images we track this during loading and validate all contents at once, and we explicitly keep track of the provenance chain. This PR adds a lookup table that maps from "external_blobs", e.g. loaded images, to the corresponding top module of each image, and uses this to determine the build_id of the package image.
1 parent be3bc9a commit d47cbf6

File tree

8 files changed

+61
-2
lines changed

8 files changed

+61
-2
lines changed

base/loading.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,6 +3061,14 @@ function module_build_id(m::Module)
30613061
return (UInt128(hi) << 64) | lo
30623062
end
30633063

3064+
function object_build_id(obj)
3065+
mod = ccall(:jl_object_top_module, Any, (Any,), obj)
3066+
if mod === nothing
3067+
return nothing
3068+
end
3069+
return module_build_id(mod::Module)
3070+
end
3071+
30643072
function isvalid_cache_header(f::IOStream)
30653073
pkgimage = Ref{UInt8}()
30663074
checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero

src/init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
826826

827827
arraylist_new(&jl_linkage_blobs, 0);
828828
arraylist_new(&jl_image_relocs, 0);
829+
arraylist_new(&jl_top_mods, 0);
829830
arraylist_new(&eytzinger_image_tree, 0);
830831
arraylist_new(&eytzinger_idxs, 0);
831832
arraylist_push(&eytzinger_idxs, (void*)0);

src/julia.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e
20782078
JL_DLLEXPORT void jl_restore_system_image(const char *fname);
20792079
JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
20802080
JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
2081+
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT;
20812082

20822083
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
20832084
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);

src/julia_internal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
333333
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
334334
JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
335335
JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages
336+
JL_DLLEXPORT extern arraylist_t jl_top_mods; // external linkage: sysimg/pkgimages
336337
extern arraylist_t eytzinger_image_tree;
337338
extern arraylist_t eytzinger_idxs;
338339

@@ -1012,7 +1013,8 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
10121013

10131014
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
10141015

1015-
uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
1016+
// Query if this object is perm-allocated in an image.
1017+
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
10161018

10171019
// the first argument to jl_idtable_rehash is used to return a value
10181020
// make sure it is rooted if it is used after the function returns

src/staticdata.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_
337337
// jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing)
338338
arraylist_t jl_linkage_blobs;
339339
arraylist_t jl_image_relocs;
340+
// Keep track of which image corresponds to which top module.
341+
arraylist_t jl_top_mods;
340342

341343
// Eytzinger tree of images. Used for very fast jl_object_in_image queries
342344
// See https://algorithmica.org/en/eytzinger
@@ -451,11 +453,23 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
451453
return idx;
452454
}
453455

454-
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
456+
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
455457
{
456458
return eyt_obj_in_img(obj);
457459
}
458460

461+
// Map an object to its "owning" top module
462+
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT
463+
{
464+
size_t idx = external_blob_index(v);
465+
size_t lbids = n_linkage_blobs();
466+
if (idx < lbids) {
467+
return (jl_value_t*)jl_top_mods.items[idx];
468+
}
469+
// The object is runtime allocated
470+
return (jl_value_t*)jl_nothing;
471+
}
472+
459473
// hash of definitions for predefined function pointers
460474
static htable_t fptr_to_id;
461475
void *native_functions; // opaque jl_native_code_desc_t blob used for fetching data from LLVM
@@ -3550,6 +3564,15 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
35503564
arraylist_push(&jl_linkage_blobs, (void*)image_base);
35513565
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
35523566
arraylist_push(&jl_image_relocs, (void*)relocs_base);
3567+
if (restored == NULL) {
3568+
arraylist_push(&jl_top_mods, (void*)jl_top_module);
3569+
} else {
3570+
size_t len = jl_array_nrows(*restored);
3571+
assert(len > 0);
3572+
jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1);
3573+
assert(jl_is_module(topmod));
3574+
arraylist_push(&jl_top_mods, (void*)topmod);
3575+
}
35533576
jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
35543577
rebuild_image_blob_tree();
35553578

test/precompile.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ FooBase_module = :FooBase4b3a94a1a081a8cb
1515
end
1616
using .ConflictingBindings
1717

18+
@testset "object_build_id" begin
19+
@test Base.object_build_id([1]) === nothing
20+
@test Base.object_build_id(Base) == Base.module_build_id(Base)
21+
end
22+
1823
# method root provenance
1924

2025
rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64
@@ -350,6 +355,9 @@ precompile_test_harness(false) do dir
350355
@test objectid(Foo.a_vec_int) === Foo.oid_vec_int
351356
@test objectid(Foo.a_mat_int) === Foo.oid_mat_int
352357
@test Foo.oid_vec_int !== Foo.oid_mat_int
358+
@test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int)
359+
@test Base.object_build_id(Foo) == Base.module_build_id(Foo)
360+
@test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo)
353361
end
354362

355363
@eval begin function ccallable_test()

test/precompile_absint1.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,14 @@ precompile_test_harness() do load_path
4444
@test isdefined(ci, :next)
4545
@test ci.owner === nothing
4646
@test ci.max_world == typemax(UInt)
47+
@test Base.module_build_id(TestAbsIntPrecompile1) ==
48+
Base.object_build_id(ci)
4749
ci = ci.next
4850
@test !isdefined(ci, :next)
4951
@test ci.owner === cache_owner
5052
@test ci.max_world == typemax(UInt)
53+
@test Base.module_build_id(TestAbsIntPrecompile1) ==
54+
Base.object_build_id(ci)
5155
end
5256
let m = only(methods(sum, (Vector{Float64},)))
5357
found = false
@@ -57,10 +61,14 @@ precompile_test_harness() do load_path
5761
@test isdefined(ci, :next)
5862
@test ci.owner === cache_owner
5963
@test ci.max_world == typemax(UInt)
64+
@test Base.module_build_id(TestAbsIntPrecompile1) ==
65+
Base.object_build_id(ci)
6066
ci = ci.next
6167
@test !isdefined(ci, :next)
6268
@test ci.owner === nothing
6369
@test ci.max_world == typemax(UInt)
70+
@test Base.module_build_id(TestAbsIntPrecompile1) ==
71+
Base.object_build_id(ci)
6472
found = true
6573
break
6674
end

test/precompile_absint2.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,14 @@ precompile_test_harness() do load_path
6767
@test isdefined(ci, :next)
6868
@test ci.owner === nothing
6969
@test ci.max_world == typemax(UInt)
70+
@test Base.module_build_id(TestAbsIntPrecompile2) ==
71+
Base.object_build_id(ci)
7072
ci = ci.next
7173
@test !isdefined(ci, :next)
7274
@test ci.owner === cache_owner
7375
@test ci.max_world == typemax(UInt)
76+
@test Base.module_build_id(TestAbsIntPrecompile2) ==
77+
Base.object_build_id(ci)
7478
end
7579
let m = only(methods(sum, (Vector{Float64},)))
7680
found = false
@@ -80,10 +84,14 @@ precompile_test_harness() do load_path
8084
@test isdefined(ci, :next)
8185
@test ci.owner === cache_owner
8286
@test ci.max_world == typemax(UInt)
87+
@test Base.module_build_id(TestAbsIntPrecompile2) ==
88+
Base.object_build_id(ci)
8389
ci = ci.next
8490
@test !isdefined(ci, :next)
8591
@test ci.owner === nothing
8692
@test ci.max_world == typemax(UInt)
93+
@test Base.module_build_id(TestAbsIntPrecompile2) ==
94+
Base.object_build_id(ci)
8795
found = true
8896
break
8997
end

0 commit comments

Comments
 (0)