Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add [Obj.uniquely_reachable_words] #1705

Merged
merged 9 commits into from
Aug 30, 2023
138 changes: 57 additions & 81 deletions ocaml/runtime/extern.c
Original file line number Diff line number Diff line change
Expand Up @@ -1152,45 +1152,19 @@ CAMLexport void caml_serialize_block_float_8(void * data, intnat len)
}

enum reachable_words_node_state {
/* This node will be ignored in all traversals. This can happen for two reasons:
* - it is reachable from at least two distinct roots, so doesn't have a unique owner
* - we are in mode ComputeUnique and this node has already been counted */
Ignored = -1,
/* This node is reachable from at least two distinct roots, so it doesn't
* have a unique owner and will be ignored in all future traversals. */
Shared = -1,
/* This node is one of the roots and has not been visited yet (i.e. the computation
* starting at that root still hasn't ran */
RootUnprocessed = -2,
/* This node is one of the roots and the computation for that root has already ran */
RootProcessed = -3,
/* States that are positive integers indicate that a node has only been visited
/* States that are non-negative integers indicate that a node has only been visited
* starting from a single root. The state is then equal to the identifier of the
* root that we reached it from */
};

enum reachable_words_traversal_mode {
/* Performs the main traversal, for each descendant recording that it is
* reachable from the current root.
*
* Upon encountering a node that hasn't been visited yet, we mark it as visited,
* recording our identifier to ensure we avoid double counting it. If it has already
* been visited, this means it does not uniquely belong to any owner, so we mark
* it as Ignored.
*
* Returns the total size of elements marked, that is ones that are reachable
* from the current root and can be reached by at most one root from the ones
* that already ran. */
IncrementReachedCount,
/* Performs the final traversal, summing up the sizes of descandants that are
* only reachable from the current root.
*
* Assumes that we have previously ran [IncrementReachedCount] to completion,
* i.e. our root is in state [RootProcessed] and every other reachable node either
* has our identifier or is [Ignored]. While running, we mark all visited nodes
* as [Ignored].
*
* Returns the total size of elements that are marked with our identifier. */
ComputeUnique,
};

/* Performs traversal through the OCaml object reachability graph to deterime
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
how much memory an object has access to.

Expand All @@ -1201,8 +1175,19 @@ enum reachable_words_traversal_mode {

For each value node visited, we record its traversal status in the [pos] field
of its entry in [position_table.entries]. The statuses are described in detail
in the [reachable_words_node_state] enum. */
intnat reachable_words_once(value root, enum reachable_words_traversal_mode mode, intnat identifier) {
in the [reachable_words_node_state] enum.

Returns the total size of elements marked, that is ones that are reachable
from the current root and can be reached by at most one root from the ones
that already ran.

If [sizes_by_root_id] is not [NULL], we expect it to be an OCaml array
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
with length equal to the number of roots. Then during the traversal we will
update the number of words uniquely reachable from each root.
That is, when we visit a node for the first time, we add its size to the
corresponding root identifier, and when we visit it for the second time, we
undo this addition. */
intnat reachable_words_once(value root, intnat identifier, value sizes_by_root_id) {
struct extern_item * sp;
intnat size;
uintnat mark, new_mark;
Expand All @@ -1223,46 +1208,22 @@ intnat reachable_words_once(value root, enum reachable_words_traversal_mode mode
so we end up counting out-of-heap blocks too. */
} else {
previously_marked = extern_lookup_position(v, &mark, &h);
if (mode == IncrementReachedCount) {
if (!previously_marked) {
/* Invariant: v != root as we have marked roots before running this function.
* So we can safely assign new_mark to identifier */
should_traverse = 1;
new_mark = identifier;
} else if (mark == RootUnprocessed && v == root) {
should_traverse = 1;
new_mark = RootProcessed;
} else if (mark == Ignored || mark == RootUnprocessed || mark == RootProcessed) {
should_traverse = 0;
} else if (mark == identifier) {
should_traverse = 0;
} else {
/* mark is some other root's identifier */
should_traverse = 1;
new_mark = Ignored;
}
} else if (mode == ComputeUnique) {
if (!previously_marked) {
caml_failwith("reachable_words_once in ComputeUnique mode encountered an unvisited node. "
"This is a bug in the standard library implementation.");
} else if (mark == RootUnprocessed) {
caml_failwith("reachable_words_once in ComputeUnique mode encountered an unprocessed root. "
"This is a bug in the standard library implementation.");
} else if (mark == RootProcessed && v == root) {
should_traverse = 1;
new_mark = Ignored;
} else if (mark == Ignored || mark == RootProcessed) {
should_traverse = 0;
} else if (mark == identifier) {
should_traverse = 1;
new_mark = Ignored;
} else {
caml_failwith("reachable_words_once in ComputeUnique mode node with identifier of different root. "
"This is a bug in the standard library implementation.");
}
if (!previously_marked) {
/* Invariant: v != root as we have marked roots before running this function.
* So we can safely assign new_mark to identifier */
should_traverse = 1;
new_mark = identifier;
} else if (mark == RootUnprocessed && v == root) {
should_traverse = 1;
new_mark = RootProcessed;
} else if (mark == Shared || mark == RootUnprocessed || mark == RootProcessed) {
should_traverse = 0;
} else if (mark == identifier) {
should_traverse = 0;
} else {
caml_failwith("reachable_words_once encountered unknown mode. "
"This is a bug in the standard library implementation.");
/* mark is some other root's identifier */
should_traverse = 1;
new_mark = Shared;
}

if (should_traverse) {
Expand All @@ -1279,8 +1240,26 @@ intnat reachable_words_once(value root, enum reachable_words_traversal_mode mode
} else {
extern_update_location_with_data(h, new_mark);
}

/* The block contributes to the total size */
size += 1 + sz; /* header word included */
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
if (sizes_by_root_id) {
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
if (new_mark == Shared) {
#pragma GCC diagnostic push
/* GCC complains that [mark] could be uninitialized. This can only happen
* if previously_marked returns false. But then new_mark is set to
* identifier which by assumption is != Shared. */
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
stedolan marked this conversation as resolved.
Show resolved Hide resolved
/* mark is identifier of some other root that we counted this node
* as contributing to. Since it is evidently not uniquely reachable, we
* undo this contribution */
/* Need to shift left by 1 to respect the OCaml representation of [int] values */
Field(sizes_by_root_id, mark) -= (1 + sz) << 1;
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
#pragma GCC diagnostic pop
} else {
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
Field(sizes_by_root_id, identifier) += (1 + sz) << 1;
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
}
}
if (tag < No_scan_tag) {
/* i is the position of the first field to traverse recursively */
uintnat i =
Expand Down Expand Up @@ -1320,7 +1299,7 @@ void reachable_words_mark_root(value v)
{
uintnat h, mark;
extern_lookup_position(v, &mark, &h);
extern_record_location_with_data(v, h, -2);
extern_record_location_with_data(v, h, RootUnprocessed);
}

void reachable_words_cleanup()
Expand All @@ -1336,7 +1315,7 @@ CAMLprim value caml_obj_reachable_words(value v)

reachable_words_init();
reachable_words_mark_root(v);
size = Val_long(reachable_words_once(v, IncrementReachedCount, 1));
size = Val_long(reachable_words_once(v, 0, 0));
apilatjs marked this conversation as resolved.
Show resolved Hide resolved
reachable_words_cleanup();

CAMLreturn(size);
Expand All @@ -1345,25 +1324,22 @@ CAMLprim value caml_obj_reachable_words(value v)
CAMLprim value caml_obj_uniquely_reachable_words(value v)
{
CAMLparam1(v);
CAMLlocal1(ret);
CAMLlocal1(sizes_by_root_id);

intnat length;

length = Wosize_val(v);
ret = caml_alloc(length, 0);
sizes_by_root_id = caml_alloc(length, 0);

reachable_words_init();
for (intnat i = 0; i < length; i++) {
reachable_words_mark_root(Field(v, i));
Field(sizes_by_root_id, i) = Val_int(0);
}
for (intnat i = 0; i < length; i++) {
reachable_words_once(Field(v, i), IncrementReachedCount, i + 1);
}
for (intnat i = 0; i < length; i++) {
intnat size = reachable_words_once(Field(v, i), ComputeUnique, i + 1);
Store_field(ret, i, Val_int(size));
reachable_words_once(Field(v, i), i, sizes_by_root_id);
}
reachable_words_cleanup();

CAMLreturn(ret);
CAMLreturn(sizes_by_root_id);
}
4 changes: 3 additions & 1 deletion ocaml/testsuite/tests/lib-obj/uniquely_reachable_words.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ let native =
let sizes xs = Obj.uniquely_reachable_words (List.map Obj.repr xs |> Array.of_list)
|> Array.to_list

(* We make object with id i have size about 100 * 2 ** i which allows us to
* (approximately) deduce the reachable node ids just from the their total size. *)
let deduce_reachable size =
let reachable = ref []
and cur = ref 0
and binary = ref ((size + 50) / 100) in
and binary = ref (size / 100) in
while !binary > 0 do
if !binary land 1 = 1 then
reachable := !cur :: !reachable;
Expand Down