Skip to content

Commit

Permalink
flambda-backend: Retainer profiling for 5.x runtime (ocaml-flambda#2000)
Browse files Browse the repository at this point in the history
* Add [Obj.uniquely_reachable_words]

Cherry-picked from ocaml-flambda#1705

* Fixed ISO C99 warning introduced in ocaml-flambda#1705

Cherry-picked from ocaml-flambda#1787

* Fix compilation after cherry pick

* Add `volatile` keyword to helper function

Avoid warnings when passing the result of `Field` to a helper that takes a
pointer. For future-proofing, added a comment warning that one shouldn't read
this to mean the function is multicore-safe.

* Add CR

---------

Co-authored-by: apilatjs <141742870+apilatjs@users.noreply.github.com>
Co-authored-by: Mark Shinwell <mshinwell@pm.me>
  • Loading branch information
3 people authored Nov 13, 2023
1 parent 86c6eb1 commit c79579e
Showing 1 changed file with 210 additions and 40 deletions.
250 changes: 210 additions & 40 deletions runtime/extern.c
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ static void extern_resize_position_table(struct caml_extern_state *s)

/* Determine whether the given object [obj] is in the hash table.
If so, set [*pos_out] to its position in the output and return 1.
If not, set [*h_out] to the hash value appropriate for
[extern_record_location] and return 0. */

If not, return 0.
Either way, set [*h_out] to the hash value appropriate for
[extern_record_location]. */
Caml_inline int extern_lookup_position(struct caml_extern_state *s, value obj,
uintnat * pos_out, uintnat * h_out)
{
Expand All @@ -336,29 +336,46 @@ Caml_inline int extern_lookup_position(struct caml_extern_state *s, value obj,
return 0;
}
if (s->pos_table.entries[h].obj == obj) {
*h_out = h;
*pos_out = s->pos_table.entries[h].pos;
return 1;
}
h = (h + 1) & s->pos_table.mask;
}
}

/* Record the output position for the given object [obj]. */
/* Record the given object [obj] in the hashmap, associated to the specified data [data]. */
/* The [h] parameter is the index in the hash table where the object
must be inserted. It was determined during lookup. */

static void extern_record_location(struct caml_extern_state* s,
value obj, uintnat h)
static void extern_record_location_with_data(struct caml_extern_state* s,
value obj, uintnat h, uintnat data)
{
if (s->extern_flags & NO_SHARING) return;
bitvect_set(s->pos_table.present, h);
s->pos_table.entries[h].obj = obj;
s->pos_table.entries[h].pos = s->obj_counter;
s->pos_table.entries[h].pos = data;
s->obj_counter++;
if (s->obj_counter >= s->pos_table.threshold)
extern_resize_position_table(s);
}

/* Record the output position for the given object [obj]. */
/* The [h] parameter is the index in the hash table where the object
must be inserted. It was determined during lookup. */
static void extern_record_location(struct caml_extern_state* s,
value obj, uintnat h)
{
extern_record_location_with_data(s, obj, h, s->obj_counter);
}

/* Update the data associated with the given object [obj]. */
static void extern_update_location_with_data(struct caml_extern_state* s,
uintnat h, uintnat data)
{
if (s->extern_flags & NO_SHARING) return;
s->pos_table.entries[h].pos = data;
}

/* To buffer the output */

static void init_extern_output(struct caml_extern_state* s)
Expand Down Expand Up @@ -1334,56 +1351,144 @@ CAMLexport void caml_serialize_block_float_8(void * data, intnat len)
#endif
}

CAMLprim value caml_obj_reachable_words(value v)
{
intnat size;
struct extern_item * sp;
uintnat h = 0;
uintnat pos = 0;
struct caml_extern_state *s = get_extern_state ();
enum reachable_words_node_state {
/* This node is reachable from at least two distinct roots, so it doesn't
* have a unique owner and will be ignored in all future traversals. */
Shared = -1,
/* This node is one of the roots and has not been visited yet (i.e. the computation
* starting at that root still hasn't ran */
RootUnprocessed = -2,
/* This node is one of the roots and the computation for that root has already ran */
RootProcessed = -3,
/* Sentinel value for a state that should never be observed */
Invalid = -4,
/* States that are non-negative integers indicate that a node has only been visited
* starting from a single root. The state is then equal to the identifier of the
* root that we reached it from */
};

s->obj_counter = 0;
s->extern_flags = 0;
extern_init_position_table(s);
/* CR ocaml 5 runtime (mshinwell): think about what to do here */
/* Not multicore-safe (the [volatile] just lets us use this with the [Field] macro) */
static void add_to_long_value(volatile value *v, intnat x) {
*v = Val_long(Long_val(*v) + x);
}

/* Performs traversal through the OCaml object reachability graph to deterime
how much memory an object has access to.
Assumes that the position_table has already been initialized using
[reachable_words_init]. We can run this function multiple times
without clearing the position table to share data between runs starting
from different roots. Identifiers must be positive integers.
For each value node visited, we record its traversal status in the [pos] field
of its entry in [position_table.entries]. The statuses are described in detail
in the [reachable_words_node_state] enum.
Returns the total size of elements marked, that is ones that are reachable
from the current root and can be reached by at most one root from the ones
that already ran.
[shared_size] is incremented by the total size of elements that were newly
marked [Shared], that is ones that we just found out are reachable from at least
two roots.
If [sizes_by_root_id] is not [Val_unit], we expect it to be an OCaml array
with length equal to the number of roots. Then during the traversal we will
update the number of words uniquely reachable from each root.
That is, when we visit a node for the first time, we add its size to the
corresponding root identifier, and when we visit it for the second time, we
undo this addition. */
intnat reachable_words_once(struct caml_extern_state *s,
value root, intnat identifier, value sizes_by_root_id,
intnat *shared_size) {
struct extern_item * sp;
intnat size;
uintnat mark = Invalid, new_mark;
value v = root;
uintnat h;
int previously_marked, should_traverse;
sp = s->extern_stack;
size = 0;

CAMLassert(identifier >= 0);

/* In Multicore OCaml, we don't distinguish between major heap blocks and
* out-of-heap blocks, so we end up counting out-of-heap blocks too. */
while (1) {
if (Is_long(v)) {
/* Tagged integers contribute 0 to the size, nothing to do */
} else if (extern_lookup_position(s, v, &pos, &h)) {
/* Already seen and counted, nothing to do */
} else {
header_t hd = Hd_val(v);
tag_t tag = Tag_hd(hd);
mlsize_t sz = Wosize_hd(hd);
intnat sz_with_header = 1 + sz;
/* Infix pointer: go back to containing closure */
if (tag == Infix_tag) {
v = v - Infix_offset_hd(hd);
continue;
}
/* Remember that we've visited this block */
extern_record_location(s, v, h);
/* The block contributes to the total size */
size += 1 + sz; /* header word included */
if (tag < No_scan_tag) {
/* i is the position of the first field to traverse recursively */
uintnat i =
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
if (i < sz) {
if (i < sz - 1) {
/* Remember that we need to count fields i + 1 ... sz - 1 */
sp++;
if (sp >= s->extern_stack_limit)
sp = extern_resize_stack(s, sp);
sp->v = &Field(v, i + 1);
sp->count = sz - i - 1;

previously_marked = extern_lookup_position(s, v, &mark, &h);
if (!previously_marked) {
/* All roots must have been marked by [reachable_words_mark_root] before
* calling this function so we can safely assign new_mark to
* identifier */
CAMLassert(v != root);
should_traverse = 1;
new_mark = identifier;
} else if (mark == RootUnprocessed && v == root) {
should_traverse = 1;
new_mark = RootProcessed;
} else if (mark == Shared || mark == RootUnprocessed || mark == RootProcessed) {
should_traverse = 0;
} else if (mark == identifier) {
should_traverse = 0;
} else {
CAMLassert(mark != Invalid);
/* mark is some other root's identifier */
should_traverse = 1;
new_mark = Shared;
}

if (should_traverse) {
if (!previously_marked) {
extern_record_location_with_data(s, v, h, new_mark);
} else {
extern_update_location_with_data(s, h, new_mark);
}

/* The block contributes to the total size */
size += sz_with_header; /* header word included */
if (sizes_by_root_id != Val_unit) {
if (new_mark == Shared) {
/* mark is identifier of some other root that we counted this node
* as contributing to. Since it is evidently not uniquely reachable, we
* undo this contribution */
add_to_long_value(&Field(sizes_by_root_id, mark), -sz_with_header);
*shared_size += sz_with_header;
} else {
CAMLassert(new_mark == identifier || (v == root && new_mark == RootProcessed));
add_to_long_value(&Field(sizes_by_root_id, identifier), sz_with_header);
}
}
if (tag < No_scan_tag) {
/* i is the position of the first field to traverse recursively */
uintnat i =
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
if (i < sz) {
if (i < sz - 1) {
/* Remember that we need to count fields i + 1 ... sz - 1 */
sp++;
if (sp >= s->extern_stack_limit)
sp = extern_resize_stack(s, sp);
sp->v = &Field(v, i + 1);
sp->count = sz - i - 1;
}
/* Continue with field i */
v = Field(v, i);
continue;
}
/* Continue with field i */
v = Field(v, i);
continue;
}
}
}
Expand All @@ -1392,7 +1497,72 @@ CAMLprim value caml_obj_reachable_words(value v)
v = *((sp->v)++);
if (--(sp->count) == 0) sp--;
}

return size;
}

struct caml_extern_state* reachable_words_init(void)
{
struct caml_extern_state *s = get_extern_state ();
s->obj_counter = 0;
s->extern_flags = 0;
extern_init_position_table(s);
return s;
}

void reachable_words_mark_root(struct caml_extern_state *s, value v)
{
uintnat h, mark;
extern_lookup_position(s, v, &mark, &h);
extern_record_location_with_data(s, v, h, RootUnprocessed);
}

void reachable_words_cleanup(struct caml_extern_state *s)
{
extern_free_stack(s);
extern_free_position_table(s);
return Val_long(size);
}

CAMLprim value caml_obj_reachable_words(value v)
{
struct caml_extern_state *s;
CAMLparam1(v);
CAMLlocal1(size);

intnat shared_size = 0;

s = reachable_words_init();
reachable_words_mark_root(s, v);
size = Val_long(reachable_words_once(s, v, 0, Val_unit, &shared_size));
reachable_words_cleanup(s);

CAMLreturn(size);
}

CAMLprim value caml_obj_uniquely_reachable_words(value v)
{
struct caml_extern_state *s;
CAMLparam1(v);
CAMLlocal2(sizes_by_root_id, ret);

intnat length, shared_size;

length = Wosize_val(v);
sizes_by_root_id = caml_alloc(length, 0);
shared_size = 0;

s = reachable_words_init();
for (intnat i = 0; i < length; i++) {
reachable_words_mark_root(s, Field(v, i));
Field(sizes_by_root_id, i) = Val_int(0);
}
for (intnat i = 0; i < length; i++) {
reachable_words_once(s, Field(v, i), i, sizes_by_root_id, &shared_size);
}
reachable_words_cleanup(s);

ret = caml_alloc_small(2, 0);
Field(ret, 0) = sizes_by_root_id;
Field(ret, 1) = Val_long(shared_size);
CAMLreturn(ret);
}

0 comments on commit c79579e

Please sign in to comment.