Skip to content

Commit bd72dcb

Browse files
committed
flambda-backend: Prefetching optimisations for sweeping (ocaml#9934)
(cherry picked from commit 8a90546)
1 parent 27fed7e commit bd72dcb

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

runtime/caml/misc.h

+11
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,17 @@ CAMLdeprecated_typedef(addr, char *);
113113
#error "How do I align values on this platform?"
114114
#endif
115115

116+
/* Prefetching */
117+
118+
#ifdef CAML_INTERNALS
119+
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
120+
#define caml_prefetch(p) __builtin_prefetch((p), 1, 3)
121+
/* 1 = prefetch with intent to write; 3 = highest temporal locality (keep the line in all cache levels) */
122+
#else
123+
#define caml_prefetch(p)
124+
#endif
125+
#endif
126+
116127
/* CAMLunused is preserved for compatibility reasons.
117128
Instead of the legacy GCC/Clang-only
118129
CAMLunused foo;

runtime/freelist.c

+1
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,7 @@ static header_t *bf_merge_block (value bp, char *limit)
16621662
}
16631663
caml_fl_cur_wsz += Whsize_val (cur);
16641664
next:
1665+
caml_prefetch(Hp_val(cur + 4096));
16651666
cur = Next_in_mem (cur);
16661667
if (Hp_val (cur) >= (header_t *) limit){
16671668
CAMLassert (Hp_val (cur) == (header_t *) limit);

runtime/major_gc.c

+15-12
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ extern value caml_fl_merge; /* Defined in freelist.c. */
7272
redarkening required */
7373
static char *redarken_first_chunk = NULL;
7474

75-
static char *sweep_chunk, *sweep_limit;
75+
static char *sweep_chunk;
7676
static double p_backlog = 0.0; /* backlog for the gc speedup parameter */
7777

7878
int caml_gc_subphase; /* Subphase_{mark_roots,mark_main,mark_final} */
@@ -397,7 +397,6 @@ static void init_sweep_phase(void)
397397
caml_gc_phase = Phase_sweep;
398398
sweep_chunk = caml_heap_start;
399399
caml_gc_sweep_hp = sweep_chunk;
400-
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
401400
caml_fl_wsz_at_phase_change = caml_fl_cur_wsz;
402401
if (caml_major_gc_hook) (*caml_major_gc_hook)();
403402
}
@@ -698,21 +697,24 @@ static void clean_slice (intnat work)
698697

699698
static void sweep_slice (intnat work)
700699
{
701-
char *hp;
700+
char *hp, *sweep_hp, *limit;
702701
header_t hd;
703702

704703
caml_gc_message (0x40, "Sweeping %"
705704
ARCH_INTNAT_PRINTF_FORMAT "d words\n", work);
705+
sweep_hp = caml_gc_sweep_hp;
706+
limit = sweep_chunk + Chunk_size(sweep_chunk);
706707
while (work > 0){
707-
if (caml_gc_sweep_hp < sweep_limit){
708-
hp = caml_gc_sweep_hp;
708+
if (sweep_hp < limit){
709+
caml_prefetch(sweep_hp + 4000);
710+
hp = sweep_hp;
709711
hd = Hd_hp (hp);
710712
work -= Whsize_hd (hd);
711-
caml_gc_sweep_hp += Bhsize_hd (hd);
713+
sweep_hp += Bhsize_hd (hd);
712714
switch (Color_hd (hd)){
713715
case Caml_white:
714-
caml_gc_sweep_hp =
715-
(char *)caml_fl_merge_block(Val_hp (hp), sweep_limit);
716+
caml_gc_sweep_hp = sweep_hp;
717+
sweep_hp = (char *) caml_fl_merge_block (Val_hp (hp), limit);
716718
break;
717719
case Caml_blue:
718720
/* Only the blocks of the free-list are blue. See [freelist.c]. */
@@ -723,21 +725,23 @@ static void sweep_slice (intnat work)
723725
Hd_hp (hp) = Whitehd_hd (hd);
724726
break;
725727
}
726-
CAMLassert (caml_gc_sweep_hp <= sweep_limit);
728+
CAMLassert (sweep_hp <= limit);
727729
}else{
728730
sweep_chunk = Chunk_next (sweep_chunk);
729731
if (sweep_chunk == NULL){
730732
/* Sweeping is done. */
733+
caml_gc_sweep_hp = sweep_hp;
731734
++ Caml_state->stat_major_collections;
732735
work = 0;
733736
caml_gc_phase = Phase_idle;
734737
caml_request_minor_gc ();
735738
}else{
736-
caml_gc_sweep_hp = sweep_chunk;
737-
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
739+
sweep_hp = sweep_chunk;
740+
limit = sweep_chunk + Chunk_size (sweep_chunk);
738741
}
739742
}
740743
}
744+
caml_gc_sweep_hp = sweep_hp;
741745
}
742746

743747
/* The main entry point for the major GC. Called about once for each
@@ -1085,7 +1089,6 @@ void caml_finalise_heap (void)
10851089
caml_gc_phase = Phase_sweep;
10861090
sweep_chunk = caml_heap_start;
10871091
caml_gc_sweep_hp = sweep_chunk;
1088-
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
10891092
while (caml_gc_phase == Phase_sweep)
10901093
sweep_slice (LONG_MAX);
10911094
}

0 commit comments

Comments
 (0)