Skip to content

Commit

Permalink
Version 2.0.1 release
Browse files Browse the repository at this point in the history
  • Loading branch information
neboat committed Sep 2, 2022
1 parent 73b06fb commit 67e0a07
Show file tree
Hide file tree
Showing 10 changed files with 206 additions and 102 deletions.
5 changes: 4 additions & 1 deletion include/cilk/cilk_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
#include <stddef.h> /* size_t */

#ifdef __cplusplus
#define __CILKRTS_NOTHROW noexcept
extern "C" {
#else
#define __CILKRTS_NOTHROW
#endif

extern int __cilkrts_is_initialized(void);
Expand All @@ -20,7 +23,7 @@ typedef struct __cilkrts_pedigree {
struct __cilkrts_pedigree *parent;
} __cilkrts_pedigree;
extern __cilkrts_pedigree __cilkrts_get_pedigree(void);
extern void __cilkrts_bump_worker_rank(void);
extern void __cilkrts_bump_worker_rank(void) __CILKRTS_NOTHROW;
extern void __cilkrts_dprand_set_seed(uint64_t seed);
extern void __cilkrts_init_dprng(void);
extern uint64_t __cilkrts_get_dprand(void);
Expand Down
2 changes: 2 additions & 0 deletions include/cilk/cilk_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
/* Stub definitions: each cilk_* keyword macro below expands to nothing. */
#define cilk_spawn /* empty */
#define cilk_sync /* empty */
#define cilk_scope /* empty */

/* cilk_reducer accepts two arguments (I, R) and discards both. */
#define cilk_reducer(I,R) /* empty */
9 changes: 9 additions & 0 deletions runtime/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ static local_state *worker_local_init(local_state *l, global_state *g) {
l->state = WORKER_IDLE;
l->provably_good_steal = false;
l->rand_next = 0; /* will be reset in scheduler loop */
l->wake_val = 0;
cilk_sched_stats_init(&(l->stats));

return l;
Expand Down Expand Up @@ -118,6 +119,7 @@ __cilkrts_worker *__cilkrts_init_tls_worker(worker_id i, global_state *g) {
return w;
}

#if ENABLE_WORKER_PINNING
#ifdef CPU_SETSIZE
static void move_bit(int cpu, cpu_set_t *to, cpu_set_t *from) {
if (CPU_ISSET(cpu, from)) {
Expand All @@ -126,11 +128,13 @@ static void move_bit(int cpu, cpu_set_t *to, cpu_set_t *from) {
}
}
#endif
#endif // ENABLE_WORKER_PINNING

static void threads_init(global_state *g) {
/* TODO: Mac OS has a better interface allowing the application
to request that two threads run as far apart as possible by
giving them distinct "affinity tags". */
#if ENABLE_WORKER_PINNING
#ifdef CPU_SETSIZE
// Affinity setting, from cilkplus-rts
cpu_set_t process_mask;
Expand Down Expand Up @@ -171,13 +175,15 @@ static void threads_init(global_state *g) {
break;
}
#endif
#endif // ENABLE_WORKER_PINNING
int n_threads = g->nworkers;
CILK_ASSERT_G(n_threads > 0);

/* TODO: Apple supports thread affinity using a different interface. */

cilkrts_alert(BOOT, NULL, "(threads_init) Setting up threads");

#if ENABLE_WORKER_PINNING
#ifdef CPU_SETSIZE
/* Three cases: core count at least twice worker count, allocate
groups of floor(core count / worker count) CPUs.
Expand All @@ -201,6 +207,7 @@ static void threads_init(global_state *g) {
}
}
#endif
#endif // ENABLE_WORKER_PINNING
int worker_start =
#if BOSS_THIEF
1
Expand All @@ -216,6 +223,7 @@ static void threads_init(global_state *g) {
cilkrts_bug(NULL, "Cilk: thread creation (%u) failed: %s", w,
strerror(status));

#if ENABLE_WORKER_PINNING
#ifdef CPU_SETSIZE
if (available_cores > 0) {
/* Skip to the next active CPU ID. */
Expand Down Expand Up @@ -243,6 +251,7 @@ static void threads_init(global_state *g) {
CILK_ASSERT_G(err == 0);
}
#endif
#endif // ENABLE_WORKER_PINNING
}
}

Expand Down
1 change: 1 addition & 0 deletions runtime/local.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ struct local_state {
unsigned short state; /* __cilkrts_worker_state */
bool provably_good_steal;
unsigned int rand_next;
uint32_t wake_val;

jmpbuf rts_ctx;
struct cilk_fiber_pool fiber_pool;
Expand Down
16 changes: 10 additions & 6 deletions runtime/pedigree-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@ typedef struct __pedigree_frame {
int64_t dprng_depth;
} __pedigree_frame;

/* Pairs a pointer to __pedigree_frame objects with a size_t counter.
 * NOTE(review): next_pedigree_frame is presumably the index of the next
 * unused entry in frames -- confirm against the code that uses this type. */
typedef struct __pedigree_frame_storage_t {
    size_t next_pedigree_frame;
    __pedigree_frame *frames;
} __pedigree_frame_storage_t;


///////////////////////////////////////////////////////////////////////////
// Helper methods

/* Obtain storage for one __pedigree_frame on behalf of worker w.
 *
 * When ENABLE_EXTENSION is non-zero, this requests sizeof(__pedigree_frame)
 * bytes from __cilkrts_push_ext_stack() and returns that call's result.
 * Otherwise no call is made and NULL is returned. */
static inline __attribute__((malloc)) __pedigree_frame *
push_pedigree_frame(__cilkrts_worker *w) {
#if ENABLE_EXTENSION
    __pedigree_frame *new_frame =
        __cilkrts_push_ext_stack(w, sizeof(__pedigree_frame));
    return new_frame;
#else
    (void)w; // w is not needed when extensions are compiled out
    return NULL;
#endif
}

/* Counterpart of push_pedigree_frame(): when ENABLE_EXTENSION is non-zero,
 * passes sizeof(__pedigree_frame) to __cilkrts_pop_ext_stack() for worker w.
 * Does nothing when extensions are compiled out. */
static inline void pop_pedigree_frame(__cilkrts_worker *w) {
#if !ENABLE_EXTENSION
    (void)w; // nothing was pushed, so there is nothing to pop
#else
    __cilkrts_pop_ext_stack(w, sizeof(__pedigree_frame));
#endif
}

static inline uint64_t __cilkrts_dprng_swap_halves(uint64_t x) {
Expand Down Expand Up @@ -63,11 +63,15 @@ static inline uint64_t __cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b) {
// Helper that advances the pedigree and dprng state of the current frame.
//
// With ENABLE_EXTENSION non-zero, the frame is taken from
// __cilkrts_get_extension(); its rank is incremented and
// __pedigree_dprng_m_array[dprng_depth] is folded into dprng_dotproduct via
// __cilkrts_dprng_sum_mod_p().  The updated frame is returned.  With
// extensions compiled out, nothing is modified and NULL is returned.
static inline __attribute__((always_inline)) __pedigree_frame *
bump_worker_rank(void) {
#if ENABLE_EXTENSION
    __pedigree_frame *const cur =
        (__pedigree_frame *)(__cilkrts_get_extension());
    cur->rank += 1;
    // Term contributed by the frame's current depth.
    const uint64_t depth_term = __pedigree_dprng_m_array[cur->dprng_depth];
    cur->dprng_dotproduct =
        __cilkrts_dprng_sum_mod_p(cur->dprng_dotproduct, depth_term);
    return cur;
#else
    return NULL;
#endif
}

#endif // _PEDIGREE_INTERNAL_H
8 changes: 1 addition & 7 deletions runtime/rts-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,7 @@

#define ENABLE_EXTENSION 1

#if defined __linux__
#define CILK_PAGE_SIZE 0 /* page size not available at compile time */
#elif defined __APPLE__
#define CILK_PAGE_SIZE 4096 /* Apple implies x86 or ARM */
#else
#include <machine/param.h>
#endif
#define ENABLE_WORKER_PINNING 0

#define MIN_NUM_PAGES_PER_STACK 4
#define MAX_NUM_PAGES_PER_STACK 2000
Expand Down
40 changes: 22 additions & 18 deletions runtime/sched_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "internal-malloc-impl.h"
#include "local.h"
#include "sched_stats.h"
#include "types.h"

#if SCHED_STATS
static const char *enum_to_str(enum timing_type t) {
Expand Down Expand Up @@ -157,14 +158,15 @@ void cilk_exit_worker_timing(struct global_state *g) {

/* Zero every scheduling statistic stored for worker w.
 *
 * Clears the per-category time[] and count[] arrays (NUMBER_OF_STATS entries
 * each) and the steals, repos, reeng_rqsts and onesen_rqsts counters in
 * w->l->stats.  Each field is written exactly once.
 *
 * w:    worker whose local statistics are reset.
 * data: unused.
 */
static void sched_stats_reset_worker(__cilkrts_worker *w,
                                     void *data __attribute__((unused))) {
    local_state *l = w->l;
    for (int t = 0; t < NUMBER_OF_STATS; t++) {
        l->stats.time[t] = 0;
        l->stats.count[t] = 0;
    }
    l->stats.steals = 0;
    l->stats.repos = 0;
    l->stats.reeng_rqsts = 0;
    l->stats.onesen_rqsts = 0;
}

#define COL_DESC "%15s"
Expand All @@ -177,22 +179,24 @@ static void sched_stats_reset_worker(__cilkrts_worker *w,
/* Print one row of scheduling statistics for worker w and fold the worker's
 * numbers into the global totals.
 *
 * For each of the NUMBER_OF_STATS categories, the worker's time is converted
 * with nsec_to_sec(), added to g->stats.time[t], and printed together with
 * the matching count (which is added to g->stats.count[t]).  The steals,
 * repos, reeng_rqsts and onesen_rqsts counters are then accumulated into the
 * global state and printed.  Every local value is accumulated and printed
 * exactly once.
 *
 * w:    worker whose statistics are reported.
 * data: a FILE * to print to; the whole row, including the counter columns,
 *       is written to this stream.
 */
static void sched_stats_print_worker(__cilkrts_worker *w, void *data) {
    FILE *fp = (FILE *)data;
    global_state *g = w->g;
    local_state *l = w->l;

    fprintf(fp, WORKER_HDR_DESC, "Worker", w->self);
    for (int t = 0; t < NUMBER_OF_STATS; t++) {
        double tmp = nsec_to_sec(l->stats.time[t]);
        g->stats.time[t] += tmp;
        uint64_t tmp_count = l->stats.count[t];
        g->stats.count[t] += tmp_count;
        fprintf(fp, FIELD_DESC, tmp, tmp_count);
    }
    g->stats.steals += l->stats.steals;
    g->stats.repos += l->stats.repos;
    g->stats.reeng_rqsts += l->stats.reeng_rqsts;
    g->stats.onesen_rqsts += l->stats.onesen_rqsts;

    // Keep the counter columns on the same stream as the rest of the row.
    fprintf(fp, COUNT_DESC, l->stats.steals);
    fprintf(fp, COUNT_DESC, l->stats.repos);
    fprintf(fp, COUNT_DESC, l->stats.reeng_rqsts);
    fprintf(fp, COUNT_DESC, l->stats.onesen_rqsts);
    fprintf(fp, "\n");
}

Expand Down
32 changes: 17 additions & 15 deletions runtime/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,14 @@ static void setup_for_sync(__cilkrts_worker *w, Closure *t) {
CILK_ASSERT_POINTER_EQUAL(w, w->current_stack_frame, t->frame);

SP(t->frame) = (void *)t->orig_rsp;
if (USE_EXTENSION) {
// Set the worker's extension (analogous to updating the worker's stack
// pointer).
w->extension = t->frame->extension;
// Set the worker's extension stack to be the start of the saved
// extension fiber.
w->ext_stack = sysdep_get_stack_start(t->ext_fiber);
}
t->orig_rsp = NULL; // unset once we have sync-ed
atomic_store_explicit(&t->frame->worker, w, memory_order_relaxed);
}
Expand Down Expand Up @@ -1513,10 +1521,9 @@ void worker_scheduler(__cilkrts_worker *w) {
// Get the number of workers. We don't currently support changing the
// number of workers dynamically during execution of a Cilkified region.
unsigned int nworkers = rts->nworkers;
// Initialize count of consecutive failed steal attempts. Effectively,
// every worker is active upon entering this routine.
unsigned int fails = 0;
unsigned int request_threshold = SENTINEL_THRESHOLD;
// Initialize count of consecutive failed steal attempts.
unsigned int fails = init_fails(w->l->wake_val, rts);
unsigned int sample_threshold = SENTINEL_THRESHOLD;
// Local history information of the state of the system, for sentinel
// workers to use to determine when to disengage and how many workers to
// reengage.
Expand Down Expand Up @@ -1566,19 +1573,14 @@ void worker_scheduler(__cilkrts_worker *w) {
index_to_worker[get_rand(rand_state) % stealable];
rand_state = update_rand_state(rand_state);
while (victim == self) {
busy_loop_pause();
victim = index_to_worker[get_rand(rand_state) % stealable];
rand_state = update_rand_state(rand_state);
}
// Attempt to steal from that victim.
t = Closure_steal(workers, deques, w, victim);
if (!t) {
// Pause inside this busy loop. We perform many pause
// instructions in order to limit how much memory bandwidth
// the thief consumes.
for (int i = 0; i < STEAL_BUSY_PAUSE; ++i) {
busy_loop_pause();
}
// Pause inside this busy loop.
steal_short_pause();
}
} while (!t && --attempt > 0);

Expand All @@ -1593,7 +1595,7 @@ void worker_scheduler(__cilkrts_worker *w) {
}
#endif
fails = go_to_sleep_maybe(
rts, self, nworkers, w, t, fails, &request_threshold,
rts, self, nworkers, w, t, fails, &sample_threshold,
&inefficient_history, &efficient_history,
sentinel_count_history, &sentinel_count_history_tail,
&recent_sentinel_count);
Expand Down Expand Up @@ -1622,14 +1624,14 @@ void worker_scheduler(__cilkrts_worker *w) {
// Decrement the count of failed steal attempts based on the
// amount of work done.
fails = decrease_fails_by_work(rts, w, fails, elapsed,
&request_threshold);
&sample_threshold);
if (fails < SENTINEL_THRESHOLD) {
inefficient_history = 0;
efficient_history = 0;
}
} else {
fails = 0;
request_threshold = SENTINEL_THRESHOLD;
sample_threshold = SENTINEL_THRESHOLD;
}
#endif // ENABLE_THIEF_SLEEP
t = NULL;
Expand Down Expand Up @@ -1696,7 +1698,7 @@ void *scheduler_thread_proc(void *arg) {
#endif
if (thief_should_wait(rts)) {
disengage_worker(rts, nworkers, self);
thief_wait(rts);
w->l->wake_val = thief_wait(rts);
reengage_worker(rts, nworkers, self);
}
#if !BOSS_THIEF
Expand Down
Loading

0 comments on commit 67e0a07

Please sign in to comment.