Skip to content

Commit bd2a659

Browse files
committed
Add thread-based RCU
Add the Linux Kernel style thread-based simple RCU. It supports sparse checking[1]. With sparse, it will report: main.c: note: in included file: thrd_rcu.h:95:42: warning: Using plain integer as NULL pointer thrd_rcu.h:95:42: warning: Using plain integer as NULL pointer This comes from the pthread_mutex_t initializer; we can ignore it. thread-rcu targets the Linux Kernel Memory Model (LKMM). The C11 memory model may not be compatible with LKMM, so we need to be careful about the architecture we use. See the paper[2] for more detail. [1] https://www.kernel.org/doc/html/latest/dev-tools/sparse.html [2] http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0124r6.html
1 parent 5737c46 commit bd2a659

File tree

4 files changed

+345
-0
lines changed

4 files changed

+345
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ purpose of these programs is to be illustrative and educational.
3030
- [qsbr](qsbr/): An implementation of Quiescent state based reclamation (QSBR).
3131
- [list-move](list-move/): Evaluation of two concurrent linked lists: QSBR and lock-based.
3232
- [rcu\_queue](rcu_queue/): An efficient concurrent queue based on QSBR.
33+
- [thread-rcu](thread-rcu/): A Linux Kernel style thread-based simple RCU.
3334
* Applications
3435
- [httpd](httpd/): A multi-threaded web server.
3536
- [map-reduce](map-reduce/): word counting using MapReduce.

thread-rcu/Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Build flags
CFLAGS = -Wall
CFLAGS += -g
CFLAGS += -std=c11
# Number of reader threads spawned by main.c
CFLAGS += -D'N_READERS=10'
# ThreadSanitizer: report data races at runtime
CFLAGS += -fsanitize=thread
LDFLAGS += -lpthread

all:
	$(CC) -o main main.c $(CFLAGS) $(LDFLAGS)

# Semantic Checker
# https://www.kernel.org/doc/html/latest/dev-tools/sparse.html
sparse:
	sparse main.c $(CFLAGS) $(LDFLAGS)

indent:
	clang-format -i *.[ch]

clean:
	rm -f main

thread-rcu/main.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#include <pthread.h>
2+
#include <stdio.h>
3+
#include <stdlib.h>
4+
5+
#include "rcu.h"
6+
7+
/* Payload protected by RCU: readers observe count, the updater swaps in
 * a whole new object. */
struct test {
    unsigned int count;
};

/* Global RCU-protected pointer (__rcu annotation is for sparse). */
static struct test __rcu *dut;
13+
static void *reader_side(void *argv)
14+
{
15+
struct test *tmp;
16+
17+
if (rcu_init())
18+
abort();
19+
20+
rcu_read_lock();
21+
22+
tmp = rcu_dereference(dut);
23+
24+
printf("[reader %u] %u\n", current_tid(), tmp->count);
25+
26+
rcu_read_unlock();
27+
28+
pthread_exit(NULL);
29+
}
30+
31+
static void *updater_side(void *argv)
32+
{
33+
struct test *oldp;
34+
struct test *newval = malloc(sizeof(struct test));
35+
36+
newval->count = current_tid();
37+
38+
printf("[updater %u]\n", newval->count);
39+
40+
oldp = rcu_assign_pointer(dut, newval);
41+
42+
synchronize_rcu();
43+
free(oldp);
44+
45+
pthread_exit(NULL);
46+
}
47+
48+
int main(int argc, char *argv[])
49+
{
50+
pthread_t reader[N_READERS];
51+
pthread_t updater;
52+
int i;
53+
54+
dut = (struct test __rcu *) malloc(sizeof(struct test));
55+
rcu_uncheck(dut)->count = 0;
56+
57+
for (i = 0; i < N_READERS / 2; i++)
58+
pthread_create(&reader[i], NULL, reader_side, NULL);
59+
60+
pthread_create(&updater, NULL, updater_side, NULL);
61+
62+
for (i = N_READERS / 2; i < N_READERS; i++)
63+
pthread_create(&reader[i], NULL, reader_side, NULL);
64+
65+
for (i = 0; i < N_READERS; i++)
66+
pthread_join(reader[i], NULL);
67+
68+
pthread_join(updater, NULL);
69+
70+
free(rcu_uncheck(dut));
71+
rcu_clean();
72+
73+
return 0;
74+
}

thread-rcu/rcu.h

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
/* thread-rcu targets the Linux Kernel Memory Model (LKMM).
 * The C11 memory model may not be compatible with LKMM, so be careful
 * about the architecture you use.
 * See the following paper for more detail:
 *
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0124r6.html
 */
8+
9+
#ifndef __RCU_H__
10+
#define __RCU_H__
11+
12+
/* lock primitives from pthread and compiler primitives */
13+
14+
#include <pthread.h>
15+
#include <stdatomic.h>
16+
17+
/* "Spinlock" shim: a pthread mutex underneath, named to match kernel
 * style.
 * NOTE(review): fprintf is used below but <stdio.h> is only included
 * further down this header; it compiles because main.c includes
 * <stdio.h> first — the header is not self-contained.  Confirm before
 * reusing this header elsewhere. */
typedef pthread_mutex_t spinlock_t;

#define SPINLOCK_INIT PTHREAD_MUTEX_INITIALIZER

/* Acquire the lock; any pthread error is fatal. */
static inline void spin_lock(spinlock_t *sp)
{
    int rc = pthread_mutex_lock(sp);

    if (rc != 0) {
        fprintf(stderr, "spin_lock:pthread_mutex_lock %d\n", rc);
        abort();
    }
}

/* Release the lock; any pthread error is fatal. */
static inline void spin_unlock(spinlock_t *sp)
{
    int rc = pthread_mutex_unlock(sp);

    if (rc != 0) {
        fprintf(stderr, "spin_unlock:pthread_mutex_unlock %d\n", rc);
        abort();
    }
}
41+
42+
/* Identify the calling thread by truncating its pthread_self() handle.
 * NOTE(review): pthread_t is opaque; collisions after truncation are
 * theoretically possible — acceptable for this demo. */
#define current_tid() (unsigned int) pthread_self()

/* Full memory barrier.  C11 has no exact equivalent of the kernel's
 * smp_mb(); memory_order_seq_cst is the closest available term. */
#define smp_mb() atomic_thread_fence(memory_order_seq_cst)

/* Compiler barrier: prevents the compiler from moving memory accesses
 * from one side of it to the other (no effect on CPU reordering). */
#define barrier() __asm__ __volatile__("" : : : "memory")

/* Don't use it directly. Use READ_ONCE() or WRITE_ONCE(). */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *) &(x))

/* READ_ONCE() is close to a C11 volatile memory_order_relaxed atomic
 * read.  However, for address, data, or control dependency chains it is
 * more like memory_order_consume; presently most implementations
 * promote consume to memory_order_acquire.
 * NOTE(review): atomic_load_explicit() on a plain (non-_Atomic) object
 * relies on GCC/Clang behavior — confirm for other compilers. */
#define READ_ONCE(x) \
    ({ \
        barrier(); \
        __typeof__(x) ___x = \
            atomic_load_explicit(&ACCESS_ONCE(x), memory_order_consume); \
        barrier(); \
        ___x; \
    })

/* WRITE_ONCE() is quite close to a C11 volatile memory_order_relaxed
 * atomic store. */
#define WRITE_ONCE(x, val) \
    do { \
        atomic_store_explicit(&ACCESS_ONCE(x), (val), memory_order_relaxed); \
    } while (0)
77+
78+
#include <errno.h>
79+
#include <stdatomic.h>
80+
#include <stdio.h>
81+
#include <stdlib.h>
82+
#include <unistd.h>
83+
84+
/* sparse annotations.  Under sparse (__CHECKER__), __rcu places a
 * pointer in a distinct address space so that direct dereference is
 * reported; rcu_uncheck()/rcu_check() force-cast out of / into that
 * space, and rcu_check_sparse() verifies a pointer carries it.
 * In a normal compile they all expand to nothing.
 * See https://www.kernel.org/doc/html/latest/dev-tools/sparse.html */
#ifdef __CHECKER__
#define __rcu __attribute__((noderef, address_space(__rcu)))
#define rcu_check_sparse(p, space) ((void) (((typeof(*p) space *) p) == p))
#define __force __attribute__((force))
#define rcu_uncheck(p) ((__typeof__(*p) __force *) p)
#define rcu_check(p) ((__typeof__(*p) __force __rcu *) p)
#else
#define __rcu
#define rcu_check_sparse(p, space)
#define __force
#define rcu_uncheck(p) p
#define rcu_check(p) p
#endif /* __CHECKER__ */
97+
98+
/* Align nodes to 128 bytes to avoid false sharing of the per-thread
 * counters between cache lines. */
#define __rcu_aligned __attribute__((aligned(128)))

/* Per-thread registry entry. */
struct rcu_node {
    unsigned int tid;      /* owner thread id (current_tid()) */
    /* Two flags selected by the grace-period parity bit: written only
     * by the owner thread, polled by synchronize_rcu(). */
    int rcu_nesting[2];
    struct rcu_node *next; /* singly linked registry list */
} __rcu_aligned;

/* Global RCU state shared by all threads. */
struct rcu_data {
    unsigned int nr_thread;              /* registered thread count */
    struct rcu_node *head;               /* registry list head */
    unsigned int rcu_thread_nesting_idx; /* grace-period counter; LSB
                                          * selects the rcu_nesting slot */
    spinlock_t sp;                       /* guards registry + grace periods */
};

/* Convenience accessors */
#define __rcu_thread_idx rcu_data.rcu_thread_nesting_idx
#define __rcu_thread_nesting(ptr) \
    ptr->rcu_nesting[READ_ONCE(__rcu_thread_idx) & 0x01]
#define rcu_thread_nesting __rcu_thread_nesting(__rcu_per_thread_ptr)

static struct rcu_data rcu_data = {
    .nr_thread = 0,
    .head = NULL,
    .rcu_thread_nesting_idx = 0,
    .sp = SPINLOCK_INIT,
};
/* The calling thread's registry node, set by rcu_init(). */
static __thread struct rcu_node *__rcu_per_thread_ptr;
127+
128+
/* Allocate and append a registry node for thread `tid`.
 *
 * Returns the new node, or NULL if `tid` is already registered.
 * Allocation failure aborts, so NULL never means out-of-memory.
 */
static inline struct rcu_node *__rcu_node_add(unsigned int tid)
{
    struct rcu_node **indirect = &rcu_data.head;
    struct rcu_node *node = malloc(sizeof(struct rcu_node));

    if (!node) {
        fprintf(stderr, "__rcu_node_add: malloc failed\n");
        abort();
    }

    node->tid = tid;
    node->rcu_nesting[0] = 0;
    node->rcu_nesting[1] = 0;
    node->next = NULL;

    spin_lock(&rcu_data.sp);

    /* Walk to the tail via pointer-to-pointer; bail out on a duplicate
     * tid so each thread is registered at most once. */
    while (*indirect) {
        if ((*indirect)->tid == node->tid) {
            spin_unlock(&rcu_data.sp);
            free(node);
            return NULL;
        }
        indirect = &(*indirect)->next;
    }

    *indirect = node;
    rcu_data.nr_thread++;

    spin_unlock(&rcu_data.sp);

    /* Make the initialized node globally visible before the caller
     * starts using it in read-side critical sections. */
    smp_mb();

    return node;
}
163+
164+
static inline int rcu_init(void)
165+
{
166+
unsigned int tid = current_tid();
167+
168+
__rcu_per_thread_ptr = __rcu_node_add(tid);
169+
170+
return (__rcu_per_thread_ptr == NULL) ? -ENOMEM : 0;
171+
}
172+
173+
static inline void rcu_clean(void)
174+
{
175+
struct rcu_node *node, *tmp;
176+
177+
spin_lock(&rcu_data.sp);
178+
179+
for (node = rcu_data.head; node; node = tmp) {
180+
tmp = node->next;
181+
free(node);
182+
}
183+
184+
rcu_data.head = NULL;
185+
rcu_data.nr_thread = 0;
186+
187+
spin_unlock(&rcu_data.sp);
188+
}
189+
190+
/* The per-thread reference count is only modified by its owner thread
 * but is read by other threads, so stores go through WRITE_ONCE().
 *
 * NOTE(review): despite the "nesting" name this is a flag, not a
 * counter — nested rcu_read_lock() sections are not supported, since
 * the inner rcu_read_unlock() would end the outer critical section too.
 */
static inline void rcu_read_lock(void)
{
    WRITE_ONCE(rcu_thread_nesting, 1);
}

static inline void rcu_read_unlock(void)
{
    WRITE_ONCE(rcu_thread_nesting, 0);
}
202+
203+
/* Wait for a grace period: when this returns, every reader that was
 * inside a read-side critical section at the time of the call has left
 * it, so objects unlinked before the call may safely be reclaimed. */
static inline void synchronize_rcu(void)
{
    struct rcu_node *node;

    /* Order the caller's pointer update before scanning readers. */
    smp_mb();

    /* Serialize concurrent grace periods and registry changes. */
    spin_lock(&rcu_data.sp);

    /* When rcu_thread_nesting is odd (LSB set), that thread is inside a
     * read-side critical section for the current grace-period parity.
     * Readers that started after the parity flip use the other slot and
     * are skipped, as they cannot hold a pre-existing reference.
     */
    for (node = rcu_data.head; node; node = node->next) {
        while (READ_ONCE(__rcu_thread_nesting(node)) & 0x1) {
            barrier(); /* busy-wait; force a re-read each iteration */
        }
    }

    /* Going to next grace period: flip the parity index.
     *
     * It would be great to put smp_mb() (here implemented by
     * memory_order_seq_cst) after and before of C11 atomics.
     * NOTE(review): atomic_fetch_add_explicit() on the plain (non-
     * _Atomic) index relies on GCC/Clang behavior — confirm elsewhere.
     */
    atomic_fetch_add_explicit(&__rcu_thread_idx, 1, memory_order_release);

    spin_unlock(&rcu_data.sp);

    smp_mb();
}
232+
233+
/* rcu_dereference(): fetch an RCU-protected pointer for use inside a
 * read-side critical section.  READ_ONCE() supplies the consume-order
 * load; rcu_check_sparse() lets sparse flag non-__rcu pointers. */
#define rcu_dereference(p) \
    ({ \
        __typeof__(*p) *__r_d_p = (__typeof__(*p) __force *) READ_ONCE(p); \
        rcu_check_sparse(p, __rcu); \
        __r_d_p; \
    })

/* rcu_assign_pointer(): publish a new version of an RCU-protected
 * pointer and return the old one, so the caller can reclaim it after a
 * grace period.  The release exchange orders initialization of the new
 * object before its publication.
 * NOTE(review): atomic_exchange_explicit() on a plain (non-_Atomic)
 * pointer relies on GCC/Clang behavior — confirm for other compilers. */
#define rcu_assign_pointer(p, v) \
    ({ \
        __typeof__(*p) *__r_a_p = \
            (__typeof__(*p) __force *) atomic_exchange_explicit( \
                &(p), (__typeof__(*(p)) __force __rcu *) v, \
                memory_order_release); \
        rcu_check_sparse(p, __rcu); \
        __r_a_p; \
    })
249+
250+
#endif /* __RCU_H__ */

0 commit comments

Comments
 (0)