
Commit 70216e1

compudj authored and Ingo Molnar committed
membarrier: Provide core serializing command, *_SYNC_CORE
Provide core serializing membarrier command to support memory reclaim
by JIT.

Each architecture needs to explicitly opt into that support by
documenting in their architecture code how they provide the core
serializing instructions required when returning from the membarrier
IPI, and after the scheduler has updated the curr->mm pointer (before
going back to user-space). They should then select
ARCH_HAS_MEMBARRIER_SYNC_CORE to enable support for that command on
their architecture.

Architectures selecting this feature need to either document that
they issue core serializing instructions when returning to user-space,
or implement their architecture-specific sync_core_before_usermode().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-9-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent ac1ab12 commit 70216e1
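The intended consumer of the new command is user-space code such as a JIT reclaiming memory that may still hold instructions executed by sibling threads. A minimal user-space sketch of that flow, assuming only the uapi definitions added below; the membarrier()/jit_* helper names are made up for illustration and are not part of this commit:

/* Illustrative only: register once, then issue SYNC_CORE before reusing code memory. */
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static long membarrier(int cmd, int flags)
{
        return syscall(__NR_membarrier, cmd, flags);
}

/* At start-up: declare the intent to use the SYNC_CORE command.
 * Fails with EINVAL if the kernel or architecture does not support it. */
static long jit_membarrier_init(void)
{
        return membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0);
}

/* Before rewriting a code buffer whose old contents sibling threads may still
 * be executing: once this returns 0, every running sibling thread has executed
 * a core serializing instruction (non-running threads will do so before they
 * resume user-space), so stale instructions cannot be executed afterwards. */
static long jit_quiesce_old_code(void)
{
        return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
}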

5 files changed: +106, -18 lines changed

include/linux/sched/mm.h

Lines changed: 18 additions & 0 deletions
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
+#include <linux/sync_core.h>
 
 /*
  * Routines for handling mm_structs
@@ -223,12 +224,26 @@ enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
 	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY		= (1U << 2),
 	MEMBARRIER_STATE_GLOBAL_EXPEDITED		= (1U << 3),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
+};
+
+enum {
+	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
 };
 
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 #include <asm/membarrier.h>
 #endif
 
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+	if (likely(!(atomic_read(&mm->membarrier_state) &
+		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
+		return;
+	sync_core_before_usermode();
+}
+
 static inline void membarrier_execve(struct task_struct *t)
 {
 	atomic_set(&t->mm->membarrier_state, 0);
@@ -244,6 +259,9 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+}
 #endif
 
 #endif /* _LINUX_SCHED_MM_H */
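Note the fast path above: unless the process has registered for SYNC_CORE, membarrier_mm_sync_core_before_usermode() is a single likely-taken branch. When the flag is set, it calls sync_core_before_usermode(), which each opting-in architecture must provide (or document that returning to user-space is already core serializing). A hypothetical sketch of such an arch helper, assuming the architecture exposes a sync_core()-style serializing primitive; the real per-architecture implementations are added by separate patches and are not part of this commit:

/* Illustrative arch/<arch>/include/asm/sync_core.h sketch (not part of this commit). */
#ifndef _ASM_SYNC_CORE_H
#define _ASM_SYNC_CORE_H

#include <asm/processor.h>	/* assumed to provide a sync_core()-like primitive */

/*
 * Guarantee that a core serializing instruction is issued before the
 * next return to user-space, as required by the SYNC_CORE membarrier
 * command.
 */
static inline void sync_core_before_usermode(void)
{
	sync_core();
}

#endif /* _ASM_SYNC_CORE_H */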

include/uapi/linux/membarrier.h

Lines changed: 31 additions & 1 deletion
@@ -73,7 +73,7 @@
  *                          to and return from the system call
  *                          (non-running threads are de facto in such a
  *                          state). This only covers threads from the
- *                          same processes as the caller thread. This
+ *                          same process as the caller thread. This
  *                          command returns 0 on success. The
  *                          "expedited" commands complete faster than
  *                          the non-expedited ones, they never block,
@@ -86,6 +86,34 @@
  *                          Register the process intent to use
  *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
  *                          returns 0.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+ *                          In addition to provide memory ordering
+ *                          guarantees described in
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED, ensure
+ *                          the caller thread, upon return from system
+ *                          call, that all its running threads siblings
+ *                          have executed a core serializing
+ *                          instruction. (architectures are required to
+ *                          guarantee that non-running threads issue
+ *                          core serializing instructions before they
+ *                          resume user-space execution). This only
+ *                          covers threads from the same process as the
+ *                          caller thread. This command returns 0 on
+ *                          success. The "expedited" commands complete
+ *                          faster than the non-expedited ones, they
+ *                          never block, but have the downside of
+ *                          causing extra overhead. If this command is
+ *                          not implemented by an architecture, -EINVAL
+ *                          is returned. A process needs to register its
+ *                          intent to use the private expedited sync
+ *                          core command prior to using it, otherwise
+ *                          this command returns -EPERM.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+ *                          Register the process intent to use
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE.
+ *                          If this command is not implemented by an
+ *                          architecture, -EINVAL is returned.
+ *                          Returns 0 on success.
  * @MEMBARRIER_CMD_SHARED:
  *                          Alias to MEMBARRIER_CMD_GLOBAL. Provided for
  *                          header backward compatibility.
@@ -101,6 +129,8 @@ enum membarrier_cmd {
 	MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED	= (1 << 2),
 	MEMBARRIER_CMD_PRIVATE_EXPEDITED		= (1 << 3),
 	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	= (1 << 4),
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE	= (1 << 5),
+	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE	= (1 << 6),
 
 	/* Alias for header backward compatibility. */
 	MEMBARRIER_CMD_SHARED			= MEMBARRIER_CMD_GLOBAL,
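Because the commands are distinct bits, user-space can probe for the new commands with MEMBARRIER_CMD_QUERY, which returns the bitmask of supported commands, before registering. A small user-space sketch; the helper name is illustrative:

/* Illustrative only: run-time probe for SYNC_CORE support. */
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static int membarrier_sync_core_supported(void)
{
        long mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);

        if (mask < 0)
                return 0;       /* membarrier() unavailable (old kernel, seccomp, ...) */
        return (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) &&
               (mask & MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE);
}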

init/Kconfig

Lines changed: 3 additions & 0 deletions
@@ -1415,6 +1415,9 @@ config USERFAULTFD
 config ARCH_HAS_MEMBARRIER_CALLBACKS
 	bool
 
+config ARCH_HAS_MEMBARRIER_SYNC_CORE
+	bool
+
 config EMBEDDED
 	bool "Embedded system"
 	option allnoconfig_y
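An architecture then opts in by selecting the new symbol from its own Kconfig entry. An illustrative fragment; the real selections (e.g. for x86) arrive in the follow-up architecture patches, not in this commit:

# Illustrative arch/<arch>/Kconfig fragment, not part of this commit.
config MYARCH
	def_bool y
	select ARCH_HAS_MEMBARRIER_SYNC_CORE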

kernel/sched/core.c

Lines changed: 13 additions & 5 deletions
@@ -2704,13 +2704,21 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 
 	fire_sched_in_preempt_notifiers(current);
 	/*
-	 * When transitioning from a kernel thread to a userspace
-	 * thread, mmdrop()'s implicit full barrier is required by the
-	 * membarrier system call, because the current ->active_mm can
-	 * become the current mm without going through switch_mm().
+	 * When switching through a kernel thread, the loop in
+	 * membarrier_{private,global}_expedited() may have observed that
+	 * kernel thread and not issued an IPI. It is therefore possible to
+	 * schedule between user->kernel->user threads without passing though
+	 * switch_mm(). Membarrier requires a barrier after storing to
+	 * rq->curr, before returning to userspace, so provide them here:
+	 *
+	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+	 *   provided by mmdrop(),
+	 * - a sync_core for SYNC_CORE.
 	 */
-	if (mm)
+	if (mm) {
+		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
+	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
 			prev->sched_class->task_dead(prev);

kernel/sched/membarrier.c

Lines changed: 41 additions & 12 deletions
@@ -26,11 +26,20 @@
  * Bitmask made from a "or" of all commands within enum membarrier_cmd,
  * except MEMBARRIER_CMD_QUERY.
  */
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK		\
+	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE		\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
+#else
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
+#endif
+
 #define MEMBARRIER_CMD_BITMASK	\
 	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
 	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
 	| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
-	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
+	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
 
 static void ipi_mb(void *info)
 {
@@ -104,15 +113,23 @@ static int membarrier_global_expedited(void)
 	return 0;
 }
 
-static int membarrier_private_expedited(void)
+static int membarrier_private_expedited(int flags)
 {
 	int cpu;
 	bool fallback = false;
 	cpumask_var_t tmpmask;
 
-	if (!(atomic_read(&current->mm->membarrier_state)
-	    & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
-		return -EPERM;
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+			return -EINVAL;
+		if (!(atomic_read(&current->mm->membarrier_state) &
+		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
+			return -EPERM;
+	} else {
+		if (!(atomic_read(&current->mm->membarrier_state) &
+		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+			return -EPERM;
+	}
 
 	if (num_online_cpus() == 1)
 		return 0;
@@ -205,29 +222,37 @@ static int membarrier_register_global_expedited(void)
 	return 0;
 }
 
-static int membarrier_register_private_expedited(void)
+static int membarrier_register_private_expedited(int flags)
 {
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+			return -EINVAL;
+		state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+	}
 
 	/*
 	 * We need to consider threads belonging to different thread
 	 * groups, which use the same mm. (CLONE_VM but not
 	 * CLONE_THREAD).
 	 */
-	if (atomic_read(&mm->membarrier_state)
-	    & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+	if (atomic_read(&mm->membarrier_state) & state)
 		return 0;
 	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
+		atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
+			  &mm->membarrier_state);
 	if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
 		/*
 		 * Ensure all future scheduler executions will observe the
 		 * new thread flag state for this process.
 		 */
 		synchronize_sched();
 	}
-	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
-		  &mm->membarrier_state);
+	atomic_or(state, &mm->membarrier_state);
 	return 0;
 }
 
@@ -283,9 +308,13 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
 		return membarrier_register_global_expedited();
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		return membarrier_private_expedited();
+		return membarrier_private_expedited(0);
 	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
-		return membarrier_register_private_expedited();
+		return membarrier_register_private_expedited(0);
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
 	default:
 		return -EINVAL;
 	}
