Skip to content

Commit

Permalink
Merge tag 'clone3-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/brauner/linux

Pull clone3 system call from Christian Brauner:
 "This adds the clone3 syscall which is an extensible successor to clone
  after we snagged the last flag with CLONE_PIDFD during the 5.2 merge
  window for clone(). It cleanly supports all of the flags from clone()
  and thus all legacy workloads.

  There are few user visible differences between clone3 and clone.
  First, CLONE_DETACHED will cause EINVAL with clone3 so we can reuse
  this flag. Second, the CSIGNAL flag is deprecated and will cause
  EINVAL to be reported. It is superseeded by a dedicated "exit_signal"
  argument in struct clone_args thus freeing up even more flags. And
  third, clone3 gives CLONE_PIDFD a dedicated return argument in struct
  clone_args instead of abusing CLONE_PARENT_SETTID's parent_tidptr
  argument.

  The clone3 uapi is designed to be easy to handle on 32- and 64 bit:

    /* uapi */
    struct clone_args {
            __aligned_u64 flags;
            __aligned_u64 pidfd;
            __aligned_u64 child_tid;
            __aligned_u64 parent_tid;
            __aligned_u64 exit_signal;
            __aligned_u64 stack;
            __aligned_u64 stack_size;
            __aligned_u64 tls;
    };

  and a separate kernel struct is used that uses proper kernel typing:

    /* kernel internal */
    struct kernel_clone_args {
            u64 flags;
            int __user *pidfd;
            int __user *child_tid;
            int __user *parent_tid;
            int exit_signal;
            unsigned long stack;
            unsigned long stack_size;
            unsigned long tls;
    };

  The system call comes with a size argument which enables the kernel to
  detect what version of clone_args userspace is passing in. clone3
  validates that any additional bytes a given kernel does not know about
  are set to zero and that the size never exceeds a page.

  A nice feature is that this patchset allowed us to cleanup and
  simplify various core kernel codepaths in kernel/fork.c by making the
  internal _do_fork() function take struct kernel_clone_args even for
  legacy clone().

  This patch also unblocks the time namespace patchset which wants to
  introduce a new CLONE_TIMENS flag.

  Note, that clone3 has only been wired up for x86{_32,64}, arm{64}, and
  xtensa. These were the architectures that did not require special
  massaging.

  Other architectures treat fork-like system calls individually and
  after some back and forth neither Arnd nor I felt confident that we
  dared to add clone3 unconditionally to all architectures. We agreed to
  leave this up to individual architecture maintainers. This is why
  there's an additional patch that introduces __ARCH_WANT_SYS_CLONE3
  which any architecture can set once it has implemented support for
  clone3. The patch also adds a cond_syscall(clone3) for architectures
  such as nios2 or h8300 that generate their syscall table by simply
  including asm-generic/unistd.h. The hope is to get rid of
  __ARCH_WANT_SYS_CLONE3 and cond_syscall() rather soon"

* tag 'clone3-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  arch: handle arches who do not yet define clone3
  arch: wire-up clone3() syscall
  fork: add clone3
  • Loading branch information
torvalds committed Jul 11, 2019
2 parents 5450e8a + d68dbb0 commit 8f6ccf6
Show file tree
Hide file tree
Showing 17 changed files with 218 additions and 41 deletions.
1 change: 1 addition & 0 deletions arch/arm/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3

/*
* Unimplemented (or alternatively implemented) syscalls
Expand Down
1 change: 1 addition & 0 deletions arch/arm/tools/syscall.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
3 changes: 2 additions & 1 deletion arch/arm64/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)

#define __NR_compat_syscalls 435
#define __NR_compat_syscalls 436
#endif

#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3

#ifndef __COMPAT_SYSCALL_NR
#include <uapi/asm/unistd.h>
Expand Down
2 changes: 2 additions & 0 deletions arch/arm64/include/asm/unistd32.h
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)

/*
* Please add new compat syscalls above this comment and update
Expand Down
1 change: 1 addition & 0 deletions arch/microblaze/kernel/syscalls/syscall.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
1 change: 1 addition & 0 deletions arch/x86/entry/syscalls/syscall_32.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,4 @@
432 i386 fsmount sys_fsmount __ia32_sys_fsmount
433 i386 fspick sys_fspick __ia32_sys_fspick
434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
435 i386 clone3 sys_clone3 __ia32_sys_clone3
1 change: 1 addition & 0 deletions arch/x86/entry/syscalls/syscall_64.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@
432 common fsmount __x64_sys_fsmount
433 common fspick __x64_sys_fspick
434 common pidfd_open __x64_sys_pidfd_open
435 common clone3 __x64_sys_clone3/ptregs

#
# x32-specific system call numbers start at 512 to avoid cache impact
Expand Down
12 changes: 10 additions & 2 deletions arch/x86/ia32/sys_ia32.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
unsigned long, newsp, int __user *, parent_tidptr,
unsigned long, tls_val, int __user *, child_tidptr)
{
return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr,
tls_val);
struct kernel_clone_args args = {
.flags = (clone_flags & ~CSIGNAL),
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (clone_flags & CSIGNAL),
.stack = newsp,
.tls = tls_val,
};

return _do_fork(&args);
}
1 change: 1 addition & 0 deletions arch/x86/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
# define __ARCH_WANT_SYS_CLONE
# define __ARCH_WANT_SYS_CLONE3

#endif /* _ASM_X86_UNISTD_H */
1 change: 1 addition & 0 deletions arch/xtensa/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define _XTENSA_UNISTD_H

#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3
#include <uapi/asm/unistd.h>

#define __ARCH_WANT_NEW_STAT
Expand Down
1 change: 1 addition & 0 deletions arch/xtensa/kernel/syscalls/syscall.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
17 changes: 16 additions & 1 deletion include/linux/sched/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,26 @@
*/

#include <linux/sched.h>
#include <linux/uaccess.h>

struct task_struct;
struct rusage;
union thread_union;

/* All the bits taken by the old clone syscall. */
#define CLONE_LEGACY_FLAGS 0xffffffffULL

struct kernel_clone_args {
u64 flags;
int __user *pidfd;
int __user *child_tid;
int __user *parent_tid;
int exit_signal;
unsigned long stack;
unsigned long stack_size;
unsigned long tls;
};

/*
* This serializes "schedule()" and also protects
* the run-queue from deletions/modifications (but
Expand Down Expand Up @@ -73,7 +88,7 @@ extern void do_group_exit(int);
extern void exit_files(struct task_struct *);
extern void exit_itimers(struct signal_struct *);

extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
extern long _do_fork(struct kernel_clone_args *kargs);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
Expand Down
4 changes: 4 additions & 0 deletions include/linux/syscalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct sigaltstack;
struct rseq;
union bpf_attr;
struct io_uring_params;
struct clone_args;

#include <linux/types.h>
#include <linux/aio_abi.h>
Expand Down Expand Up @@ -850,6 +851,9 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
int __user *, unsigned long);
#endif
#endif

asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size);

asmlinkage long sys_execve(const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp);
Expand Down
4 changes: 3 additions & 1 deletion include/uapi/asm-generic/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -846,9 +846,11 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)

#undef __NR_syscalls
#define __NR_syscalls 435
#define __NR_syscalls 436

/*
* 32 bit systems traditionally used different
Expand Down
16 changes: 16 additions & 0 deletions include/uapi/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#ifndef _UAPI_LINUX_SCHED_H
#define _UAPI_LINUX_SCHED_H

#include <linux/types.h>

/*
* cloning flags:
*/
Expand Down Expand Up @@ -31,6 +33,20 @@
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */

/*
* Arguments for the clone3 syscall
*/
struct clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
__aligned_u64 parent_tid;
__aligned_u64 exit_signal;
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
};

/*
* Scheduling policies
*/
Expand Down
Loading

0 comments on commit 8f6ccf6

Please sign in to comment.