Skip to content

Commit

Permalink
Merge tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kern…
Browse files Browse the repository at this point in the history
…el/git/brauner/linux

Pull pidfd system call from Christian Brauner:
 "This introduces the ability to use file descriptors from /proc/<pid>/
  as stable handles on struct pid. Even if a pid is recycled the handle
  will not change. For a start these fds can be used to send signals to
  the processes they refer to.

  With the ability to use /proc/<pid> fds as stable handles on struct
  pid we can fix a long-standing issue where after a process has exited
  its pid can be reused by another process. If a caller sends a signal
  to a reused pid it will end up signaling the wrong process.

  With this patchset we enable a variety of use cases. One obvious
  example is that we can now safely delegate an important part of
  process management - sending signals - to processes other than the
  parent of a given process by sending file descriptors around via scm
  rights and not fearing that the given process will have been recycled
  in the meantime. It also allows for easy testing whether a given
  process is still alive or not by sending signal 0 to a pidfd which is
  quite handy.

  There has been some interest in this feature e.g. from systems
  management (systemd, glibc) and container managers. I have requested
  and gotten comments from glibc to make sure that this syscall is
  suitable for their needs as well. In the future I expect it to take on
  most other pid-based signal syscalls. But such features are left for
  the future once they are needed.

  This has been sitting in linux-next for quite a while and has not
  caused any issues. It comes with selftests which verify basic
  functionality and also test that a recycled pid cannot be signaled via
  a pidfd.

  Jon has written about a prior version of this patchset. It should
  cover the basic functionality since not a lot has changed since then:

      https://lwn.net/Articles/773459/

  The commit message for the syscall itself is extensively documenting
  the syscall, including it's functionality and extensibility"

* tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests: add tests for pidfd_send_signal()
  signal: add pidfd_send_signal() syscall
  • Loading branch information
torvalds committed Mar 16, 2019
2 parents f67e3fb + 575a0ae commit a9dce66
Show file tree
Hide file tree
Showing 11 changed files with 538 additions and 6 deletions.
1 change: 1 addition & 0 deletions arch/x86/entry/syscalls/syscall_32.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@
421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
422 i386 futex_time64 sys_futex __ia32_sys_futex
423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
424 i386 pidfd_send_signal sys_pidfd_send_signal __ia32_sys_pidfd_send_signal
425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
1 change: 1 addition & 0 deletions arch/x86/entry/syscalls/syscall_64.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@
334 common rseq __x64_sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal __x64_sys_pidfd_send_signal
425 common io_uring_setup __x64_sys_io_uring_setup
426 common io_uring_enter __x64_sys_io_uring_enter
427 common io_uring_register __x64_sys_io_uring_register
Expand Down
9 changes: 9 additions & 0 deletions fs/proc/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -3074,6 +3074,15 @@ static const struct file_operations proc_tgid_base_operations = {
.llseek = generic_file_llseek,
};

struct pid *tgid_pidfd_to_pid(const struct file *file)
{
if (!d_is_dir(file->f_path.dentry) ||
(file->f_op != &proc_tgid_base_operations))
return ERR_PTR(-EBADF);

return proc_pid(file_inode(file));
}

static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
Expand Down
6 changes: 6 additions & 0 deletions include/linux/proc_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
int (*show)(struct seq_file *, void *),
proc_write_t write,
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);

#else /* CONFIG_PROC_FS */

Expand Down Expand Up @@ -114,6 +115,11 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})

static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
{
return ERR_PTR(-EBADF);
}

#endif /* CONFIG_PROC_FS */

struct net;
Expand Down
3 changes: 3 additions & 0 deletions include/linux/syscalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,9 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
unsigned mask, struct statx __user *buffer);
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);
asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
siginfo_t __user *info,
unsigned int flags);

/*
* Architecture-specific system calls
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/asm-generic/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,8 @@ __SYSCALL(__NR_futex_time64, sys_futex)
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
#endif

#define __NR_pidfd_send_signal 424
__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
#define __NR_io_uring_setup 425
__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
#define __NR_io_uring_enter 426
Expand Down
133 changes: 127 additions & 6 deletions kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
Expand Down Expand Up @@ -3487,6 +3489,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
#endif
#endif

static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
{
clear_siginfo(info);
info->si_signo = sig;
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = task_tgid_vnr(current);
info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
}

/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
Expand All @@ -3496,16 +3508,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct kernel_siginfo info;

clear_siginfo(&info);
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
prepare_kill_siginfo(sig, &info);

return kill_something_info(sig, &info, pid);
}

#ifdef CONFIG_PROC_FS
/*
* Verify that the signaler and signalee either are in the same pid namespace
* or that the signaler's pid namespace is an ancestor of the signalee's pid
* namespace.
*/
static bool access_pidfd_pidns(struct pid *pid)
{
struct pid_namespace *active = task_active_pid_ns(current);
struct pid_namespace *p = ns_of_pid(pid);

for (;;) {
if (!p)
return false;
if (p == active)
break;
p = p->parent;
}

return true;
}

static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
{
#ifdef CONFIG_COMPAT
/*
* Avoid hooking up compat syscalls and instead handle necessary
* conversions here. Note, this is a stop-gap measure and should not be
* considered a generic solution.
*/
if (in_compat_syscall())
return copy_siginfo_from_user32(
kinfo, (struct compat_siginfo __user *)info);
#endif
return copy_siginfo_from_user(kinfo, info);
}

/**
* sys_pidfd_send_signal - send a signal to a process through a task file
* descriptor
* @pidfd: the file descriptor of the process
* @sig: signal to be sent
* @info: the signal info
* @flags: future flags to be passed
*
* The syscall currently only signals via PIDTYPE_PID which covers
* kill(<positive-pid>, <signal>. It does not signal threads or process
* groups.
* In order to extend the syscall to threads and process groups the @flags
* argument should be used. In essence, the @flags argument will determine
* what is signaled and not the file descriptor itself. Put in other words,
* grouping is a property of the flags argument not a property of the file
* descriptor.
*
* Return: 0 on success, negative errno on failure
*/
SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
siginfo_t __user *, info, unsigned int, flags)
{
int ret;
struct fd f;
struct pid *pid;
kernel_siginfo_t kinfo;

/* Enforce flags be set to 0 until we add an extension. */
if (flags)
return -EINVAL;

f = fdget_raw(pidfd);
if (!f.file)
return -EBADF;

/* Is this a pidfd? */
pid = tgid_pidfd_to_pid(f.file);
if (IS_ERR(pid)) {
ret = PTR_ERR(pid);
goto err;
}

ret = -EINVAL;
if (!access_pidfd_pidns(pid))
goto err;

if (info) {
ret = copy_siginfo_from_user_any(&kinfo, info);
if (unlikely(ret))
goto err;

ret = -EINVAL;
if (unlikely(sig != kinfo.si_signo))
goto err;

if ((task_pid(current) != pid) &&
(kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
/* Only allow sending arbitrary signals to yourself. */
ret = -EPERM;
if (kinfo.si_code != SI_USER)
goto err;

/* Turn this into a regular kill signal. */
prepare_kill_siginfo(sig, &kinfo);
}
} else {
prepare_kill_siginfo(sig, &kinfo);
}

ret = kill_pid_info(sig, &kinfo, pid);

err:
fdput(f);
return ret;
}
#endif /* CONFIG_PROC_FS */

static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
{
Expand Down
1 change: 1 addition & 0 deletions kernel/sys_ni.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ COND_SYSCALL(syslog);
/* kernel/sched/core.c */

/* kernel/signal.c */
COND_SYSCALL(pidfd_send_signal);

/* kernel/sys.c */
COND_SYSCALL(setregid);
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ TARGETS += net
TARGETS += netfilter
TARGETS += networking/timestamping
TARGETS += nsfs
TARGETS += pidfd
TARGETS += powerpc
TARGETS += proc
TARGETS += pstore
Expand Down
6 changes: 6 additions & 0 deletions tools/testing/selftests/pidfd/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CFLAGS += -g -I../../../../usr/include/

TEST_GEN_PROGS := pidfd_test

include ../lib.mk

Loading

0 comments on commit a9dce66

Please sign in to comment.