Skip to content
This repository was archived by the owner on May 2, 2024. It is now read-only.

Commit 400d9ea

Browse files
Merge pull request #5 from steved/release-v17.06.2
add support for systems without memfd_create
2 parents 2866c19 + e3817f4 commit 400d9ea

File tree

1 file changed

+123
-89
lines changed

1 file changed

+123
-89
lines changed
Lines changed: 123 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
/*
2+
* Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
3+
* Copyright (C) 2019 SUSE LLC
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
#define _GNU_SOURCE
219
#include <unistd.h>
320
#include <stdio.h>
@@ -6,6 +23,7 @@
623
#include <string.h>
724
#include <limits.h>
825
#include <fcntl.h>
26+
#include <errno.h>
927

1028
#include <sys/types.h>
1129
#include <sys/stat.h>
@@ -14,57 +32,80 @@
1432
#include <sys/sendfile.h>
1533
#include <sys/syscall.h>
1634

17-
#include <linux/magic.h>
18-
#include <linux/memfd.h>
19-
20-
#define MEMFD_COMMENT "runc_cloned:/proc/self/exe"
21-
#define MEMFD_LNKNAME "/memfd:" MEMFD_COMMENT " (deleted)"
22-
2335
/*
 * Use our own wrapper for memfd_create(2).
 *
 * Nothing in this section is emitted when DISABLE_MEMFD_CREATE is defined or
 * when no syscall number for memfd_create is available; HAVE_MEMFD_CREATE is
 * only defined when the wrapper below is compiled in.
 */
#if !defined(DISABLE_MEMFD_CREATE)
/* Fall back to the __NR_ spelling when the SYS_ one is missing. */
#  if defined(__NR_memfd_create) && !defined(SYS_memfd_create)
#    define SYS_memfd_create __NR_memfd_create
#  endif
#  if defined(SYS_memfd_create)
#    define HAVE_MEMFD_CREATE
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
#    if !defined(MFD_CLOEXEC)
#      define MFD_CLOEXEC       0x0001U
#      define MFD_ALLOW_SEALING 0x0002U
#    endif
/* Invoke the memfd_create syscall directly and hand back its result. */
int memfd_create(const char *name, unsigned int flags)
{
	return syscall(SYS_memfd_create, name, flags);
}
#  endif /* SYS_memfd_create */
#endif /* !DISABLE_MEMFD_CREATE */
3453

3554
/* File-sealing fcntl(2) commands and flags -- taken from <linux/fcntl.h>. */
#if !defined(F_LINUX_SPECIFIC_BASE)
#  define F_LINUX_SPECIFIC_BASE 1024
#endif
#if !defined(F_ADD_SEALS)
#  define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#  define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
#endif
#if !defined(F_SEAL_SEAL)
#  define F_SEAL_SEAL   0x0001 /* prevent further seals from being set */
#  define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#  define F_SEAL_GROW   0x0004 /* prevent file from growing */
#  define F_SEAL_WRITE  0x0008 /* prevent writes */
#endif

/* sendfile(2) is limited to 2GB. */
#define RUNC_SENDFILE_MAX 0x7FFFF000
#if defined(HAVE_MEMFD_CREATE)
#  define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
/* The complete set of seals used for the cloned binary's memfd. */
#  define RUNC_MEMFD_SEALS \
	(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
#endif
75+
76+
/*
 * realloc(3) wrapper that never returns NULL: the allocation is retried until
 * it succeeds, so callers do not need an out-of-memory path.
 *
 * @ptr:  buffer to resize, or NULL to allocate a new one.
 * @size: requested size in bytes; 0 is treated as 1.
 *
 * Returns the (possibly moved) buffer. On return @ptr must no longer be used.
 */
static void *must_realloc(void *ptr, size_t size)
{
	void *resized;

	/*
	 * realloc(ptr, 0) is allowed to free ptr and return NULL. Retrying after
	 * that would pass an already-freed pointer back to realloc, so never ask
	 * for a zero-sized block.
	 */
	if (size == 0)
		size = 1;

	/* On failure realloc leaves ptr untouched, so it is safe to retry. */
	do {
		resized = realloc(ptr, size);
	} while (!resized);
	return resized;
}
84+
5085
/*
 * Verify whether we are currently in a self-cloned program (namely, is
 * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
 * for shmem files), and we want to be sure it's actually sealed. When built
 * without memfd_create support the check is instead whether the executable
 * has no remaining links (st_nlink == 0).
 *
 * Returns 1 if we are running from a clone, 0 if we are not, and
 * -ENOTRECOVERABLE if /proc/self/exe cannot be opened or (in the fallback
 * path) cannot be stat'ed.
 */
static int is_self_cloned(void)
{
	int fd, ret, is_cloned = 0;

	fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -ENOTRECOVERABLE;

#ifdef HAVE_MEMFD_CREATE
	/* Only a fully sealed file counts; an error from fcntl means "not cloned". */
	ret = fcntl(fd, F_GET_SEALS);
	is_cloned = (ret == RUNC_MEMFD_SEALS);
#else
	struct stat statbuf = {0};
	ret = fstat(fd, &statbuf);
	/*
	 * A failed fstat tells us nothing about the binary. Reporting "not cloned"
	 * here would make the caller clone and re-exec again, so surface it as an
	 * error instead.
	 */
	if (ret < 0)
		is_cloned = -ENOTRECOVERABLE;
	else
		is_cloned = (statbuf.st_nlink == 0);
#endif
	close(fd);
	return is_cloned;
}
69110

70111
/*
@@ -77,38 +118,32 @@ static char *read_file(char *path, size_t *length)
77118
char buf[4096], *copy = NULL;
78119

79120
if (!length)
80-
goto err;
81-
*length = 0;
121+
return NULL;
82122

83-
fd = open(path, O_RDONLY|O_CLOEXEC);
123+
fd = open(path, O_RDONLY | O_CLOEXEC);
84124
if (fd < 0)
85-
goto err_free;
125+
return NULL;
86126

127+
*length = 0;
87128
for (;;) {
88129
int n;
89-
char *old = copy;
90130

91131
n = read(fd, buf, sizeof(buf));
92132
if (n < 0)
93-
goto err_fd;
133+
goto error;
94134
if (!n)
95135
break;
96136

97-
do {
98-
copy = realloc(old, (*length + n) * sizeof(*old));
99-
} while(!copy);
100-
137+
copy = must_realloc(copy, (*length + n) * sizeof(*copy));
101138
memcpy(copy + *length, buf, n);
102139
*length += n;
103140
}
104141
close(fd);
105142
return copy;
106143

107-
err_fd:
144+
error:
108145
close(fd);
109-
err_free:
110146
free(copy);
111-
err:
112147
return NULL;
113148
}
114149

@@ -122,21 +157,12 @@ static int parse_xargs(char *data, int data_length, char ***output)
122157
int num = 0;
123158
char *cur = data;
124159

125-
if (!data || *output)
160+
if (!data || *output != NULL)
126161
return -1;
127162

128-
do {
129-
*output = malloc(sizeof(**output));
130-
} while (!*output);
131-
132163
while (cur < data + data_length) {
133-
char **old = *output;
134-
135164
num++;
136-
do {
137-
*output = realloc(old, (num + 1) * sizeof(*old));
138-
} while (!*output);
139-
165+
*output = must_realloc(*output, (num + 1) * sizeof(**output));
140166
(*output)[num - 1] = cur;
141167
cur += strlen(cur) + 1;
142168
}
@@ -151,67 +177,75 @@ static int parse_xargs(char *data, int data_length, char ***output)
151177
*/
152178
/*
 * Build argv/envp vectors for the current process from /proc/self/cmdline and
 * /proc/self/environ.
 *
 * @argv: receives the argument vector produced by parse_xargs.
 * @envp: receives the environment vector produced by parse_xargs.
 *
 * Returns 0 on success and -EINVAL on any failure. On success the buffers read
 * from /proc are deliberately not freed, because the vector entries point into
 * them.
 */
static int fetchve(char ***argv, char ***envp)
{
	/* Not called "environ" so the libc global of that name is not shadowed. */
	char *cmdline = NULL, *envblock = NULL;
	size_t cmdline_size, envblock_size;

	cmdline = read_file("/proc/self/cmdline", &cmdline_size);
	if (!cmdline)
		goto error;
	envblock = read_file("/proc/self/environ", &envblock_size);
	if (!envblock)
		goto error;

	if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
		goto error;
	if (parse_xargs(envblock, envblock_size, envp) <= 0) {
		/*
		 * The argv vector was allocated by the successful call above and its
		 * entries point into cmdline, which is about to be freed. Release it
		 * and reset the pointer so the caller is not left with a leak and a
		 * dangling vector.
		 */
		free(*argv);
		*argv = NULL;
		goto error;
	}

	return 0;

error:
	free(envblock);
	free(cmdline);
	return -EINVAL;
}
178202

179203
static int clone_binary(void)
180204
{
181-
int binfd, memfd, err;
205+
int binfd, memfd;
182206
ssize_t sent = 0;
183-
struct stat statbuf = {0};
184-
185-
binfd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
186-
if (binfd < 0)
187-
goto err;
188-
if (fstat(binfd, &statbuf) < 0)
189-
goto err_binfd;
190207

191-
memfd = memfd_create(MEMFD_COMMENT, MFD_CLOEXEC|MFD_ALLOW_SEALING);
208+
#ifdef HAVE_MEMFD_CREATE
209+
memfd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
210+
#else
211+
memfd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0711);
212+
#endif
192213
if (memfd < 0)
193-
goto err_binfd;
214+
return -ENOTRECOVERABLE;
194215

195-
while (sent < statbuf.st_size) {
196-
ssize_t n = sendfile(memfd, binfd, NULL, statbuf.st_size - sent);
197-
if (n < 0)
198-
goto err_memfd;
199-
sent += n;
200-
}
216+
binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
217+
if (binfd < 0)
218+
goto error;
201219

202-
err = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL);
220+
sent = sendfile(memfd, binfd, NULL, RUNC_SENDFILE_MAX);
221+
close(binfd);
222+
if (sent < 0)
223+
goto error;
224+
225+
#ifdef HAVE_MEMFD_CREATE
226+
int err = fcntl(memfd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
203227
if (err < 0)
204-
goto err_memfd;
228+
goto error;
229+
#else
230+
/* Need to re-open "memfd" as read-only to avoid execve(2) giving -EXTBUSY. */
231+
int newfd;
232+
char *fdpath = NULL;
233+
234+
if (asprintf(&fdpath, "/proc/self/fd/%d", memfd) < 0)
235+
goto error;
236+
newfd = open(fdpath, O_RDONLY | O_CLOEXEC);
237+
free(fdpath);
238+
if (newfd < 0)
239+
goto error;
205240

206-
close(binfd);
241+
close(memfd);
242+
memfd = newfd;
243+
#endif
207244
return memfd;
208245

209-
err_memfd:
246+
error:
210247
close(memfd);
211-
err_binfd:
212-
close(binfd);
213-
err:
214-
return -1;
248+
return -EIO;
215249
}
216250

217251
int ensure_cloned_binary(void)
@@ -221,16 +255,16 @@ int ensure_cloned_binary(void)
221255

222256
/* Check that we're not self-cloned, and if we are then bail. */
223257
int cloned = is_self_cloned();
224-
if (cloned != 0)
258+
if (cloned > 0 || cloned == -ENOTRECOVERABLE)
225259
return cloned;
226260

227261
if (fetchve(&argv, &envp) < 0)
228-
return -1;
262+
return -EINVAL;
229263

230264
execfd = clone_binary();
231265
if (execfd < 0)
232-
return -1;
266+
return -EIO;
233267

234268
fexecve(execfd, argv, envp);
235-
return -1;
269+
return -ENOEXEC;
236270
}

0 commit comments

Comments
 (0)