/*
 * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
 * Copyright (C) 2019 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17+
118#define _GNU_SOURCE
219#include <unistd.h>
320#include <stdio.h>
623#include <string.h>
724#include <limits.h>
825#include <fcntl.h>
26+ #include <errno.h>
927
1028#include <sys/types.h>
1129#include <sys/stat.h>
1432#include <sys/sendfile.h>
1533#include <sys/syscall.h>
1634
17- #include <linux/magic.h>
18- #include <linux/memfd.h>
19-
20- #define MEMFD_COMMENT "runc_cloned:/proc/self/exe"
21- #define MEMFD_LNKNAME "/memfd:" MEMFD_COMMENT " (deleted)"
22-
/*
 * Use our own wrapper for memfd_create.
 *
 * The wrapper (and HAVE_MEMFD_CREATE) is only provided when the build has not
 * opted out via DISABLE_MEMFD_CREATE and a syscall number is available.
 */
#ifndef DISABLE_MEMFD_CREATE
#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
#  define SYS_memfd_create __NR_memfd_create
#endif
#ifdef SYS_memfd_create
#  define HAVE_MEMFD_CREATE
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
#  ifndef MFD_CLOEXEC
#    define MFD_CLOEXEC       0x0001U
#    define MFD_ALLOW_SEALING 0x0002U
#  endif
/*
 * Thin wrapper that invokes the memfd_create syscall directly.
 *
 * name:  passed through to the syscall unchanged.
 * flags: passed through to the syscall unchanged (MFD_* bits).
 *
 * Returns whatever syscall(2) returns, narrowed to int.
 */
int memfd_create(const char *name, unsigned int flags)
{
	return syscall(SYS_memfd_create, name, flags);
}
#endif /* SYS_memfd_create */
#endif /* !DISABLE_MEMFD_CREATE */
3453
/*
 * This comes directly from <linux/fcntl.h>. Each group is only defined when
 * the system headers have not already provided it.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#  define F_LINUX_SPECIFIC_BASE 1024
#endif
#ifndef F_ADD_SEALS
#  define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#  define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
#endif
#ifndef F_SEAL_SEAL
#  define F_SEAL_SEAL   0x0001	/* prevent further seals from being set */
#  define F_SEAL_SHRINK 0x0002	/* prevent file from shrinking */
#  define F_SEAL_GROW   0x0004	/* prevent file from growing */
#  define F_SEAL_WRITE  0x0008	/* prevent writes */
#endif
4968
/* Largest byte count requested from a single sendfile(2) call (just under 2GB). */
#define RUNC_SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
#ifdef HAVE_MEMFD_CREATE
/* Name given to the memfd that holds the cloned binary. */
#  define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
/* Exact set of seals applied to (and later expected on) the cloned memfd. */
#  define RUNC_MEMFD_SEALS \
	(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
#endif
75+
/*
 * realloc(3) wrapper that never returns NULL: the allocation is retried until
 * it succeeds.
 *
 * ptr:  existing allocation to resize, or NULL to allocate a fresh one.
 * size: requested size in bytes. A request for 0 bytes is rounded up to 1.
 *
 * Returns the (possibly moved) allocation. On return @ptr must no longer be
 * used by the caller.
 */
static void *must_realloc(void *ptr, size_t size)
{
	void *new_ptr;

	/*
	 * realloc(ptr, 0) is allowed to free @ptr and return NULL. Retrying in
	 * that case would hand an already-freed pointer back to realloc(), so
	 * never ask for a zero-sized allocation.
	 */
	if (size == 0)
		size = 1;

	do {
		/* On failure realloc() leaves @ptr untouched, so retrying is safe. */
		new_ptr = realloc(ptr, size);
	} while (!new_ptr);
	return new_ptr;
}
84+
/*
 * Verify whether we are currently in a self-cloned program (namely, is
 * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
 * for shmem files), and we want to be sure it's actually sealed.
 *
 * Without memfd support the check falls back to testing whether the file
 * behind /proc/self/exe has no remaining links (st_nlink == 0).
 *
 * Returns 1 if the running binary is a clone, 0 if it is not (or if the
 * seal/stat query itself failed), and -ENOTRECOVERABLE if /proc/self/exe
 * could not be opened.
 */
static int is_self_cloned(void)
{
	int fd, ret, is_cloned = 0;

	fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -ENOTRECOVERABLE;

#ifdef HAVE_MEMFD_CREATE
	/* A failed fcntl() yields -1, which never matches the seal set. */
	ret = fcntl(fd, F_GET_SEALS);
	is_cloned = (ret == RUNC_MEMFD_SEALS);
#else
	struct stat statbuf = {0};
	ret = fstat(fd, &statbuf);
	if (ret >= 0)
		is_cloned = (statbuf.st_nlink == 0);
#endif
	close(fd);
	return is_cloned;
}
69110
70111/*
@@ -77,38 +118,32 @@ static char *read_file(char *path, size_t *length)
77118 char buf [4096 ], * copy = NULL ;
78119
79120 if (!length )
80- goto err ;
81- * length = 0 ;
121+ return NULL ;
82122
83- fd = open (path , O_RDONLY | O_CLOEXEC );
123+ fd = open (path , O_RDONLY | O_CLOEXEC );
84124 if (fd < 0 )
85- goto err_free ;
125+ return NULL ;
86126
127+ * length = 0 ;
87128 for (;;) {
88129 int n ;
89- char * old = copy ;
90130
91131 n = read (fd , buf , sizeof (buf ));
92132 if (n < 0 )
93- goto err_fd ;
133+ goto error ;
94134 if (!n )
95135 break ;
96136
97- do {
98- copy = realloc (old , (* length + n ) * sizeof (* old ));
99- } while (!copy );
100-
137+ copy = must_realloc (copy , (* length + n ) * sizeof (* copy ));
101138 memcpy (copy + * length , buf , n );
102139 * length += n ;
103140 }
104141 close (fd );
105142 return copy ;
106143
107- err_fd :
144+ error :
108145 close (fd );
109- err_free :
110146 free (copy );
111- err :
112147 return NULL ;
113148}
114149
@@ -122,21 +157,12 @@ static int parse_xargs(char *data, int data_length, char ***output)
122157 int num = 0 ;
123158 char * cur = data ;
124159
125- if (!data || * output )
160+ if (!data || * output != NULL )
126161 return -1 ;
127162
128- do {
129- * output = malloc (sizeof (* * output ));
130- } while (!* output );
131-
132163 while (cur < data + data_length ) {
133- char * * old = * output ;
134-
135164 num ++ ;
136- do {
137- * output = realloc (old , (num + 1 ) * sizeof (* old ));
138- } while (!* output );
139-
165+ * output = must_realloc (* output , (num + 1 ) * sizeof (* * output ));
140166 (* output )[num - 1 ] = cur ;
141167 cur += strlen (cur ) + 1 ;
142168 }
@@ -151,67 +177,75 @@ static int parse_xargs(char *data, int data_length, char ***output)
151177 */
static int fetchve(char ***argv, char ***envp)
{
	/*
	 * Reads /proc/self/cmdline and /proc/self/environ and splits them into
	 * the vectors stored in *argv and *envp. The vectors point into the
	 * buffers read here, so on success those buffers are intentionally
	 * left allocated.
	 *
	 * Returns 0 on success and -EINVAL on any failure.
	 *
	 * The environment buffer is deliberately not named "environ" so that
	 * it does not shadow the libc global of the same name.
	 */
	char *cmdline = NULL, *env_data = NULL;
	size_t cmdline_size, env_size;
	int argv_parsed = 0;

	cmdline = read_file("/proc/self/cmdline", &cmdline_size);
	if (!cmdline)
		goto error;
	env_data = read_file("/proc/self/environ", &env_size);
	if (!env_data)
		goto error;

	if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
		goto error;
	/* From here on *argv is an array allocated by parse_xargs(). */
	argv_parsed = 1;
	if (parse_xargs(env_data, env_size, envp) <= 0)
		goto error;

	return 0;

error:
	/*
	 * If argv was already parsed it points into cmdline, which is about to
	 * be freed. Release the array and reset the out-parameter so the
	 * caller is not left holding dangling pointers.
	 * NOTE(review): assumes parse_xargs() leaves *envp unallocated when it
	 * fails -- its tail is not visible here; confirm.
	 */
	if (argv_parsed) {
		free(*argv);
		*argv = NULL;
	}
	free(env_data);
	free(cmdline);
	return -EINVAL;
}
178202
179203static int clone_binary (void )
180204{
181- int binfd , memfd , err ;
205+ int binfd , memfd ;
182206 ssize_t sent = 0 ;
183- struct stat statbuf = {0 };
184-
185- binfd = open ("/proc/self/exe" , O_RDONLY |O_CLOEXEC );
186- if (binfd < 0 )
187- goto err ;
188- if (fstat (binfd , & statbuf ) < 0 )
189- goto err_binfd ;
190207
191- memfd = memfd_create (MEMFD_COMMENT , MFD_CLOEXEC |MFD_ALLOW_SEALING );
208+ #ifdef HAVE_MEMFD_CREATE
209+ memfd = memfd_create (RUNC_MEMFD_COMMENT , MFD_CLOEXEC | MFD_ALLOW_SEALING );
210+ #else
211+ memfd = open ("/tmp" , O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC , 0711 );
212+ #endif
192213 if (memfd < 0 )
193- goto err_binfd ;
214+ return - ENOTRECOVERABLE ;
194215
195- while (sent < statbuf .st_size ) {
196- ssize_t n = sendfile (memfd , binfd , NULL , statbuf .st_size - sent );
197- if (n < 0 )
198- goto err_memfd ;
199- sent += n ;
200- }
216+ binfd = open ("/proc/self/exe" , O_RDONLY | O_CLOEXEC );
217+ if (binfd < 0 )
218+ goto error ;
201219
202- err = fcntl (memfd , F_ADD_SEALS , F_SEAL_SHRINK |F_SEAL_GROW |F_SEAL_WRITE |F_SEAL_SEAL );
220+ sent = sendfile (memfd , binfd , NULL , RUNC_SENDFILE_MAX );
221+ close (binfd );
222+ if (sent < 0 )
223+ goto error ;
224+
225+ #ifdef HAVE_MEMFD_CREATE
226+ int err = fcntl (memfd , F_ADD_SEALS , RUNC_MEMFD_SEALS );
203227 if (err < 0 )
204- goto err_memfd ;
228+ goto error ;
229+ #else
230+ /* Need to re-open "memfd" as read-only to avoid execve(2) giving -EXTBUSY. */
231+ int newfd ;
232+ char * fdpath = NULL ;
233+
234+ if (asprintf (& fdpath , "/proc/self/fd/%d" , memfd ) < 0 )
235+ goto error ;
236+ newfd = open (fdpath , O_RDONLY | O_CLOEXEC );
237+ free (fdpath );
238+ if (newfd < 0 )
239+ goto error ;
205240
206- close (binfd );
241+ close (memfd );
242+ memfd = newfd ;
243+ #endif
207244 return memfd ;
208245
209- err_memfd :
246+ error :
210247 close (memfd );
211- err_binfd :
212- close (binfd );
213- err :
214- return -1 ;
248+ return - EIO ;
215249}
216250
217251int ensure_cloned_binary (void )
@@ -221,16 +255,16 @@ int ensure_cloned_binary(void)
221255
222256 /* Check that we're not self-cloned, and if we are then bail. */
223257 int cloned = is_self_cloned ();
224- if (cloned != 0 )
258+ if (cloned > 0 || cloned == - ENOTRECOVERABLE )
225259 return cloned ;
226260
227261 if (fetchve (& argv , & envp ) < 0 )
228- return -1 ;
262+ return - EINVAL ;
229263
230264 execfd = clone_binary ();
231265 if (execfd < 0 )
232- return -1 ;
266+ return - EIO ;
233267
234268 fexecve (execfd , argv , envp );
235- return -1 ;
269+ return - ENOEXEC ;
236270}
0 commit comments