@@ -70,85 +70,65 @@ std::string try_parse_int_token(std::string_view s, I *i)
7070 return {};
7171}
7272
73- // Create event ring files with rw-rw-r--
74- constexpr mode_t CreateMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH;
75-
76- int claim_event_ring_file (
77- int const dir_fd, char const *const file_name, char const *const full_path,
78- int *const ring_fd)
73+ int claim_event_ring_file (fs::path const &ring_path)
7974{
80- *ring_fd = openat (dir_fd, file_name, O_RDONLY | O_CREAT, CreateMode);
81- if (*ring_fd == -1 ) {
82- int const rc = errno;
83- LOG_ERROR (
84- " openat failed for event ring file `{}`: {} [{}]" ,
85- full_path,
86- strerror (rc),
87- rc);
88- return rc;
75+ int ring_fd [[gnu::cleanup (cleanup_close)]] =
76+ open (ring_path.c_str (), O_RDONLY);
77+ if (ring_fd == -1 ) {
78+ // Inability to open is normal: it means there's no zombie to clean up
79+ return 0 ;
8980 }
90- if (flock (* ring_fd, LOCK_EX | LOCK_NB) == -1 ) {
81+ if (flock (ring_fd, LOCK_EX | LOCK_NB) == -1 ) {
9182 int const saved_errno = errno;
9283 if (saved_errno == EWOULDBLOCK) {
9384 pid_t owner_pid = 0 ;
9485 size_t owner_pid_size = 1 ;
9586
9687 // Another process has an exclusive lock; find out who it is
9788 (void )monad_event_ring_find_writer_pids (
98- * ring_fd, &owner_pid, &owner_pid_size);
89+ ring_fd, &owner_pid, &owner_pid_size);
9990 if (owner_pid == 0 ) {
10091 LOG_ERROR (
10192 " event ring file `{}` is owned by an unknown other process" ,
102- full_path );
93+ ring_path. c_str () );
10394 }
10495 else {
10596 LOG_ERROR (
10697 " event ring file `{}` is owned by pid {}" ,
107- full_path ,
98+ ring_path. c_str () ,
10899 owner_pid);
109100 }
110- return saved_errno ;
101+ return EWOULDBLOCK ;
111102 }
112103 LOG_ERROR (
113104 " flock on event ring file `{}` failed: {} ({})" ,
114- full_path,
115- strerror (saved_errno),
116- saved_errno);
117- return saved_errno;
118- }
119- // Note: truncate(2) not ftruncate(2), because we deliberately opened the
120- // fd with O_RDONLY; even though we're going to destroy this file soon
121- // anyway, we explicitly truncate to zero so that space-constrained
122- // filesystems like hugetlbfs can drop the committed pages if they're not
123- // mapped anywhere. We will initialize the replacement for this file before
124- // destroying it, so for a moment both will exist.
125- if (truncate (full_path, 0 ) == -1 ) {
126- int const saved_errno = errno;
127- LOG_ERROR (
128- " truncate to zero failed for event ring file `{}` ({})" ,
129- full_path,
105+ ring_path.c_str (),
130106 strerror (saved_errno),
131107 saved_errno);
132108 return saved_errno;
133109 }
110+ (void )unlink (ring_path.c_str ()); // what we now own is a zombie; destroy it
134111 return 0 ;
135112}
136113
137114int allocate_event_ring_file (
138- monad_event_ring_simple_config const *const simple_cfg, int const dir_fd,
139- char const *const file_name, char const *const full_path,
140- int *const init_ring_fd)
115+ monad_event_ring_simple_config const &simple_cfg, fs::path const &init_path,
116+ fs::path const &ring_path, int *const init_ring_fd)
141117{
118+ // Create event ring files with rw-rw-r--
119+ constexpr mode_t CreateMode =
120+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH;
121+
142122 *init_ring_fd =
143- openat (dir_fd, file_name , O_RDWR | O_CREAT | O_EXCL, CreateMode);
123+ open (init_path. c_str () , O_RDWR | O_CREAT | O_EXCL, CreateMode);
144124 if (*init_ring_fd == -1 ) {
145125 int const rc = errno;
146126 LOG_ERROR (
147127 " could not create event ring temporary initialization file `{}` "
148128 " (for {}): "
149129 " {} [{}]" ,
150- file_name ,
151- full_path ,
130+ init_path. c_str () ,
131+ ring_path. c_str () ,
152132 strerror (rc),
153133 rc);
154134 return rc;
@@ -158,14 +138,14 @@ int allocate_event_ring_file(
158138 LOG_ERROR (
159139 " flock on event ring file temporary initialization file `{}` (for "
160140 " {}) failed: {} ({})" ,
161- file_name ,
162- full_path ,
141+ init_path. c_str () ,
142+ ring_path. c_str () ,
163143 strerror (saved_errno),
164144 saved_errno);
165145 return saved_errno;
166146 }
167147 if (int const rc = monad_event_ring_init_simple (
168- simple_cfg, *init_ring_fd, 0 , full_path )) {
148+ & simple_cfg, *init_ring_fd, 0 , init_path. c_str () )) {
169149 LOG_ERROR (
170150 " event library error -- {}" , monad_event_ring_get_last_error ());
171151 return rc;
@@ -180,96 +160,69 @@ int allocate_event_ring_file(
180160// which gives confusing errors.
181161//
182162// This will create a new locked file that is fully initialized, and then
183- // atomically replaces the original file using Linux's renameat2(2)
184- // RENAME_EXCHANGE feature, which can atomically swap two paths .
163+ // rename it to the correct name using Linux's renameat2(2) RENAME_NOREPLACE
164+ // feature.
185165//
186- // 1. First we take possession of the file's name (on an advisory basis using
187- // flock(2)) via the helper function `claim_event_ring_file`. That function
188- // opens the file with O_RDONLY, to avoid triggering anyone watching with
189- // `monad_event_ring_find_writer_pids` (the file will still appear to be
190- // a zombie). It places a LOCK_EX flock(2) to claim ownership of the file
191- // initialization process, so that the rest of the steps can deal with
192- // another daemon racing against us. Note that we do this _even though_
193- // we're only open with O_RDONLY, which Linux allows .
166+ // 1. First we try to take possession of the file's name (on an advisory
167+ // basis using flock(2)) via the helper function `claim_event_ring_file`.
168+ // That function opens the file with O_RDONLY, to avoid triggering anyone
169+ // watching with `monad_event_ring_find_writer_pids`. It places a LOCK_EX
170+ // flock(2) to claim ownership of the file initialization process, and
171+ // returns EWOULDBLOCK if it appears another process owns the file. If we
172+ // claim the file, then (1) it existed and (2) was un-owned and was
173+ // therefore a zombie from a crashed process. We destroy it .
194174//
195175// 2. Next, we use the helper function `allocate_event_ring_file` to create
196176// the real file (called the "init" file) with the temporary file name
197177// `<file-name>.<our-pid>`; when this returns successfully, the file is
198178// advisory-locked and initialized
199179//
200- // 3. Finally, we atomically exchange the two filenames in the filesystem,
201- // then delete the "init" file's name (which now refers to the truncated
202- // "name-reservation" file)
203- //
204- // These functions use dir_fd relative functions like openat(2), linkat(2),
205- // etc., because renameat2(2) is the only syscall that can use RENAME_EXCHANGE
180+ // 3. Finally, we rename the init file to its correct filename
206181int create_owned_event_ring (
207182 fs::path const &ring_file_path,
208- monad_event_ring_simple_config const * simple_cfg, int *ring_fd)
183+ monad_event_ring_simple_config const & simple_cfg, int *ring_fd)
209184{
210- std::string const file_name = ring_file_path.filename ().string ();
211- std::string const init_file_name =
212- std::format (" {}.{}" , file_name, getpid ());
213-
214- int dir_fd [[gnu::cleanup (cleanup_close)]] =
215- ring_file_path.has_parent_path ()
216- ? open (ring_file_path.parent_path ().c_str (), O_DIRECTORY | O_PATH)
217- : AT_FDCWD;
218- if (dir_fd == -1 ) {
219- int const rc = errno;
220- LOG_ERROR (
221- " open of event ring file parent directory {} failed" ,
222- ring_file_path.parent_path ().c_str ());
223- return rc;
224- }
225- if (int const rc = claim_event_ring_file (
226- dir_fd, file_name.c_str (), ring_file_path.c_str (), ring_fd)) {
185+ fs::path init_file_path = ring_file_path;
186+ init_file_path.replace_filename (
187+ std::format (" {}.{}" , ring_file_path.filename ().c_str (), getpid ()));
188+
189+ if (int const rc = claim_event_ring_file (ring_file_path)) {
227190 return rc;
228191 }
229192
230- int init_ring_fd [[gnu::cleanup (cleanup_close)]] = -1 ;
231193 if (int const rc = allocate_event_ring_file (
232- simple_cfg,
233- dir_fd,
234- init_file_name.c_str (),
235- ring_file_path.c_str (),
236- &init_ring_fd)) {
237- (void )unlinkat (dir_fd, file_name.c_str (), 0 );
238- (void )unlinkat (dir_fd, init_file_name.c_str (), 0 );
194+ simple_cfg, init_file_path, ring_file_path, ring_fd)) {
195+ (void )unlink (init_file_path.c_str ());
239196 return rc;
240197 }
241198
242199 if (renameat2 (
243- dir_fd ,
244- init_file_name .c_str (),
245- dir_fd ,
246- file_name .c_str (),
247- RENAME_EXCHANGE ) == -1 ) {
200+ AT_FDCWD ,
201+ init_file_path .c_str (),
202+ AT_FDCWD ,
203+ ring_file_path .c_str (),
204+ RENAME_NOREPLACE ) == -1 ) {
248205 int const rc = errno;
249- (void )unlinkat (dir_fd, file_name.c_str (), 0 );
250- (void )unlinkat (dir_fd, init_file_name.c_str (), 0 );
206+ (void )unlink (init_file_path.c_str ());
251207 LOG_ERROR (
252- " atomic exchange of {}/{} -> {}/{} failed: {} [{}]" ,
253- ring_file_path.parent_path ().c_str (),
254- init_file_name,
255- ring_file_path.parent_path ().c_str (),
256- file_name,
208+ " rename of {} -> {} failed: {} [{}]" ,
209+ init_file_path.c_str (),
210+ ring_file_path.c_str (),
257211 strerror (rc),
258212 rc);
259213 return rc;
260214 }
261- (void )unlinkat (dir_fd, init_file_name.c_str (), 0 );
262- std::swap (*ring_fd, init_ring_fd);
215+
263216 return 0 ;
264217}
265218
266219// Call create_owned_event_ring, but with SIGTERM and SIGINT blocked while it
267- // runs so we don 't have any junk files lying around; those signals will be
268- // unblocked again (if they were before) to receive any pending signals prior
269- // to returning
220+ // runs so we can 't be killed, which would leave junk files lying around; those
221+ // signals will be unblocked again (if they were before) to receive any pending
222+ // signals prior to returning
270223int create_owned_event_ring_nointr (
271224 fs::path const &ring_file_path,
272- monad_event_ring_simple_config const * simple_cfg, int *ring_fd)
225+ monad_event_ring_simple_config const & simple_cfg, int *ring_fd)
273226{
274227 sigset_t to_block;
275228 sigset_t old_mask;
@@ -381,7 +334,7 @@ int init_execution_event_recorder(EventRingConfig ring_config)
381334
382335 int ring_fd [[gnu::cleanup (cleanup_close)]] = -1 ;
383336 if (int const rc = create_owned_event_ring_nointr (
384- ring_config.event_ring_spec , & simple_cfg, &ring_fd)) {
337+ ring_config.event_ring_spec , simple_cfg, &ring_fd)) {
385338 return rc;
386339 }
387340
0 commit comments