Skip to content

Commit 818ef36

Browse files
committed
grab_excl_lock_file: don't use fio_*
This function checks for a concurrent locker by using the "kill" command, which is strictly local. There is no way to make it work reliably on a remote host. Moreover, we have to write the pid of the remote agent. So, if for whatever reason we need to lock a backup on a remote host, we'd better call this function from the agent. Also, it would be better to use fcntl(F_SETLK) on Unix and LockFileEx on Windows. But let's leave that for the future.
1 parent 5f419c3 commit 818ef36

File tree

1 file changed

+38
-54
lines changed

1 file changed

+38
-54
lines changed

src/catalog.c

Lines changed: 38 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -298,12 +298,17 @@ int
298298
grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
299299
{
300300
char lock_file[MAXPGPATH];
301-
int fd = 0;
301+
FILE *fp = NULL;
302302
char buffer[256];
303303
int ntries = LOCK_TIMEOUT;
304304
int empty_tries = LOCK_STALE_TIMEOUT;
305-
int len;
306-
int encoded_pid;
305+
size_t len;
306+
pid_t encoded_pid;
307+
int save_errno = 0;
308+
enum {
309+
GELF_FAILED_WRITE = 1,
310+
GELF_FAILED_CLOSE = 2,
311+
} failed_action = 0;
307312

308313
join_path_components(lock_file, root_dir, BACKUP_LOCK_FILE);
309314

@@ -314,19 +319,17 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
314319
*/
315320
do
316321
{
317-
FILE *fp_out = NULL;
318-
319322
if (interrupted)
320323
elog(ERROR, "Interrupted while locking backup %s", backup_id);
321324

322325
/*
323-
* Try to create the lock file --- O_EXCL makes this atomic.
326+
* Try to create the lock file --- "wx" makes this atomic.
324327
*
325328
* Think not to make the file protection weaker than 0600. See
326329
* comments below.
327330
*/
328-
fd = fio_open(FIO_BACKUP_HOST, lock_file, O_RDWR | O_CREAT | O_EXCL);
329-
if (fd >= 0)
331+
fp = fopen(lock_file, "wx");
332+
if (fp != NULL)
330333
break; /* Success; exit the retry loop */
331334

332335
/* read-only fs is a special case */
@@ -342,7 +345,6 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
342345
* If file already exists or we have some permission problem (???),
343346
* then retry;
344347
*/
345-
// if ((errno != EEXIST && errno != EACCES))
346348
if (errno != EEXIST)
347349
elog(ERROR, "Could not create lock file \"%s\": %s",
348350
lock_file, strerror(errno));
@@ -352,18 +354,19 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
352354
* here: file might have been deleted since we tried to create it.
353355
*/
354356

355-
fp_out = fopen(lock_file, "r");
356-
if (fp_out == NULL)
357+
fp = fopen(lock_file, "r");
358+
if (fp == NULL)
357359
{
358360
if (errno == ENOENT)
359361
continue; /* race condition; try again */
360362
elog(ERROR, "Cannot open lock file \"%s\": %s", lock_file, strerror(errno));
361363
}
362364

363-
len = fread(buffer, 1, sizeof(buffer) - 1, fp_out);
364-
if (ferror(fp_out))
365+
len = fread(buffer, 1, sizeof(buffer) - 1, fp);
366+
if (ferror(fp))
365367
elog(ERROR, "Cannot read from lock file: \"%s\"", lock_file);
366-
fclose(fp_out);
368+
fclose(fp);
369+
fp = NULL;
367370

368371
/*
369372
* There are several possible reasons for lock file
@@ -400,7 +403,7 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
400403
continue;
401404
}
402405

403-
encoded_pid = atoi(buffer);
406+
encoded_pid = (pid_t)atoll(buffer);
404407

405408
if (encoded_pid <= 0)
406409
{
@@ -450,7 +453,7 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
450453
* it. Need a loop because of possible race condition against other
451454
* would-be creators.
452455
*/
453-
if (fio_remove(FIO_BACKUP_HOST, lock_file, false) < 0)
456+
if (remove(lock_file) < 0)
454457
{
455458
if (errno == ENOENT)
456459
continue; /* race condition, again */
@@ -461,40 +464,32 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
461464
} while (ntries--);
462465

463466
/* Failed to acquire exclusive lock in time */
464-
if (fd <= 0)
467+
if (fp == NULL)
465468
return LOCK_FAIL_TIMEOUT;
466469

467470
/*
468471
* Successfully created the file, now fill it.
469472
*/
470-
snprintf(buffer, sizeof(buffer), "%lld\n", (long long)my_pid);
471-
472473
errno = 0;
473-
if (fio_write(fd, buffer, strlen(buffer)) != strlen(buffer))
474-
{
475-
int save_errno = errno;
474+
fprintf(fp, "%lld\n", (long long)my_pid);
475+
fflush(fp);
476476

477-
fio_close(fd);
478-
if (fio_remove(FIO_BACKUP_HOST, lock_file, false) != 0)
479-
elog(WARNING, "Cannot remove lock file \"%s\": %s", lock_file, strerror(errno));
480-
481-
/* In lax mode if we failed to grab lock because of 'out of space error',
482-
* then treat backup as locked.
483-
* Only delete command should be run in lax mode.
484-
*/
485-
if (!strict && save_errno == ENOSPC)
486-
return LOCK_FAIL_ENOSPC;
487-
else
488-
elog(ERROR, "Could not write lock file \"%s\": %s",
489-
lock_file, strerror(save_errno));
477+
if (ferror(fp))
478+
{
479+
failed_action = GELF_FAILED_WRITE;
480+
save_errno = errno;
481+
clearerr(fp);
490482
}
491483

492-
if (fio_flush(fd) != 0)
484+
if (fclose(fp) && save_errno == 0)
493485
{
494-
int save_errno = errno;
486+
failed_action = GELF_FAILED_CLOSE;
487+
save_errno = errno;
488+
}
495489

496-
fio_close(fd);
497-
if (fio_remove(FIO_BACKUP_HOST, lock_file, false) != 0)
490+
if (save_errno)
491+
{
492+
if (remove(lock_file) != 0)
498493
elog(WARNING, "Cannot remove lock file \"%s\": %s", lock_file, strerror(errno));
499494

500495
/* In lax mode if we failed to grab lock because of 'out of space error',
@@ -503,21 +498,10 @@ grab_excl_lock_file(const char *root_dir, const char *backup_id, bool strict)
503498
*/
504499
if (!strict && save_errno == ENOSPC)
505500
return LOCK_FAIL_ENOSPC;
506-
else
507-
elog(ERROR, "Could not flush lock file \"%s\": %s",
508-
lock_file, strerror(save_errno));
509-
}
510-
511-
if (fio_close(fd) != 0)
512-
{
513-
int save_errno = errno;
514-
515-
if (fio_remove(FIO_BACKUP_HOST, lock_file, false) != 0)
516-
elog(WARNING, "Cannot remove lock file \"%s\": %s", lock_file, strerror(errno));
517-
518-
if (!strict && save_errno == ENOSPC)
519-
return LOCK_FAIL_ENOSPC;
520-
else
501+
else if (failed_action == GELF_FAILED_WRITE)
502+
elog(ERROR, "Could not write lock file \"%s\": %s",
503+
lock_file, strerror(save_errno));
504+
else if (failed_action == GELF_FAILED_CLOSE)
521505
elog(ERROR, "Could not close lock file \"%s\": %s",
522506
lock_file, strerror(save_errno));
523507
}

0 commit comments

Comments
 (0)