Skip to content

Commit b6f1dd8

Browse files
committed
PGPRO-1248: new status ORPHAN for backup with corrupted parent
1 parent 269812a commit b6f1dd8

File tree

7 files changed

+773
-38
lines changed

7 files changed

+773
-38
lines changed

src/catalog.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,8 @@ readBackupControlFile(const char *path)
556556
backup->status = BACKUP_STATUS_DELETED;
557557
else if (strcmp(status, "DONE") == 0)
558558
backup->status = BACKUP_STATUS_DONE;
559+
else if (strcmp(status, "ORPHAN") == 0)
560+
backup->status = BACKUP_STATUS_ORPHAN;
559561
else if (strcmp(status, "CORRUPT") == 0)
560562
backup->status = BACKUP_STATUS_CORRUPT;
561563
else

src/parsexlog.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup,
258258
*/
259259
backup->status = BACKUP_STATUS_CORRUPT;
260260
pgBackupWriteBackupControlFile(backup);
261-
elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X",
261+
elog(WARNING, "There are not enough WAL records to consistenly restore "
262+
"backup %s from START LSN: %X/%X to STOP LSN: %X/%X",
263+
base36enc(backup->start_time),
262264
(uint32) (backup->start_lsn >> 32),
263265
(uint32) (backup->start_lsn),
264266
(uint32) (backup->stop_lsn >> 32),
@@ -329,14 +331,19 @@ validate_wal(pgBackup *backup,
329331

330332
free(backup_id);
331333

334+
if (backup->status == BACKUP_STATUS_CORRUPT)
335+
{
336+
elog(WARNING, "Backup %s WAL segments are corrupted", base36enc(backup->start_time));
337+
return;
338+
}
332339
/*
333340
* If recovery target is provided check that we can restore backup to a
334-
* recoverty target time or xid.
341+
* recovery target time or xid.
335342
*/
336343
if (!TransactionIdIsValid(target_xid) && target_time == 0)
337344
{
338-
/* Recoverty target is not given so exit */
339-
elog(INFO, "backup validation completed successfully");
345+
/* Recovery target is not given so exit */
346+
elog(INFO, "Backup %s WAL segments are valid", base36enc(backup->start_time));
340347
return;
341348
}
342349

src/pg_probackup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ typedef enum BackupStatus
118118
BACKUP_STATUS_DELETING, /* data files are being deleted */
119119
BACKUP_STATUS_DELETED, /* data files have been deleted */
120120
BACKUP_STATUS_DONE, /* completed but not validated yet */
121+
BACKUP_STATUS_ORPHAN, /* backup validity is unknown but at least one parent backup is corrupted */
121122
BACKUP_STATUS_CORRUPT /* files are corrupted, not available */
122123
} BackupStatus;
123124

src/restore.c

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ do_restore_or_validate(time_t target_backup_id,
9090
pgBackup *current_backup = NULL;
9191
pgBackup *dest_backup = NULL;
9292
pgBackup *base_full_backup = NULL;
93+
pgBackup *corrupted_backup = NULL;
9394
int dest_backup_index = 0;
9495
int base_full_backup_index = 0;
96+
int corrupted_backup_index = 0;
9597
char *action = is_restore ? "Restore":"Validate";
9698

9799
if (is_restore)
@@ -205,7 +207,7 @@ do_restore_or_validate(time_t target_backup_id,
205207
}
206208
}
207209
else
208-
/* Skip differential backups are ok */
210+
/* It's OK to skip incremental backups */
209211
continue;
210212
}
211213
}
@@ -220,36 +222,77 @@ do_restore_or_validate(time_t target_backup_id,
220222
if (is_restore)
221223
check_tablespace_mapping(dest_backup);
222224

225+
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
226+
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
227+
223228
/*
224229
* Validate backups from base_full_backup to dest_backup.
225230
*/
226231
for (i = base_full_backup_index; i >= dest_backup_index; i--)
227232
{
228233
pgBackup *backup = (pgBackup *) parray_get(backups, i);
229234
pgBackupValidate(backup);
235+
if (backup->status == BACKUP_STATUS_CORRUPT)
236+
{
237+
corrupted_backup = backup;
238+
corrupted_backup_index = i;
239+
break;
240+
}
241+
}
242+
/* There is no point in wal validation
243+
* if there is a corrupted backup between base_full_backup and dest_backup
244+
*/
245+
if (!corrupted_backup)
246+
/*
247+
* Validate corresponding WAL files.
248+
* We pass base_full_backup timeline as last argument to this function,
249+
* because it's needed to form the name of xlog file.
250+
*/
251+
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
252+
rt->recovery_target_xid, base_full_backup->tli);
253+
254+
/* Mark every OK incremental backup between the corrupted backup and the nearest FULL backup as ORPHAN */
255+
if (corrupted_backup)
256+
{
257+
for (i = corrupted_backup_index - 1; i >= 0; i--)
258+
{
259+
pgBackup *backup = (pgBackup *) parray_get(backups, i);
260+
/* Mark incremental OK backup as orphan */
261+
if (backup->backup_mode == BACKUP_MODE_FULL)
262+
break;
263+
if (backup->status != BACKUP_STATUS_OK)
264+
continue;
265+
else
266+
{
267+
backup->status = BACKUP_STATUS_ORPHAN;
268+
pgBackupWriteBackupControlFile(backup);
269+
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
270+
base36enc(backup->start_time), base36enc(corrupted_backup->start_time));
271+
}
272+
}
230273
}
231274

232275
/*
233-
* Validate corresponding WAL files.
234-
* We pass base_full_backup timeline as last argument to this function,
235-
* because it's needed to form the name of xlog file.
276+
* If the dest backup is corrupted or was orphaned in the previous check,
277+
* produce the corresponding error message.
236278
*/
237-
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
238-
rt->recovery_target_xid, base_full_backup->tli);
239-
279+
if (dest_backup->status == BACKUP_STATUS_OK)
280+
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
281+
else if (dest_backup->status == BACKUP_STATUS_CORRUPT)
282+
elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time));
283+
else if (dest_backup->status == BACKUP_STATUS_ORPHAN)
284+
elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time));
285+
else
286+
elog(ERROR, "Backup %s has status: %s",
287+
base36enc(dest_backup->start_time), status2str(dest_backup->status));
240288

241289
/* We ensured that all backups are valid, now restore if required */
242290
if (is_restore)
243291
{
244-
pgBackup *backup;
245292
for (i = base_full_backup_index; i >= dest_backup_index; i--)
246293
{
247-
backup = (pgBackup *) parray_get(backups, i);
248-
if (backup->status == BACKUP_STATUS_OK)
249-
restore_backup(backup);
250-
else
251-
elog(ERROR, "backup %s is not valid",
252-
base36enc(backup->start_time));
294+
pgBackup *backup = (pgBackup *) parray_get(backups, i);
295+
restore_backup(backup);
253296
}
254297

255298
/*

src/util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,9 @@ status2str(BackupStatus status)
230230
"DELETING",
231231
"DELETED",
232232
"DONE",
233+
"ORPHAN",
233234
"CORRUPT"
234235
};
235-
236236
if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status)
237237
return "UNKNOWN";
238238

src/validate.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,9 @@ pgBackupValidate(pgBackup *backup)
9797
pgBackupWriteBackupControlFile(backup);
9898

9999
if (corrupted)
100-
elog(WARNING, "Backup %s is corrupted", backup_id_string);
100+
elog(WARNING, "Backup %s data files are corrupted", backup_id_string);
101101
else
102-
elog(INFO, "Backup %s is valid", backup_id_string);
102+
elog(INFO, "Backup %s data files are valid", backup_id_string);
103103
free(backup_id_string);
104104
}
105105

@@ -269,8 +269,6 @@ do_validate_instance(void)
269269
current_backup = (pgBackup *) parray_get(backups, i);
270270
backup_id = base36enc(current_backup->start_time);
271271

272-
elog(INFO, "Validate backup %s", backup_id);
273-
274272
if (current_backup->backup_mode != BACKUP_MODE_FULL)
275273
{
276274
int j;
@@ -301,9 +299,27 @@ do_validate_instance(void)
301299
validate_wal(current_backup, arclog_path, 0,
302300
0, base_full_backup->tli);
303301
}
304-
302+
/* Mark every OK incremental backup between the corrupted backup and the nearest FULL backup as ORPHAN */
305303
if (current_backup->status != BACKUP_STATUS_OK)
304+
{
305+
int j;
306306
corrupted_backup_found = true;
307+
for (j = i - 1; j >= 0; j--)
308+
{
309+
pgBackup *backup = (pgBackup *) parray_get(backups, j);
310+
if (backup->backup_mode == BACKUP_MODE_FULL)
311+
break;
312+
if (backup->status != BACKUP_STATUS_OK)
313+
continue;
314+
else
315+
{
316+
backup->status = BACKUP_STATUS_ORPHAN;
317+
pgBackupWriteBackupControlFile(backup);
318+
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
319+
base36enc(backup->start_time), base36enc(current_backup->start_time));
320+
}
321+
}
322+
}
307323

308324
free(backup_id);
309325
}

0 commit comments

Comments
 (0)