Skip to content

Commit 6f6a6fd

Browse files
josephhztytso
authored andcommitted
jbd2: fix ocfs2 corrupt when updating journal superblock fails
If updating journal superblock fails after journal data has been flushed, the error is omitted and this will mislead the caller as a normal case. In ocfs2, the checkpoint will be treated successfully and the other node can get the lock to update. Since the sb_start is still pointing to the old log block, it will rewrite the journal data during journal recovery by the other node. Thus the new updates will be overwritten and ocfs2 corrupts. So in above case we have to return the error, and ocfs2_commit_cache will take care of the error and prevent the other node to do update first. And only after recovering journal it can do the new updates. The issue discussion mail can be found at: https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html http://comments.gmane.org/gmane.comp.file-systems.ext4/48841 [ Fixed bug in patch which allowed a non-negative error return from jbd2_cleanup_journal_tail() to leak out of jbd2_fjournal_flush(); this was causing xfstests ext4/306 to fail. -- Ted ] Reported-by: Yiwen Jiang <jiangyiwen@huawei.com> Signed-off-by: Joseph Qi <joseph.qi@huawei.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Tested-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: stable@vger.kernel.org
1 parent 97b4af2 commit 6f6a6fd

File tree

3 files changed

+35
-12
lines changed

3 files changed

+35
-12
lines changed

fs/jbd2/checkpoint.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
390390
unsigned long blocknr;
391391

392392
if (is_journal_aborted(journal))
393-
return 1;
393+
return -EIO;
394394

395395
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
396396
return 1;
@@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
407407
if (journal->j_flags & JBD2_BARRIER)
408408
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
409409

410-
__jbd2_update_log_tail(journal, first_tid, blocknr);
411-
return 0;
410+
return __jbd2_update_log_tail(journal, first_tid, blocknr);
412411
}
413412

414413

fs/jbd2/journal.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -876,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
876876
*
877877
* Requires j_checkpoint_mutex
878878
*/
879-
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
879+
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
880880
{
881881
unsigned long freed;
882+
int ret;
882883

883884
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
884885

@@ -888,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
888889
* space and if we lose sb update during power failure we'd replay
889890
* old transaction with possibly newly overwritten data.
890891
*/
891-
jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
892+
ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
893+
if (ret)
894+
goto out;
895+
892896
write_lock(&journal->j_state_lock);
893897
freed = block - journal->j_tail;
894898
if (block < journal->j_tail)
@@ -904,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
904908
journal->j_tail_sequence = tid;
905909
journal->j_tail = block;
906910
write_unlock(&journal->j_state_lock);
911+
912+
out:
913+
return ret;
907914
}
908915

909916
/*
@@ -1322,7 +1329,7 @@ static int journal_reset(journal_t *journal)
13221329
return jbd2_journal_start_thread(journal);
13231330
}
13241331

1325-
static void jbd2_write_superblock(journal_t *journal, int write_op)
1332+
static int jbd2_write_superblock(journal_t *journal, int write_op)
13261333
{
13271334
struct buffer_head *bh = journal->j_sb_buffer;
13281335
journal_superblock_t *sb = journal->j_superblock;
@@ -1361,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
13611368
printk(KERN_ERR "JBD2: Error %d detected when updating "
13621369
"journal superblock for %s.\n", ret,
13631370
journal->j_devname);
1371+
jbd2_journal_abort(journal, ret);
13641372
}
1373+
1374+
return ret;
13651375
}
13661376

13671377
/**
@@ -1374,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
13741384
* Update a journal's superblock information about log tail and write it to
13751385
* disk, waiting for the IO to complete.
13761386
*/
1377-
void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1387+
int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
13781388
unsigned long tail_block, int write_op)
13791389
{
13801390
journal_superblock_t *sb = journal->j_superblock;
1391+
int ret;
13811392

13821393
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
13831394
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@@ -1386,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
13861397
sb->s_sequence = cpu_to_be32(tail_tid);
13871398
sb->s_start = cpu_to_be32(tail_block);
13881399

1389-
jbd2_write_superblock(journal, write_op);
1400+
ret = jbd2_write_superblock(journal, write_op);
1401+
if (ret)
1402+
goto out;
13901403

13911404
/* Log is no longer empty */
13921405
write_lock(&journal->j_state_lock);
13931406
WARN_ON(!sb->s_sequence);
13941407
journal->j_flags &= ~JBD2_FLUSHED;
13951408
write_unlock(&journal->j_state_lock);
1409+
1410+
out:
1411+
return ret;
13961412
}
13971413

13981414
/**
@@ -1941,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
19411957
return -EIO;
19421958

19431959
mutex_lock(&journal->j_checkpoint_mutex);
1944-
jbd2_cleanup_journal_tail(journal);
1960+
if (!err) {
1961+
err = jbd2_cleanup_journal_tail(journal);
1962+
if (err < 0) {
1963+
mutex_unlock(&journal->j_checkpoint_mutex);
1964+
goto out;
1965+
}
1966+
err = 0;
1967+
}
19451968

19461969
/* Finally, mark the journal as really needing no recovery.
19471970
* This sets s_start==0 in the underlying superblock, which is
@@ -1957,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
19571980
J_ASSERT(journal->j_head == journal->j_tail);
19581981
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
19591982
write_unlock(&journal->j_state_lock);
1960-
return 0;
1983+
out:
1984+
return err;
19611985
}
19621986

19631987
/**

include/linux/jbd2.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
10351035
int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
10361036
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
10371037
unsigned long *block);
1038-
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
1038+
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
10391039
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
10401040

10411041
/* Commit management */
@@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
11571157
extern int jbd2_journal_wipe (journal_t *, int);
11581158
extern int jbd2_journal_skip_recovery (journal_t *);
11591159
extern void jbd2_journal_update_sb_errno(journal_t *);
1160-
extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
1160+
extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
11611161
unsigned long, int);
11621162
extern void __jbd2_journal_abort_hard (journal_t *);
11631163
extern void jbd2_journal_abort (journal_t *, int);

0 commit comments

Comments
 (0)