Skip to content

Commit ec1f3a2

Browse files
adam900710 authored and kdave committed
btrfs: scrub: update device stats when an error is detected
[BUG]
Since the migration to the new scrub_stripe interface, scrub no longer updates the device stats when hitting an error, no matter if it's a read or checksum mismatch error. E.g.:

  BTRFS info (device dm-2): scrub: started on devid 1
  BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
  BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
  BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
  BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
  BTRFS info (device dm-2): scrub: finished on devid 1 with status: 0

Note there is no line showing the device stats error update.

[CAUSE]
In the migration to the new scrub_stripe interface, we no longer call btrfs_dev_stat_inc_and_print().

[FIX]
- Introduce a new bitmap for metadata generation errors
  * A new bitmap, @meta_gen_error_bitmap, is introduced to record which blocks have metadata generation mismatch errors.
  * A new counter for that bitmap, @init_nr_meta_gen_errors, is also introduced to store the number of generation mismatch errors that are found during the initial read.
    This is for the error reporting at scrub_stripe_report_errors().
  * New dedicated error message for unrepaired generation mismatches
  * Update @meta_gen_error_bitmap if a transid mismatch is hit

- Add btrfs_dev_stat_inc_and_print() calls to the following call sites
  * scrub_stripe_report_errors()
  * scrub_write_endio()
    This is only for the write errors.
This means there is a minor behavior change:

- The timing of device stats error message
  Since we concentrate the error messages at scrub_stripe_report_errors(), the device stats error messages will all show up in one go, after the detailed scrub error messages:

  BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
  BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
  BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
  BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
  BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
  BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 2, gen 0

Fixes: e02ee89 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 45a5951 commit ec1f3a2

File tree

1 file changed

+29
-3
lines changed

1 file changed

+29
-3
lines changed

fs/btrfs/scrub.c

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,19 +153,22 @@ struct scrub_stripe {
153153
unsigned int init_nr_io_errors;
154154
unsigned int init_nr_csum_errors;
155155
unsigned int init_nr_meta_errors;
156+
unsigned int init_nr_meta_gen_errors;
156157

157158
/*
158159
* The following error bitmaps are all for the current status.
159160
* Every time we submit a new read, these bitmaps may be updated.
160161
*
161-
* error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
162+
* error_bitmap = io_error_bitmap | csum_error_bitmap |
163+
* meta_error_bitmap | meta_gen_error_bitmap;
162164
*
163165
* IO and csum errors can happen for both metadata and data.
164166
*/
165167
unsigned long error_bitmap;
166168
unsigned long io_error_bitmap;
167169
unsigned long csum_error_bitmap;
168170
unsigned long meta_error_bitmap;
171+
unsigned long meta_gen_error_bitmap;
169172

170173
/* For writeback (repair or replace) error reporting. */
171174
unsigned long write_error_bitmap;
@@ -662,7 +665,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
662665
}
663666
if (stripe->sectors[sector_nr].generation !=
664667
btrfs_stack_header_generation(header)) {
665-
bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
668+
bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
666669
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
667670
btrfs_warn_rl(fs_info,
668671
"tree block %llu mirror %u has bad generation, has %llu want %llu",
@@ -674,6 +677,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
674677
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
675678
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
676679
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
680+
bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
677681
}
678682

679683
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -971,8 +975,22 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
971975
if (__ratelimit(&rs) && dev)
972976
scrub_print_common_warning("header error", dev, false,
973977
stripe->logical, physical);
978+
if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap))
979+
if (__ratelimit(&rs) && dev)
980+
scrub_print_common_warning("generation error", dev, false,
981+
stripe->logical, physical);
974982
}
975983

984+
/* Update the device stats. */
985+
for (int i = 0; i < stripe->init_nr_io_errors; i++)
986+
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
987+
for (int i = 0; i < stripe->init_nr_csum_errors; i++)
988+
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
989+
/* Generation mismatch errors are counted once per tree block (nodesize), not once per sector. */
990+
for (int i = 0; i < stripe->init_nr_meta_gen_errors;
991+
i += (fs_info->nodesize >> fs_info->sectorsize_bits))
992+
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
993+
976994
spin_lock(&sctx->stat_lock);
977995
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
978996
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
@@ -981,7 +999,8 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
981999
sctx->stat.no_csum += nr_nodatacsum_sectors;
9821000
sctx->stat.read_errors += stripe->init_nr_io_errors;
9831001
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
984-
sctx->stat.verify_errors += stripe->init_nr_meta_errors;
1002+
sctx->stat.verify_errors += stripe->init_nr_meta_errors +
1003+
stripe->init_nr_meta_gen_errors;
9851004
sctx->stat.uncorrectable_errors +=
9861005
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
9871006
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1027,6 +1046,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
10271046
stripe->nr_sectors);
10281047
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
10291048
stripe->nr_sectors);
1049+
stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap,
1050+
stripe->nr_sectors);
10301051

10311052
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
10321053
goto out;
@@ -1141,6 +1162,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
11411162
bitmap_set(&stripe->write_error_bitmap, sector_nr,
11421163
bio_size >> fs_info->sectorsize_bits);
11431164
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
1165+
for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
1166+
btrfs_dev_stat_inc_and_print(stripe->dev,
1167+
BTRFS_DEV_STAT_WRITE_ERRS);
11441168
}
11451169
bio_put(&bbio->bio);
11461170

@@ -1502,10 +1526,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
15021526
stripe->init_nr_io_errors = 0;
15031527
stripe->init_nr_csum_errors = 0;
15041528
stripe->init_nr_meta_errors = 0;
1529+
stripe->init_nr_meta_gen_errors = 0;
15051530
stripe->error_bitmap = 0;
15061531
stripe->io_error_bitmap = 0;
15071532
stripe->csum_error_bitmap = 0;
15081533
stripe->meta_error_bitmap = 0;
1534+
stripe->meta_gen_error_bitmap = 0;
15091535
}
15101536

15111537
/*

0 commit comments

Comments
 (0)