Skip to content

Commit 42437a6

Browse files
josefbacik authored and kdave committed
btrfs: introduce mount option rescue=ignorebadroots
In the face of extent root corruption, or any other core fs-wide root corruption, we will fail to mount the file system. This makes recovery kind of a pain, because you need to fall back to userspace tools to scrape off data. Instead, provide a mechanism to gracefully handle bad roots, so we can at least mount read-only and possibly recover data from the file system.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 68319c1 commit 42437a6

File tree

10 files changed

+130
-28
lines changed

10 files changed

+130
-28
lines changed

fs/btrfs/block-group.c

+48
Original file line numberDiff line numberDiff line change
@@ -1985,6 +1985,51 @@ static int read_one_block_group(struct btrfs_fs_info *info,
19851985
return ret;
19861986
}
19871987

1988+
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
1989+
{
1990+
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
1991+
struct btrfs_space_info *space_info;
1992+
struct rb_node *node;
1993+
int ret = 0;
1994+
1995+
for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
1996+
struct extent_map *em;
1997+
struct map_lookup *map;
1998+
struct btrfs_block_group *bg;
1999+
2000+
em = rb_entry(node, struct extent_map, rb_node);
2001+
map = em->map_lookup;
2002+
bg = btrfs_create_block_group_cache(fs_info, em->start);
2003+
if (!bg) {
2004+
ret = -ENOMEM;
2005+
break;
2006+
}
2007+
2008+
/* Fill dummy cache as FULL */
2009+
bg->length = em->len;
2010+
bg->flags = map->type;
2011+
bg->last_byte_to_unpin = (u64)-1;
2012+
bg->cached = BTRFS_CACHE_FINISHED;
2013+
bg->used = em->len;
2014+
bg->flags = map->type;
2015+
ret = btrfs_add_block_group_cache(fs_info, bg);
2016+
if (ret) {
2017+
btrfs_remove_free_space_cache(bg);
2018+
btrfs_put_block_group(bg);
2019+
break;
2020+
}
2021+
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
2022+
0, &space_info);
2023+
bg->space_info = space_info;
2024+
link_block_group(bg);
2025+
2026+
set_avail_alloc_bits(fs_info, bg->flags);
2027+
}
2028+
if (!ret)
2029+
btrfs_init_global_block_rsv(fs_info);
2030+
return ret;
2031+
}
2032+
19882033
int btrfs_read_block_groups(struct btrfs_fs_info *info)
19892034
{
19902035
struct btrfs_path *path;
@@ -1995,6 +2040,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
19952040
int need_clear = 0;
19962041
u64 cache_gen;
19972042

2043+
if (!info->extent_root)
2044+
return fill_dummy_bgs(info);
2045+
19982046
key.objectid = 0;
19992047
key.offset = 0;
20002048
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;

fs/btrfs/block-rsv.c

+8
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
426426
fs_info->delayed_block_rsv.space_info = space_info;
427427
fs_info->delayed_refs_rsv.space_info = space_info;
428428

429+
/*
430+
* Our various recovery options can leave us with NULL roots, so check
431+
* here and just bail before we go dereferencing NULLs everywhere.
432+
*/
433+
if (!fs_info->extent_root || !fs_info->csum_root ||
434+
!fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root)
435+
return;
436+
429437
fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
430438
fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
431439
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;

fs/btrfs/compression.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
150150
struct compressed_bio *cb = bio->bi_private;
151151
u8 *cb_sum = cb->sums;
152152

153-
if (inode->flags & BTRFS_INODE_NODATASUM)
153+
if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM))
154154
return 0;
155155

156156
shash->tfm = fs_info->csum_shash;

fs/btrfs/ctree.h

+1
Original file line numberDiff line numberDiff line change
@@ -1298,6 +1298,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
12981298
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
12991299
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
13001300
#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)
1301+
#define BTRFS_MOUNT_IGNOREBADROOTS (1 << 30)
13011302

13021303
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
13031304
#define BTRFS_DEFAULT_MAX_INLINE (2048)

fs/btrfs/disk-io.c

+41-24
Original file line numberDiff line numberDiff line change
@@ -2307,30 +2307,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
23072307

23082308
root = btrfs_read_tree_root(tree_root, &location);
23092309
if (IS_ERR(root)) {
2310-
ret = PTR_ERR(root);
2311-
goto out;
2310+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2311+
ret = PTR_ERR(root);
2312+
goto out;
2313+
}
2314+
} else {
2315+
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2316+
fs_info->extent_root = root;
23122317
}
2313-
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2314-
fs_info->extent_root = root;
23152318

23162319
location.objectid = BTRFS_DEV_TREE_OBJECTID;
23172320
root = btrfs_read_tree_root(tree_root, &location);
23182321
if (IS_ERR(root)) {
2319-
ret = PTR_ERR(root);
2320-
goto out;
2322+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2323+
ret = PTR_ERR(root);
2324+
goto out;
2325+
}
2326+
} else {
2327+
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2328+
fs_info->dev_root = root;
2329+
btrfs_init_devices_late(fs_info);
23212330
}
2322-
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2323-
fs_info->dev_root = root;
2324-
btrfs_init_devices_late(fs_info);
23252331

23262332
location.objectid = BTRFS_CSUM_TREE_OBJECTID;
23272333
root = btrfs_read_tree_root(tree_root, &location);
23282334
if (IS_ERR(root)) {
2329-
ret = PTR_ERR(root);
2330-
goto out;
2335+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2336+
ret = PTR_ERR(root);
2337+
goto out;
2338+
}
2339+
} else {
2340+
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2341+
fs_info->csum_root = root;
23312342
}
2332-
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2333-
fs_info->csum_root = root;
23342343

23352344
/*
23362345
* This tree can share blocks with some other fs tree during relocation
@@ -2339,11 +2348,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
23392348
root = btrfs_get_fs_root(tree_root->fs_info,
23402349
BTRFS_DATA_RELOC_TREE_OBJECTID, true);
23412350
if (IS_ERR(root)) {
2342-
ret = PTR_ERR(root);
2343-
goto out;
2351+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2352+
ret = PTR_ERR(root);
2353+
goto out;
2354+
}
2355+
} else {
2356+
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2357+
fs_info->data_reloc_root = root;
23442358
}
2345-
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2346-
fs_info->data_reloc_root = root;
23472359

23482360
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
23492361
root = btrfs_read_tree_root(tree_root, &location);
@@ -2356,9 +2368,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
23562368
location.objectid = BTRFS_UUID_TREE_OBJECTID;
23572369
root = btrfs_read_tree_root(tree_root, &location);
23582370
if (IS_ERR(root)) {
2359-
ret = PTR_ERR(root);
2360-
if (ret != -ENOENT)
2361-
goto out;
2371+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2372+
ret = PTR_ERR(root);
2373+
if (ret != -ENOENT)
2374+
goto out;
2375+
}
23622376
} else {
23632377
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
23642378
fs_info->uuid_root = root;
@@ -2368,11 +2382,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
23682382
location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
23692383
root = btrfs_read_tree_root(tree_root, &location);
23702384
if (IS_ERR(root)) {
2371-
ret = PTR_ERR(root);
2372-
goto out;
2385+
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
2386+
ret = PTR_ERR(root);
2387+
goto out;
2388+
}
2389+
} else {
2390+
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2391+
fs_info->free_space_root = root;
23732392
}
2374-
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2375-
fs_info->free_space_root = root;
23762393
}
23772394

23782395
return 0;

fs/btrfs/file-item.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
272272
int count = 0;
273273
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
274274

275-
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
275+
if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
276276
return BLK_STS_OK;
277277

278278
path = btrfs_alloc_path();

fs/btrfs/inode.c

+5-1
Original file line numberDiff line numberDiff line change
@@ -2187,7 +2187,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
21872187
int skip_sum;
21882188
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
21892189

2190-
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
2190+
skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
2191+
!fs_info->csum_root;
21912192

21922193
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
21932194
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2902,6 +2903,9 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
29022903
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
29032904
return 0;
29042905

2906+
if (!root->fs_info->csum_root)
2907+
return 0;
2908+
29052909
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
29062910
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
29072911
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);

fs/btrfs/super.c

+11-1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ enum {
360360
Opt_rescue,
361361
Opt_usebackuproot,
362362
Opt_nologreplay,
363+
Opt_ignorebadroots,
363364

364365
/* Deprecated options */
365366
Opt_recovery,
@@ -455,6 +456,8 @@ static const match_table_t tokens = {
455456
static const match_table_t rescue_tokens = {
456457
{Opt_usebackuproot, "usebackuproot"},
457458
{Opt_nologreplay, "nologreplay"},
459+
{Opt_ignorebadroots, "ignorebadroots"},
460+
{Opt_ignorebadroots, "ibadroots"},
458461
{Opt_err, NULL},
459462
};
460463

@@ -498,6 +501,10 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
498501
btrfs_set_and_info(info, NOLOGREPLAY,
499502
"disabling log replay at mount time");
500503
break;
504+
case Opt_ignorebadroots:
505+
btrfs_set_and_info(info, IGNOREBADROOTS,
506+
"ignoring bad roots");
507+
break;
501508
case Opt_err:
502509
btrfs_info(info, "unrecognized rescue option '%s'", p);
503510
ret = -EINVAL;
@@ -983,7 +990,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
983990
if (new_flags & SB_RDONLY)
984991
goto out;
985992

986-
if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay"))
993+
if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
994+
check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots"))
987995
ret = -EINVAL;
988996
out:
989997
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
@@ -1439,6 +1447,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
14391447
print_rescue_option(seq, "nologreplay", &printed);
14401448
if (btrfs_test_opt(info, USEBACKUPROOT))
14411449
print_rescue_option(seq, "usebackuproot", &printed);
1450+
if (btrfs_test_opt(info, IGNOREBADROOTS))
1451+
print_rescue_option(seq, "ignorebadroots", &printed);
14421452
if (btrfs_test_opt(info, FLUSHONCOMMIT))
14431453
seq_puts(seq, ",flushoncommit");
14441454
if (btrfs_test_opt(info, DISCARD_SYNC))

fs/btrfs/sysfs.c

+1
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
332332
static const char *rescue_opts[] = {
333333
"usebackuproot",
334334
"nologreplay",
335+
"ignorebadroots",
335336
};
336337

337338
static ssize_t supported_rescue_options_show(struct kobject *kobj,

fs/btrfs/volumes.c

+13
Original file line numberDiff line numberDiff line change
@@ -7659,6 +7659,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
76597659
u64 prev_dev_ext_end = 0;
76607660
int ret = 0;
76617661

7662+
/*
7663+
* We don't have a dev_root because we mounted with ignorebadroots and
7664+
* failed to load the root, so we want to skip the verification in this
7665+
* case for sure.
7666+
*
7667+
* However if the dev root is fine, but the tree itself is corrupted
7668+
* we'd still fail to mount. This verification is only to make sure
7669+
* writes can happen safely, so instead just bypass this check
7670+
* completely in the case of IGNOREBADROOTS.
7671+
*/
7672+
if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
7673+
return 0;
7674+
76627675
key.objectid = 1;
76637676
key.type = BTRFS_DEV_EXTENT_KEY;
76647677
key.offset = 0;

0 commit comments

Comments
 (0)