Skip to content

Commit cef2704

Browse files
committed
Merge tag 'bcachefs-2024-04-15' of https://evilpiepirate.org/git/bcachefs
Pull yet more bcachefs fixes from Kent Overstreet: "This gets recovery working again for the affected user I've been working with, and I'm still waiting to hear back on other bug reports but should fix it for everyone else who's been having issues with recovery. - Various recovery fixes: - fixes for the btree_insert_entry being resized on path allocation btree_path array recently became dynamically resizable, and btree_insert_entry along with it; this was being observed during journal replay, when write buffer btree updates don't use the write buffer and instead use the normal btree update path - multiple fixes for deadlock in recovery when we need to do lots of btree node merges; excessive merges were clogging up the whole pipeline - write buffer path now correctly does btree node merges when needed - fix failure to go RW when superblock indicates recovery passes needed (i.e. to complete an unfinished upgrade) - Various unsafety fixes - test case contributed by a user who had two drives out of a six drive array write out a whole bunch of garbage after power failure - New (tiny) on disk format feature: since it appears the btree node scan tool will be a more regular thing (crappy hardware, user error) - this adds a 64 bit per-device bitmap of regions that have ever had btree nodes. - A path->should_be_locked fix, from a larger patch series tightening up invariants and assertions around btree transaction and path locking state. 
This particular fix prevents us from keeping around btree_paths that are no longer needed" * tag 'bcachefs-2024-04-15' of https://evilpiepirate.org/git/bcachefs: (24 commits) bcachefs: set_btree_iter_dontneed also clears should_be_locked bcachefs: fix error path of __bch2_read_super() bcachefs: Check for backpointer bucket_offset >= bucket size bcachefs: bch_member.btree_allocated_bitmap bcachefs: sysfs internal/trigger_journal_flush bcachefs: Fix bch2_btree_node_fill() for !path bcachefs: add safety checks in bch2_btree_node_fill() bcachefs: Interior known are required to have known key types bcachefs: add missing bounds check in __bch2_bkey_val_invalid() bcachefs: Fix btree node merging on write buffer btrees bcachefs: Disable merges from interior update path bcachefs: Run merges at BCH_WATERMARK_btree bcachefs: Fix missing write refs in fs fio paths bcachefs: Fix deadlock in journal replay bcachefs: Go rw if running any explicit recovery passes bcachefs: Standardize helpers for printing enum strs with bounds checks bcachefs: don't queue btree nodes for rewrites during scan bcachefs: fix race in bch2_btree_node_evict() bcachefs: fix unsafety in bch2_stripe_to_text() bcachefs: fix unsafety in bch2_extent_ptr_to_text() ...
2 parents 3fdfcd9 + ad29cf9 commit cef2704

34 files changed

+432
-182
lines changed

fs/bcachefs/backpointers.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,15 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
4949
if (!bch2_dev_exists2(c, bp.k->p.inode))
5050
return 0;
5151

52+
struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
5253
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
5354
int ret = 0;
5455

55-
bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
56+
bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
57+
!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
5658
c, err,
57-
backpointer_pos_wrong,
58-
"backpointer at wrong pos");
59+
backpointer_bucket_offset_wrong,
60+
"backpointer bucket_offset wrong");
5961
fsck_err:
6062
return ret;
6163
}

fs/bcachefs/backpointers.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
5353
u64 bucket_offset)
5454
{
5555
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
56-
struct bpos ret;
57-
58-
ret = POS(bucket.inode,
59-
(bucket_to_sector(ca, bucket.offset) <<
60-
MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
56+
struct bpos ret = POS(bucket.inode,
57+
(bucket_to_sector(ca, bucket.offset) <<
58+
MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
6159

6260
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
63-
6461
return ret;
6562
}
6663

fs/bcachefs/bcachefs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,8 @@ struct btree_trans_buf {
709709
x(stripe_delete) \
710710
x(reflink) \
711711
x(fallocate) \
712+
x(fsync) \
713+
x(dio_write) \
712714
x(discard) \
713715
x(discard_fast) \
714716
x(invalidate) \

fs/bcachefs/bcachefs_format.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,8 @@ struct bch_member {
578578
__le64 nbuckets; /* device size */
579579
__le16 first_bucket; /* index of first bucket used */
580580
__le16 bucket_size; /* sectors */
581-
__le32 pad;
581+
__u8 btree_bitmap_shift;
582+
__u8 pad[3];
582583
__le64 last_mount; /* time_t */
583584

584585
__le64 flags;
@@ -587,6 +588,7 @@ struct bch_member {
587588
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
588589
__le64 errors_reset_time;
589590
__le64 seq;
591+
__le64 btree_allocated_bitmap;
590592
};
591593

592594
#define BCH_MEMBER_V1_BYTES 56
@@ -876,7 +878,8 @@ struct bch_sb_field_downgrade {
876878
x(rebalance_work, BCH_VERSION(1, 3)) \
877879
x(member_seq, BCH_VERSION(1, 4)) \
878880
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
879-
x(btree_subvolume_children, BCH_VERSION(1, 6))
881+
x(btree_subvolume_children, BCH_VERSION(1, 6)) \
882+
x(mi_btree_bitmap, BCH_VERSION(1, 7))
880883

881884
enum bcachefs_metadata_version {
882885
bcachefs_metadata_version_min = 9,
@@ -1314,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
13141317
x(write_buffer_keys, 11) \
13151318
x(datetime, 12)
13161319

1317-
enum {
1320+
enum bch_jset_entry_type {
13181321
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
13191322
BCH_JSET_ENTRY_TYPES()
13201323
#undef x
@@ -1360,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
13601363
x(inodes, 1) \
13611364
x(key_version, 2)
13621365

1363-
enum {
1366+
enum bch_fs_usage_type {
13641367
#define x(f, nr) BCH_FS_USAGE_##f = nr,
13651368
BCH_FS_USAGE_TYPES()
13661369
#undef x

fs/bcachefs/bkey.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
314314
return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
315315
}
316316

317+
static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
318+
const struct bkey_packed *k)
319+
{
320+
return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
321+
}
322+
317323
static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
318324
const struct bkey_packed *k)
319325
{

fs/bcachefs/bkey_methods.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
171171
if (type >= BKEY_TYPE_NR)
172172
return 0;
173173

174-
bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
174+
bkey_fsck_err_on((type == BKEY_TYPE_btree ||
175+
(flags & BKEY_INVALID_COMMIT)) &&
175176
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
176177
bkey_invalid_type_for_btree,
177178
"invalid key type for btree %s (%s)",
178-
bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
179+
bch2_btree_node_type_str(type),
180+
k.k->type < KEY_TYPE_MAX
181+
? bch2_bkey_types[k.k->type]
182+
: "(unknown)");
179183

180184
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
181185
bkey_fsck_err_on(k.k->size == 0, c, err,

fs/bcachefs/btree_cache.c

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
709709
struct bch_fs *c = trans->c;
710710
struct btree_cache *bc = &c->btree_cache;
711711
struct btree *b;
712-
u32 seq;
713712

714-
BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
713+
if (unlikely(level >= BTREE_MAX_DEPTH)) {
714+
int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
715+
level, BTREE_MAX_DEPTH);
716+
return ERR_PTR(ret);
717+
}
718+
719+
if (unlikely(!bkey_is_btree_ptr(&k->k))) {
720+
struct printbuf buf = PRINTBUF;
721+
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
722+
723+
int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
724+
printbuf_exit(&buf);
725+
return ERR_PTR(ret);
726+
}
727+
728+
if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
729+
struct printbuf buf = PRINTBUF;
730+
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
731+
732+
int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
733+
printbuf_exit(&buf);
734+
return ERR_PTR(ret);
735+
}
736+
715737
/*
716738
* Parent node must be locked, else we could read in a btree node that's
717739
* been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
752774
}
753775

754776
set_btree_node_read_in_flight(b);
755-
756777
six_unlock_write(&b->c.lock);
757-
seq = six_lock_seq(&b->c.lock);
758-
six_unlock_intent(&b->c.lock);
759778

760-
/* Unlock before doing IO: */
761-
if (path && sync)
762-
bch2_trans_unlock_noassert(trans);
763-
764-
bch2_btree_node_read(trans, b, sync);
779+
if (path) {
780+
u32 seq = six_lock_seq(&b->c.lock);
765781

766-
if (!sync)
767-
return NULL;
782+
/* Unlock before doing IO: */
783+
six_unlock_intent(&b->c.lock);
784+
bch2_trans_unlock_noassert(trans);
768785

769-
if (path) {
770-
int ret = bch2_trans_relock(trans) ?:
771-
bch2_btree_path_relock_intent(trans, path);
772-
if (ret) {
773-
BUG_ON(!trans->restarted);
774-
return ERR_PTR(ret);
775-
}
776-
}
786+
bch2_btree_node_read(trans, b, sync);
777787

778-
if (!six_relock_type(&b->c.lock, lock_type, seq)) {
779-
BUG_ON(!path);
788+
if (!sync)
789+
return NULL;
780790

781-
trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
782-
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
791+
if (!six_relock_type(&b->c.lock, lock_type, seq))
792+
b = NULL;
793+
} else {
794+
bch2_btree_node_read(trans, b, sync);
795+
if (lock_type == SIX_LOCK_read)
796+
six_lock_downgrade(&b->c.lock);
783797
}
784798

785799
return b;
@@ -1112,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
11121126
{
11131127
struct bch_fs *c = trans->c;
11141128
struct btree_cache *bc = &c->btree_cache;
1115-
struct btree *b;
11161129

11171130
BUG_ON(path && !btree_node_locked(path, level + 1));
11181131
BUG_ON(level >= BTREE_MAX_DEPTH);
11191132

1120-
b = btree_cache_find(bc, k);
1133+
struct btree *b = btree_cache_find(bc, k);
11211134
if (b)
11221135
return 0;
11231136

11241137
b = bch2_btree_node_fill(trans, path, k, btree_id,
11251138
level, SIX_LOCK_read, false);
1126-
return PTR_ERR_OR_ZERO(b);
1139+
if (!IS_ERR_OR_NULL(b))
1140+
six_unlock_read(&b->c.lock);
1141+
return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
11271142
}
11281143

11291144
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1148,6 +1163,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
11481163

11491164
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
11501165
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
1166+
if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
1167+
goto out;
11511168

11521169
if (btree_node_dirty(b)) {
11531170
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1162,7 +1179,7 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
11621179
btree_node_data_free(c, b);
11631180
bch2_btree_node_hash_remove(bc, b);
11641181
mutex_unlock(&bc->lock);
1165-
1182+
out:
11661183
six_unlock_write(&b->c.lock);
11671184
six_unlock_intent(&b->c.lock);
11681185
}

fs/bcachefs/btree_gc.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
828828
struct bch_fs *c = trans->c;
829829
struct bkey deleted = KEY(0, 0, 0);
830830
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
831+
struct printbuf buf = PRINTBUF;
831832
int ret = 0;
832833

833834
deleted.p = k->k->p;
@@ -848,11 +849,23 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
848849
if (ret)
849850
goto err;
850851

852+
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
853+
c, btree_bitmap_not_marked,
854+
"btree ptr not marked in member info btree allocated bitmap\n %s",
855+
(bch2_bkey_val_to_text(&buf, c, *k),
856+
buf.buf))) {
857+
mutex_lock(&c->sb_lock);
858+
bch2_dev_btree_bitmap_mark(c, *k);
859+
bch2_write_super(c);
860+
mutex_unlock(&c->sb_lock);
861+
}
862+
851863
ret = commit_do(trans, NULL, NULL, 0,
852864
bch2_key_trigger(trans, btree_id, level, old,
853865
unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
854866
fsck_err:
855867
err:
868+
printbuf_exit(&buf);
856869
bch_err_fn(c, ret);
857870
return ret;
858871
}

fs/bcachefs/btree_io.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
831831
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
832832
}
833833

834-
static bool __bkey_valid(struct bch_fs *c, struct btree *b,
834+
static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
835835
struct bset *i, struct bkey_packed *k)
836836
{
837837
if (bkey_p_next(k) > vstruct_last(i))
@@ -840,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
840840
if (k->format > KEY_FORMAT_CURRENT)
841841
return false;
842842

843-
if (k->u64s < bkeyp_key_u64s(&b->format, k))
843+
if (!bkeyp_u64s_valid(&b->format, k))
844844
return false;
845845

846846
struct printbuf buf = PRINTBUF;
@@ -884,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
884884
"invalid bkey format %u", k->format))
885885
goto drop_this_key;
886886

887-
if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
887+
if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
888888
-BCH_ERR_btree_node_read_err_fixable,
889889
c, NULL, b, i,
890890
btree_node_bkey_bad_u64s,
891-
"k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
891+
"bad k->u64s %u (min %u max %lu)", k->u64s,
892+
bkeyp_key_u64s(&b->format, k),
893+
U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
892894
goto drop_this_key;
893895

894896
if (!write)
@@ -947,13 +949,12 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
947949
* do
948950
*/
949951

950-
if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
952+
if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
951953
for (next_good_key = 1;
952954
next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
953955
next_good_key++)
954-
if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
956+
if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
955957
goto got_good_key;
956-
957958
}
958959

959960
/*
@@ -1339,7 +1340,9 @@ static void btree_node_read_work(struct work_struct *work)
13391340
rb->start_time);
13401341
bio_put(&rb->bio);
13411342

1342-
if (saw_error && !btree_node_read_error(b)) {
1343+
if (saw_error &&
1344+
!btree_node_read_error(b) &&
1345+
c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
13431346
printbuf_reset(&buf);
13441347
bch2_bpos_to_text(&buf, b->key.k.p);
13451348
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",

fs/bcachefs/btree_iter.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
498498
{
499499
struct btree_trans *trans = iter->trans;
500500

501-
if (!trans->restarted)
502-
btree_iter_path(trans, iter)->preserve = false;
501+
if (!iter->path || trans->restarted)
502+
return;
503+
504+
struct btree_path *path = btree_iter_path(trans, iter);
505+
path->preserve = false;
506+
if (path->ref == 1)
507+
path->should_be_locked = false;
503508
}
504509

505510
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);

fs/bcachefs/btree_node_scan.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,19 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
133133
if (le64_to_cpu(bn->magic) != bset_magic(c))
134134
return;
135135

136+
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
137+
struct nonce nonce = btree_nonce(&bn->keys, 0);
138+
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
139+
140+
bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
141+
}
142+
136143
if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
137144
return;
138145

146+
if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
147+
return;
148+
139149
rcu_read_lock();
140150
struct found_btree_node n = {
141151
.btree_id = BTREE_NODE_ID(bn),
@@ -195,8 +205,13 @@ static int read_btree_nodes_worker(void *p)
195205
last_print = jiffies;
196206
}
197207

198-
try_read_btree_node(w->f, ca, bio, buf,
199-
bucket * ca->mi.bucket_size + bucket_offset);
208+
u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
209+
210+
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
211+
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
212+
continue;
213+
214+
try_read_btree_node(w->f, ca, bio, buf, sector);
200215
}
201216
err:
202217
bio_put(bio);

0 commit comments

Comments
 (0)