Skip to content

Commit 440acf9

Browse files
fdmanana authored and intel-lab-lkp committed
btrfs: use sector numbers as keys for the dirty extents xarray
We are using the logical address ("bytenr") of an extent as the key for qgroup records in the dirty extents xarray. This is a problem because the xarrays use "unsigned long" for keys/indices, meaning that on a 32 bits platform any extent starting at or beyond 4G is truncated, which is a too low limitation as virtually everyone is using storage with more than 4G of space. This means a "bytenr" of 4G gets truncated to 0, and so does 8G and 16G for example, resulting in incorrect qgroup accounting. Fix this by using sector numbers as keys instead, that is, using keys that match the logical address right shifted by fs_info->sectorsize_bits, which is what we do for the fs_info->buffer_radix that tracks extent buffers (radix trees also use an "unsigned long" type for keys). This also makes the index space more dense which helps optimize the xarray (as mentioned at Documentation/core-api/xarray.rst). Fixes: 3cce39a ("btrfs: qgroup: use xarray to track dirty extents in transaction") Signed-off-by: Filipe Manana <fdmanana@suse.com>
1 parent 1c50086 commit 440acf9

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

fs/btrfs/delayed-ref.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
849849
struct btrfs_qgroup_extent_record *qrecord,
850850
int action, bool *qrecord_inserted_ret)
851851
{
852+
struct btrfs_fs_info *fs_info = trans->fs_info;
852853
struct btrfs_delayed_ref_head *existing;
853854
struct btrfs_delayed_ref_root *delayed_refs;
854855
bool qrecord_inserted = false;
@@ -859,11 +860,12 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
859860
if (qrecord) {
860861
int ret;
861862

862-
ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info,
863+
ret = btrfs_qgroup_trace_extent_nolock(fs_info,
863864
delayed_refs, qrecord);
864865
if (ret) {
865866
/* Clean up if insertion fails or item exists. */
866-
xa_release(&delayed_refs->dirty_extents, qrecord->bytenr);
867+
xa_release(&delayed_refs->dirty_extents,
868+
qrecord->bytenr >> fs_info->sectorsize_bits);
867869
/* Caller responsible for freeing qrecord on error. */
868870
if (ret < 0)
869871
return ERR_PTR(ret);
@@ -873,7 +875,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
873875
}
874876
}
875877

876-
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
878+
trace_add_delayed_ref_head(fs_info, head_ref, action);
877879

878880
existing = htree_insert(&delayed_refs->href_root,
879881
&head_ref->href_node);
@@ -895,7 +897,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
895897
if (head_ref->is_data && head_ref->ref_mod < 0) {
896898
delayed_refs->pending_csums += head_ref->num_bytes;
897899
trans->delayed_ref_csum_deletions +=
898-
btrfs_csum_bytes_to_leaves(trans->fs_info,
900+
btrfs_csum_bytes_to_leaves(fs_info,
899901
head_ref->num_bytes);
900902
}
901903
delayed_refs->num_heads++;
@@ -1030,7 +1032,8 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
10301032
goto free_head_ref;
10311033
}
10321034
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
1033-
generic_ref->bytenr, GFP_NOFS)) {
1035+
generic_ref->bytenr >> fs_info->sectorsize_bits,
1036+
GFP_NOFS)) {
10341037
ret = -ENOMEM;
10351038
goto free_record;
10361039
}

fs/btrfs/delayed-ref.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,15 @@ struct btrfs_delayed_ref_root {
202202
/* head ref rbtree */
203203
struct rb_root_cached href_root;
204204

205-
/* Track dirty extent records. */
205+
/*
206+
* Track dirty extent records.
207+
* The keys correspond to the logical address of the extent ("bytenr")
208+
* right shifted by fs_info->sectorsize_bits. This is both to get a more
209+
* dense index space (optimizes xarray structure) and because indexes in
210+
* xarrays are of "unsigned long" type, meaning they are 32 bits wide on
211+
* 32 bits platforms, limiting the extent range to 4G which is too low
212+
* and makes it unusable (truncated index values) on 32 bits platforms.
213+
*/
206214
struct xarray dirty_extents;
207215

208216
/* this spin lock protects the rbtree and the entries inside */

fs/btrfs/qgroup.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,7 +2005,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
20052005
struct btrfs_qgroup_extent_record *record)
20062006
{
20072007
struct btrfs_qgroup_extent_record *existing, *ret;
2008-
unsigned long bytenr = record->bytenr;
2008+
const unsigned long index = (record->bytenr >> fs_info->sectorsize_bits);
20092009

20102010
if (!btrfs_qgroup_full_accounting(fs_info))
20112011
return 1;
@@ -2014,7 +2014,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
20142014
trace_btrfs_qgroup_trace_extent(fs_info, record);
20152015

20162016
xa_lock(&delayed_refs->dirty_extents);
2017-
existing = xa_load(&delayed_refs->dirty_extents, bytenr);
2017+
existing = xa_load(&delayed_refs->dirty_extents, index);
20182018
if (existing) {
20192019
if (record->data_rsv && !existing->data_rsv) {
20202020
existing->data_rsv = record->data_rsv;
@@ -2024,7 +2024,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
20242024
return 1;
20252025
}
20262026

2027-
ret = __xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC);
2027+
ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC);
20282028
xa_unlock(&delayed_refs->dirty_extents);
20292029
if (xa_is_err(ret)) {
20302030
qgroup_mark_inconsistent(fs_info);
@@ -2129,6 +2129,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21292129
struct btrfs_fs_info *fs_info = trans->fs_info;
21302130
struct btrfs_qgroup_extent_record *record;
21312131
struct btrfs_delayed_ref_root *delayed_refs;
2132+
const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
21322133
int ret;
21332134

21342135
if (!btrfs_qgroup_full_accounting(fs_info) || bytenr == 0 || num_bytes == 0)
@@ -2137,7 +2138,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21372138
if (!record)
21382139
return -ENOMEM;
21392140

2140-
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) {
2141+
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, index, GFP_NOFS)) {
21412142
kfree(record);
21422143
return -ENOMEM;
21432144
}
@@ -2152,7 +2153,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21522153
spin_unlock(&delayed_refs->lock);
21532154
if (ret) {
21542155
/* Clean up if insertion fails or item exists. */
2155-
xa_release(&delayed_refs->dirty_extents, record->bytenr);
2156+
xa_release(&delayed_refs->dirty_extents, index);
21562157
kfree(record);
21572158
return 0;
21582159
}

0 commit comments

Comments (0)