Commit a94794d

naota authored and kdave committed
btrfs: zoned: calculate allocation offset for conventional zones
Conventional zones do not have a write pointer, so we cannot use it to determine the allocation offset for sequential allocation if a block group contains a conventional zone. Instead, we can use the end of the highest addressed extent in the block group as the allocation offset.

For a new block group, we cannot calculate the allocation offset by consulting the extent tree, because doing so can deadlock: it takes an extent buffer lock after the chunk mutex, which is already held in btrfs_make_block_group(). Since it is a new block group anyway, we can simply set the allocation offset to 0.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 08e11a3 · commit a94794d

3 files changed: 98 additions & 9 deletions
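Before the per-file hunks, the core idea can be distilled: when write pointers are unavailable, the patch derives a block group's allocation offset from the end of its highest addressed extent. The following is a minimal, self-contained C sketch of that arithmetic only, using hypothetical standalone types (struct block_group_view, struct extent_view) rather than the kernel's structures:

/*
 * Hypothetical illustration (not kernel code): for a block group backed
 * by conventional zones, the allocation offset is the end of the highest
 * addressed extent, relative to the block group start.
 */
#include <stdint.h>
#include <stdio.h>

struct extent_view { uint64_t objectid; uint64_t length; };      /* start, size */
struct block_group_view { uint64_t start; uint64_t length; };

/*
 * Returns the offset, or -1 if the extent crosses the block group
 * boundary (the patch returns -EUCLEAN in that case).
 */
static int64_t alloc_offset(const struct block_group_view *bg,
                            const struct extent_view *highest)
{
        uint64_t end = highest->objectid + highest->length;

        if (highest->objectid < bg->start || end > bg->start + bg->length)
                return -1;
        return (int64_t)(end - bg->start);
}

int main(void)
{
        struct block_group_view bg = { .start = 1ULL << 30, .length = 256ULL << 20 };
        struct extent_view ext = { .objectid = (1ULL << 30) + (16ULL << 20),
                                   .length = 4096 };

        /* Prints 16781312: 16 MiB of earlier extents plus this 4 KiB one. */
        printf("alloc_offset = %lld\n", (long long)alloc_offset(&bg, &ext));
        return 0;
}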

fs/btrfs/block-group.c

Lines changed: 2 additions & 2 deletions
@@ -1856,7 +1856,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
                 goto error;
         }
 
-        ret = btrfs_load_block_group_zone_info(cache);
+        ret = btrfs_load_block_group_zone_info(cache, false);
         if (ret) {
                 btrfs_err(info, "zoned: failed to load zone info of bg %llu",
                           cache->start);
@@ -2150,7 +2150,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
         if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                 cache->needs_free_space = 1;
 
-        ret = btrfs_load_block_group_zone_info(cache);
+        ret = btrfs_load_block_group_zone_info(cache, true);
         if (ret) {
                 btrfs_put_block_group(cache);
                 return ret;

fs/btrfs/zoned.c

Lines changed: 94 additions & 5 deletions
@@ -927,7 +927,68 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
         return 0;
 }
 
-int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
+/*
+ * Calculate an allocation pointer from the extent allocation information
+ * for a block group consist of conventional zones. It is pointed to the
+ * end of the highest addressed extent in the block group as an allocation
+ * offset.
+ */
+static int calculate_alloc_pointer(struct btrfs_block_group *cache,
+                                   u64 *offset_ret)
+{
+        struct btrfs_fs_info *fs_info = cache->fs_info;
+        struct btrfs_root *root = fs_info->extent_root;
+        struct btrfs_path *path;
+        struct btrfs_key key;
+        struct btrfs_key found_key;
+        int ret;
+        u64 length;
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+
+        key.objectid = cache->start + cache->length;
+        key.type = 0;
+        key.offset = 0;
+
+        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+        /* We should not find the exact match */
+        if (!ret)
+                ret = -EUCLEAN;
+        if (ret < 0)
+                goto out;
+
+        ret = btrfs_previous_extent_item(root, path, cache->start);
+        if (ret) {
+                if (ret == 1) {
+                        ret = 0;
+                        *offset_ret = 0;
+                }
+                goto out;
+        }
+
+        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+
+        if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
+                length = found_key.offset;
+        else
+                length = fs_info->nodesize;
+
+        if (!(found_key.objectid >= cache->start &&
+              found_key.objectid + length <= cache->start + cache->length)) {
+                ret = -EUCLEAN;
+                goto out;
+        }
+        *offset_ret = found_key.objectid + length - cache->start;
+        ret = 0;
+
+out:
+        btrfs_free_path(path);
+        return ret;
+}
+
+int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 {
         struct btrfs_fs_info *fs_info = cache->fs_info;
         struct extent_map_tree *em_tree = &fs_info->mapping_tree;
@@ -941,6 +1002,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
         int i;
         unsigned int nofs_flag;
         u64 *alloc_offsets = NULL;
+        u64 last_alloc = 0;
         u32 num_sequential = 0, num_conventional = 0;
 
         if (!btrfs_is_zoned(fs_info))
@@ -1040,11 +1102,30 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
 
         if (num_conventional > 0) {
                 /*
-                 * Since conventional zones do not have a write pointer, we
-                 * cannot determine alloc_offset from the pointer
+                 * Avoid calling calculate_alloc_pointer() for new BG. It
+                 * is no use for new BG. It must be always 0.
+                 *
+                 * Also, we have a lock chain of extent buffer lock ->
+                 * chunk mutex. For new BG, this function is called from
+                 * btrfs_make_block_group() which is already taking the
+                 * chunk mutex. Thus, we cannot call
+                 * calculate_alloc_pointer() which takes extent buffer
+                 * locks to avoid deadlock.
                  */
-                ret = -EINVAL;
-                goto out;
+                if (new) {
+                        cache->alloc_offset = 0;
+                        goto out;
+                }
+                ret = calculate_alloc_pointer(cache, &last_alloc);
+                if (ret || map->num_stripes == num_conventional) {
+                        if (!ret)
+                                cache->alloc_offset = last_alloc;
+                        else
+                                btrfs_err(fs_info,
+                        "zoned: failed to determine allocation offset of bg %llu",
+                                          cache->start);
+                        goto out;
+                }
         }
 
         switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
@@ -1066,6 +1147,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
         }
 
 out:
+        /* An extent is allocated after the write pointer */
+        if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
+                btrfs_err(fs_info,
+                        "zoned: got wrong write pointer in BG %llu: %llu > %llu",
+                        logical, last_alloc, cache->alloc_offset);
+                ret = -EIO;
+        }
+
         kfree(alloc_offsets);
         free_extent_map(em);
 
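One non-obvious piece above is the check added at the out: label: when a block group mixes conventional and sequential zones, the offset recovered from the extent tree (last_alloc) must not exceed the offset derived from the write pointers, or the on-disk state and the zone state disagree. A hedged, standalone restatement of that invariant (hypothetical helper name; the real check lives inline in btrfs_load_block_group_zone_info()):

#include <errno.h>
#include <stdint.h>

/*
 * last_alloc:   end of the highest addressed extent, relative to the
 *               block group start (recovered from the extent tree).
 * alloc_offset: allocation offset derived from the zone write pointers.
 *
 * An extent recorded past the write pointer means the filesystem and
 * the device disagree about what was written; the patch reports this
 * as an I/O error.
 */
static int check_write_pointer(uint64_t last_alloc, uint64_t alloc_offset)
{
        if (last_alloc > alloc_offset)
                return -EIO;
        return 0;
}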

fs/btrfs/zoned.h

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
                             u64 length, u64 *bytes);
 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
-int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache);
+int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                      struct blk_zone *zone)
@@ -118,7 +118,7 @@ static inline int btrfs_ensure_empty_zones(struct btrfs_device *device,
 }
 
 static inline int btrfs_load_block_group_zone_info(
-                struct btrfs_block_group *cache)
+                struct btrfs_block_group *cache, bool new)
 {
         return 0;
 }
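The header change follows the kernel's usual compile-out pattern: both the real declaration and the !CONFIG_BLK_DEV_ZONED static inline stub gain the new parameter, so every caller compiles unmodified either way. A generic illustration of the pattern, with hypothetical CONFIG_FEATURE_X and feature_setup() standing in for the btrfs names:

#include <stdbool.h>

struct ctx;     /* opaque context, standing in for btrfs_block_group */

#ifdef CONFIG_FEATURE_X
int feature_setup(struct ctx *c, bool new);     /* real implementation elsewhere */
#else
/* Feature compiled out: succeed as a no-op so callers need no #ifdef. */
static inline int feature_setup(struct ctx *c, bool new)
{
        (void)c;
        (void)new;
        return 0;
}
#endif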
