Skip to content
This repository has been archived by the owner on Aug 27, 2022. It is now read-only.

Commit

Permalink
Btrfs: add better -ENOSPC handling
Browse files Browse the repository at this point in the history
This is a step in the direction of better -ENOSPC handling.  Instead of
checking the global bytes counter we check the space_info bytes counters to
make sure we have enough space.

If we don't we go ahead and try to allocate a new chunk, and then if that fails
we return -ENOSPC.  This patch adds two counters to btrfs_space_info,
bytes_delalloc and bytes_may_use.

bytes_delalloc account for extents we've actually setup for delalloc and will
be allocated at some point down the line. 

bytes_may_use is to keep track of how many bytes we may use for delalloc at
some point.  When we actually set the extent_bit for the delalloc bytes we
subtract the reserved bytes from the bytes_may_use counter.  This keeps us from
not actually being able to allocate space for any delalloc bytes.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
  • Loading branch information
Josef Bacik authored and chrismason-xx committed Feb 20, 2009
1 parent 2cfbd50 commit 6a63209
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 76 deletions.
8 changes: 8 additions & 0 deletions fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;

/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;

/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
Expand Down Expand Up @@ -94,6 +97,11 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;

/* total number of bytes that may be used for this inode for
* delalloc
*/
u64 reserved_bytes;

/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
Expand Down
40 changes: 31 additions & 9 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,13 +596,27 @@ struct btrfs_block_group_item {

struct btrfs_space_info {
u64 flags;
u64 total_bytes;
u64 bytes_used;
u64 bytes_pinned;
u64 bytes_reserved;
u64 bytes_readonly;
int full;
int force_alloc;

u64 total_bytes; /* total bytes in the space */
u64 bytes_used; /* total bytes used on disk */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */

/* delalloc accounting */
u64 bytes_delalloc; /* number of bytes reserved for allocation,
this space is not necessarily reserved yet
by the allocator */
u64 bytes_may_use; /* number of bytes that may be used for
delalloc */

int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
this space */

struct list_head list;

/* for block groups in our same type */
Expand Down Expand Up @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
Expand Down Expand Up @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
unsigned long btrfs_force_ra(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
pgoff_t offset, pgoff_t last_index);
int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
int for_del);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_delete_inode(struct inode *inode);
Expand Down
215 changes: 200 additions & 15 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
Expand Down Expand Up @@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
Expand Down Expand Up @@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}

static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
{
struct btrfs_fs_info *info = root->fs_info;
u64 alloc_profile;

if (data) {
alloc_profile = info->avail_data_alloc_bits &
info->data_alloc_profile;
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
} else if (root == root->fs_info->chunk_root) {
alloc_profile = info->avail_system_alloc_bits &
info->system_alloc_profile;
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
} else {
alloc_profile = info->avail_metadata_alloc_bits &
info->metadata_alloc_profile;
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}

return btrfs_reduce_alloc_profile(root, data);
}

void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
u64 alloc_target;

alloc_target = btrfs_get_alloc_profile(root, 1);
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
alloc_target);
}

/*
* for now this just makes sure we have at least 5% of our metadata space free
* for use.
*/
int btrfs_check_metadata_free_space(struct btrfs_root *root)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 alloc_target, thresh;

/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);

/*
* if the metadata area isn't maxed out then there is no sense in
* checking how much is used, since we can always allocate a new chunk
*/
if (!meta_sinfo->full)
return 0;

spin_lock(&meta_sinfo->lock);
thresh = meta_sinfo->total_bytes * 95;

do_div(thresh, 100);

if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
spin_unlock(&meta_sinfo->lock);
return -ENOSPC;
}
spin_unlock(&meta_sinfo->lock);

return 0;
}

/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *data_sinfo;
int ret = 0;

/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

data_sinfo = BTRFS_I(inode)->space_info;
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
data_sinfo->bytes_may_use < bytes) {
/*
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
if (!data_sinfo->full) {
u64 alloc_target;
struct btrfs_trans_handle *trans;

data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);

alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;

ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
if (ret)
return ret;
goto again;
}
spin_unlock(&data_sinfo->lock);
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
"%llu bytes_pinned, %llu bytes_readonly, %llu may use"
"%llu total\n", bytes, data_sinfo->bytes_delalloc,
data_sinfo->bytes_used, data_sinfo->bytes_reserved,
data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
data_sinfo->bytes_may_use, data_sinfo->total_bytes);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);

return btrfs_check_metadata_free_space(root);
}

/*
* if there was an error for whatever reason after calling
* btrfs_check_data_free_space, call this so we can cleanup the counters.
*/
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;

/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
spin_unlock(&data_sinfo->lock);
}

/* called when we are adding a delalloc extent to the inode's io_tree */
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *data_sinfo;

/* get the space info for where this inode will be storing its data */
data_sinfo = BTRFS_I(inode)->space_info;

/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_delalloc += bytes;

/*
* we are adding a delalloc extent without calling
* btrfs_check_data_free_space first. This happens on a weird
* writepage condition, but shouldn't hurt our accounting
*/
if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
BTRFS_I(inode)->reserved_bytes = 0;
} else {
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
}

spin_unlock(&data_sinfo->lock);
}

/* called when we are clearing an delalloc extent from the inode's io_tree */
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *info;

info = BTRFS_I(inode)->space_info;

spin_lock(&info->lock);
info->bytes_delalloc -= bytes;
spin_unlock(&info->lock);
}

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
Expand Down Expand Up @@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
" may_use=%llu, used=%llu\n", info->total_bytes,
info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
info->bytes_used);

down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
Expand All @@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;

if (data) {
alloc_profile = info->avail_data_alloc_bits &
info->data_alloc_profile;
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
} else if (root == root->fs_info->chunk_root) {
alloc_profile = info->avail_system_alloc_bits &
info->system_alloc_profile;
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
} else {
alloc_profile = info->avail_metadata_alloc_bits &
info->metadata_alloc_profile;
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}
data = btrfs_get_alloc_profile(root, data);
again:
data = btrfs_reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
Expand Down
16 changes: 13 additions & 3 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1091,28 +1091,36 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);

ret = btrfs_check_free_space(root, write_bytes, 0);
ret = btrfs_check_data_free_space(root, inode, write_bytes);
if (ret)
goto out;

ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, last_index,
write_bytes);
if (ret)
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
goto out;
}

ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_drop_pages(pages, num_pages);
goto out;
}

ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret)
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
goto out;
}

if (will_write) {
btrfs_fdatawrite_range(inode->i_mapping, pos,
Expand All @@ -1136,6 +1144,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
}
out:
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;

out_nolock:
kfree(pages);
Expand Down
Loading

0 comments on commit 6a63209

Please sign in to comment.