Skip to content

Commit

Permalink
Btrfs: Cache free inode numbers in memory
Browse files Browse the repository at this point in the history
Currently btrfs stores the highest objectid of the fs tree, and it always
returns (highest+1) as the inode number when we create a file. Inode numbers
are therefore never reclaimed when we delete files, so we'll run out of inode
numbers as we keep creating and deleting files on 32-bit machines.

This fixes it, and it works similarly to how we cache free space in block
groups.

We start a kernel thread to read the file tree. By scanning inode items,
we know which chunks of inode numbers are free, and we cache them in
an rb-tree.

Because we are searching the commit root, we have to carefully handle the
cross-transaction case.

The rb-tree is a hybrid extent+bitmap tree, so if we have too many small
chunks of inode numbers, we'll use bitmaps. Initially we allow 16K of RAM
for extents, and a bitmap will be used if we exceed this threshold. The
extents threshold is adjusted at runtime.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
  • Loading branch information
Li Zefan committed Apr 25, 2011
1 parent 34d52cb commit 581bb05
Show file tree
Hide file tree
Showing 10 changed files with 500 additions and 53 deletions.
15 changes: 9 additions & 6 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,15 @@ struct btrfs_root {
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;

/* free ino cache stuff */
struct mutex fs_commit_mutex;
struct btrfs_free_space_ctl *free_ino_ctl;
enum btrfs_caching_type cached;
spinlock_t cache_lock;
wait_queue_head_t cache_wait;
struct btrfs_free_space_ctl *free_ino_pinned;
u64 cache_progress;

struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
Expand Down Expand Up @@ -2408,12 +2417,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);

/* inode-map.c */
int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
struct btrfs_root *fs_root,
u64 dirid, u64 *objectid);
int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);

/* inode-item.c */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
Expand Down
18 changes: 18 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
#include "inode-map.h"

static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
Expand Down Expand Up @@ -1327,6 +1328,19 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
if (IS_ERR(root))
return root;

root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
if (!root->free_ino_ctl)
goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
if (!root->free_ino_pinned)
goto fail;

btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);

set_anon_super(&root->anon_super, NULL);

if (btrfs_root_refs(&root->root_item) == 0) {
Expand Down Expand Up @@ -2483,6 +2497,8 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);

__btrfs_remove_free_space_cache(root->free_ino_pinned);
__btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
return 0;
}
Expand All @@ -2496,6 +2512,8 @@ static void free_fs_root(struct btrfs_root *root)
}
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
kfree(root->name);
kfree(root);
}
Expand Down
96 changes: 76 additions & 20 deletions fs/btrfs/free-space-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
Expand Down Expand Up @@ -105,7 +106,7 @@ int create_free_space_inode(struct btrfs_root *root,
u64 objectid;
int ret;

ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
ret = btrfs_find_free_objectid(root, &objectid);
if (ret < 0)
return ret;

Expand Down Expand Up @@ -1496,10 +1497,9 @@ bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
return merged;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
u64 offset, u64 bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
int ret = 0;

Expand Down Expand Up @@ -1751,11 +1751,29 @@ __btrfs_return_cluster_to_free_space(
return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
struct rb_node *node;

spin_lock(&ctl->tree_lock);
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
unlink_free_space(ctl, info);
kfree(info->bitmap);
kmem_cache_free(btrfs_free_space_cachep, info);
if (need_resched()) {
spin_unlock(&ctl->tree_lock);
cond_resched();
spin_lock(&ctl->tree_lock);
}
}
spin_unlock(&ctl->tree_lock);
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_cluster *cluster;
struct list_head *head;

Expand All @@ -1773,21 +1791,9 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
spin_lock(&ctl->tree_lock);
}
}

while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
unlink_free_space(ctl, info);
if (info->bitmap)
kfree(info->bitmap);
kmem_cache_free(btrfs_free_space_cachep, info);
if (need_resched()) {
spin_unlock(&ctl->tree_lock);
cond_resched();
spin_lock(&ctl->tree_lock);
}
}

spin_unlock(&ctl->tree_lock);

__btrfs_remove_free_space_cache(ctl);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
Expand Down Expand Up @@ -2352,3 +2358,53 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,

return ret;
}

/*
 * Allocate one inode number from the free-ino cache of @fs_root.
 *
 * Only the left-most entry of the cache tree is looked at; the smallest
 * inode number in that entry is removed from the cache and returned.
 * The whole operation runs with ctl->tree_lock held.
 *
 * Returns 0 when the cache tree is empty.
 *
 * Note: if the left-most entry is a bitmap, the number handed out may
 * not be the smallest one in the whole tree.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *first;
	u64 found = 0;

	spin_lock(&ctl->tree_lock);

	if (!RB_EMPTY_ROOT(&ctl->free_space_offset)) {
		first = rb_entry(rb_first(&ctl->free_space_offset),
				 struct btrfs_free_space, offset_index);

		if (first->bitmap) {
			u64 start = 0;
			u64 wanted = 1;
			int err = search_bitmap(ctl, first, &start, &wanted);

			/* A failed search is treated as a fatal inconsistency. */
			BUG_ON(err);

			found = start;
			bitmap_clear_bits(ctl, first, start, 1);
			if (!first->bytes)
				free_bitmap(ctl, first);
		} else {
			/* Extent entry: hand out the number at its start. */
			found = first->offset;

			/*
			 * The entry is taken out of the tree before its
			 * offset/bytes are changed, and is put back only if
			 * it still covers at least one number.
			 */
			unlink_free_space(ctl, first);
			first->offset += 1;
			first->bytes -= 1;
			if (first->bytes)
				link_free_space(ctl, first);
			else
				kmem_cache_free(btrfs_free_space_cachep, first);
		}
	}

	spin_unlock(&ctl->tree_lock);

	return found;
}
16 changes: 13 additions & 3 deletions fs/btrfs/free-space-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);

void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size);
/*
 * Block-group convenience wrapper: forwards to __btrfs_add_free_space()
 * using the block group's own free_space_ctl, and returns its result.
 */
static inline int
btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
		     u64 bytenr, u64 size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	return __btrfs_add_free_space(ctl, bytenr, size);
}
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
*block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
Expand Down
Loading

0 comments on commit 581bb05

Please sign in to comment.