Skip to content

Commit

Permalink
GFS2: fallocate support
Browse files Browse the repository at this point in the history
This patch adds support for fallocate to gfs2.  Since gfs2 does not support
uninitialized data blocks, it must write out zeros to all the blocks.  However,
since it does not need to lock any pages to read from, gfs2 can write out the
zero blocks much more efficiently.  On a moderately full filesystem, fallocate
works around 5 times faster on average.  The fallocate call also allows gfs2 to
add blocks to the file without changing the filesize, which will make it
possible for gfs2 to preallocate space for the rindex file, so that gfs2 can
grow a completely full filesystem.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
  • Loading branch information
bmarzins authored and swhiteho committed Sep 20, 2010
1 parent 9a3f236 commit 3921120
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 2 deletions.
4 changes: 2 additions & 2 deletions fs/gfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
#include "glops.h"


static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int to)
void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int to)
{
struct buffer_head *head = page_buffers(page);
unsigned int bsize = head->b_size;
Expand Down
1 change: 1 addition & 0 deletions fs/gfs2/incore.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ struct gfs2_sbd {
struct list_head sd_rindex_mru_list;
struct gfs2_rgrpd *sd_rindex_forward;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;

/* Journal index stuff */

Expand Down
2 changes: 2 additions & 0 deletions fs/gfs2/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
extern int gfs2_internal_read(struct gfs2_inode *ip,
struct file_ra_state *ra_state,
char *buf, loff_t *pos, unsigned size);
extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int to);
extern void gfs2_set_aops(struct inode *inode);

static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
Expand Down
254 changes: 254 additions & 0 deletions fs/gfs2/ops_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/fiemap.h>
#include <linux/swap.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>

#include "gfs2.h"
Expand Down Expand Up @@ -1277,6 +1279,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
return ret;
}

/*
 * empty_write_end - finish a zero-fill write on part of a page
 * @page: the page being written
 * @from: start of the byte range within the page
 * @to: end of the byte range within the page
 *
 * Zeroes the new buffers in the range, flushes the dcache for the page and
 * marks the page accessed.  Unless gfs2_is_writeback() is true for the owning
 * inode, the range is also handed to gfs2_page_add_databufs().  The range is
 * then committed with block_commit_write().
 */
static void empty_write_end(struct page *page, unsigned from,
			    unsigned to)
{
	struct inode *host = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(host);

	page_zero_new_buffers(page, from, to);
	flush_dcache_page(page);
	mark_page_accessed(page);

	if (!gfs2_is_writeback(ip)) {
		gfs2_page_add_databufs(ip, page, from, to);
	}

	block_commit_write(page, from, to);
}


static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
{
unsigned start, end, next;
struct buffer_head *bh, *head;
int error;

if (!page_has_buffers(page)) {
error = block_prepare_write(page, from, to, gfs2_block_map);
if (unlikely(error))
return error;

empty_write_end(page, from, to);
return 0;
}

bh = head = page_buffers(page);
next = end = 0;
while (next < from) {
next += bh->b_size;
bh = bh->b_this_page;
}
start = next;
do {
next += bh->b_size;
if (buffer_mapped(bh)) {
if (end) {
error = block_prepare_write(page, start, end,
gfs2_block_map);
if (unlikely(error))
return error;
empty_write_end(page, start, end);
end = 0;
}
start = next;
}
else
end = next;
bh = bh->b_this_page;
} while (next < to);

if (end) {
error = block_prepare_write(page, start, end, gfs2_block_map);
if (unlikely(error))
return error;
empty_write_end(page, start, end);
}

return 0;
}

/*
 * fallocate_chunk - zero-fill one chunk of a file's byte range
 * @inode: the file's inode
 * @offset: start of the chunk in bytes
 * @len: length of the chunk in bytes
 * @mode: fallocate mode flags; only FALLOC_FL_KEEP_SIZE is examined here
 *
 * Fetches the dinode buffer and adds it to the current transaction, unstuffs
 * the inode if it is stuffed, then walks the page cache pages covering
 * [offset, offset + len) and calls write_empty_blocks() on the relevant part
 * of each page.  Unless FALLOC_FL_KEEP_SIZE is set, i_size is advanced as
 * pages beyond the current size are written.  On success the dinode is
 * written back into the buffer and the inode is marked dirty.
 *
 * The dinode buffer reference is dropped on every exit path once it has been
 * obtained.  On failure the dinode buffer is not rewritten.
 *
 * Returns 0 on success or a negative errno.
 */
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;
	u64 start = offset >> PAGE_CACHE_SHIFT;
	unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
	u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
	pgoff_t curr;
	struct page *page;
	unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
	unsigned int from, to;

	/* A chunk ending exactly on a page boundary covers the whole page. */
	if (!end_offset)
		end_offset = PAGE_CACHE_SIZE;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;	/* no buffer was obtained, nothing to release */

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (unlikely(error))
			goto out;
	}

	curr = start;
	offset = start << PAGE_CACHE_SHIFT;
	from = start_offset;
	to = PAGE_CACHE_SIZE;
	while (curr <= end) {
		page = grab_cache_page_write_begin(inode->i_mapping, curr,
						   AOP_FLAG_NOFS);
		if (unlikely(!page)) {
			error = -ENOMEM;
			goto out;
		}

		/* Only the last page may stop short of the page end. */
		if (curr == end)
			to = end_offset;
		error = write_empty_blocks(page, from, to);
		if (!error && offset + to > inode->i_size &&
		    !(mode & FALLOC_FL_KEEP_SIZE)) {
			i_size_write(inode, offset + to);
		}
		unlock_page(page);
		page_cache_release(page);
		if (error)
			goto out;
		curr++;
		offset += PAGE_CACHE_SIZE;
		/* Every page after the first starts at its beginning. */
		from = 0;
	}

	gfs2_dinode_out(ip, dibh->b_data);
	mark_inode_dirty(inode);

out:
	/* Release the dinode buffer on both success and error paths. */
	brelse(dibh);
	return error;
}

/*
 * calc_max_reserv - enlarge a reservation to what the resource group offers
 * @ip: the inode; its i_alloc->al_rgd supplies the rd_free_clone count
 * @max: upper bound, in bytes, for the chunk length
 * @len: in/out chunk length in bytes
 * @data_blocks: in/out number of data blocks
 * @ind_blocks: in/out number of indirect blocks
 *
 * Starting from rd_free_clone, subtracts 3 * (sd_max_height - 1) blocks and
 * then repeatedly subtracts DIV_ROUND_UP(level, sd_inptrs) while the level
 * count exceeds sd_diptrs.  If the result is larger than *data_blocks, the
 * outputs are raised to match; if the resulting length would exceed @max it
 * is clamped to @max and the block counts are recomputed with
 * gfs2_write_calc_reserv().  Otherwise the outputs are left untouched.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int rg_free = ip->i_alloc->al_rgd->rd_free_clone;
	unsigned int usable = rg_free - 3 * (sdp->sd_max_height - 1);
	unsigned int level = usable;

	while (level > sdp->sd_diptrs) {
		level = DIV_ROUND_UP(level, sdp->sd_inptrs);
		usable -= level;
	}

	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
	   so it might end up with fewer data blocks */
	if (usable <= *data_blocks)
		return;

	*data_blocks = usable;
	*ind_blocks = rg_free - usable;
	*len = ((loff_t)usable - 3) << sdp->sd_sb.sb_bsize_shift;
	if (*len <= max)
		return;

	/* More is available than is wanted: clamp and recompute. */
	*len = max;
	gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
}

/*
 * gfs2_fallocate - preallocate (zero-filled) blocks for a byte range
 * @inode: the file's inode
 * @mode: fallocate mode flags, passed through to fallocate_chunk()
 * @offset: start of the range in bytes
 * @len: length of the range in bytes
 *
 * The range is widened to whole filesystem blocks and processed in chunks
 * under an exclusive glock on the inode.  For each chunk the function takes
 * an allocation context and the quota lock, reserves blocks, runs
 * fallocate_chunk() inside a transaction and then releases everything again.
 * If the reservation fails with -ENOSPC the chunk size is halved and the
 * reservation retried, down to a single block.
 *
 * The chunk size is kept a multiple of the block size at all times, since it
 * is added to @offset after every chunk and later chunks must start on a
 * block boundary.
 *
 * Returns 0 on success (including when gfs2_write_alloc_required() reports
 * nothing to do) or a negative errno.
 */
static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
			   loff_t len)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes;
	struct gfs2_alloc *al;
	int error;
	/* NOTE(review): assumes sb_bsize is a power of two (1 << sb_bsize_shift) */
	const loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;

	/* Round the end up and the start down to block boundaries. */
	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
		 sdp->sd_sb.sb_bsize_shift;

	len = next - offset;

	/* Initial chunk size: half of the largest resource group's data. */
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	/* Keep the chunk size block-aligned and at least one block. */
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (unlikely(error))
		goto out_uninit;

	if (!gfs2_write_alloc_required(ip, offset, len))
		goto out_unlock;

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		al = gfs2_alloc_get(ip);
		if (!al) {
			error = -ENOMEM;
			goto out_unlock;
		}

		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_alloc_put;

retry:
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);

		al->al_requested = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip);
		if (error) {
			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
				/*
				 * Halve the chunk, but re-align it: an
				 * unaligned size would leave offset in the
				 * middle of a block for the next chunk.
				 */
				bytes >>= 1;
				bytes &= bsize_mask;
				if (bytes == 0)
					bytes = sdp->sd_sb.sb_bsize;
				goto retry;
			}
			goto out_qunlock;
		}
		/* The reserved resource group may allow a larger chunk. */
		max_bytes = bytes;
		calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
		al->al_requested = data_blocks + ind_blocks;

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + ip->i_alloc->al_rgd->rd_length;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
		if (error)
			goto out_trans_fail;

		error = fallocate_chunk(inode, offset, max_bytes, mode);
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
		gfs2_alloc_put(ip);
	}
	goto out_unlock;

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
out_alloc_put:
	gfs2_alloc_put(ip);
out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}


static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
Expand Down Expand Up @@ -1327,6 +1580,7 @@ const struct inode_operations gfs2_file_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fallocate = gfs2_fallocate,
.fiemap = gfs2_fiemap,
};

Expand Down
12 changes: 12 additions & 0 deletions fs/gfs2/rgrp.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
struct inode *inode = &ip->i_inode;
struct file_ra_state ra_state;
u64 rgrp_count = i_size_read(inode);
struct gfs2_rgrpd *rgd;
unsigned int max_data = 0;
int error;

do_div(rgrp_count, sizeof(struct gfs2_rindex));
Expand All @@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
}
}

list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
if (rgd->rd_data > max_data)
max_data = rgd->rd_data;
sdp->sd_max_rg_data = max_data;
sdp->sd_rindex_uptodate = 1;
return 0;
}
Expand All @@ -622,6 +628,8 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *inode = &ip->i_inode;
struct file_ra_state ra_state;
struct gfs2_rgrpd *rgd;
unsigned int max_data = 0;
int error;

file_ra_state_init(&ra_state, inode->i_mapping);
Expand All @@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
return error;
}
}
list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
if (rgd->rd_data > max_data)
max_data = rgd->rd_data;
sdp->sd_max_rg_data = max_data;

sdp->sd_rindex_uptodate = 1;
return 0;
Expand Down
1 change: 1 addition & 0 deletions fs/gfs2/trans.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct gfs2_glock;
#define RES_JDATA 1
#define RES_DATA 1
#define RES_LEAF 1
#define RES_RG_HDR 1
#define RES_RG_BIT 2
#define RES_EATTR 1
#define RES_STATFS 1
Expand Down

0 comments on commit 3921120

Please sign in to comment.