Skip to content

Commit 1afc32b

Browse files
author
Mark Fasheh
committed
ocfs2: Write support for inline data
This fixes up write, truncate, mmap, and RESVSP/UNRESVSP to understand inline inode data. For the most part, the changes to the core write code can be relied on to do the heavy lifting. Any code calling ocfs2_write_begin (including shared writeable mmap) can count on it doing the right thing with respect to growing inline data to an extent tree. Size reducing truncates, including UNRESVSP, can simply zero that portion of the inode block being removed. Size increasing truncates, including RESVSP, have to be a little bit smarter and grow the inode to an extent tree if necessary. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Reviewed-by: Joel Becker <joel.becker@oracle.com>
1 parent 6798d35 commit 1afc32b

File tree

7 files changed

+526
-8
lines changed

7 files changed

+526
-8
lines changed

fs/ocfs2/alloc.c

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3726,6 +3726,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
37263726
struct ocfs2_insert_type insert = {0, };
37273727
struct ocfs2_extent_rec rec;
37283728

3729+
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
3730+
37293731
mlog(0, "add %u clusters at position %u to inode %llu\n",
37303732
new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
37313733

@@ -5826,6 +5828,174 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
58265828
return ret;
58275829
}
58285830

5831+
/*
 * Zero the dinode from the start of its id2 member through the end of
 * the filesystem block.
 *
 * The block size is computed from inode->i_sb->s_blocksize_bits, and the
 * length cleared is (blocksize - offsetof(struct ocfs2_dinode, id2)).
 *
 * NOTE(review): assumes 'di' points at the start of a buffer that is a
 * full block in size -- confirm against callers.
 */
static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
5832+
{
5833+
unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
5834+
5835+
memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
5836+
}
5837+
5838+
/*
 * Mark an inode as holding its data inline and initialize the inline
 * data area of the dinode.
 *
 * Sets OCFS2_INLINE_DATA_FL in the in-memory ip_dyn_features, mirrors
 * the resulting value into di->i_dyn_features, zeroes the id2 area and
 * then sets id_count from ocfs2_max_inline_data().
 *
 * No journal calls are made here; presumably the caller already holds
 * journal access on the buffer containing 'di' -- TODO confirm.
 */
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
5839+
{
5840+
struct ocfs2_inode_info *oi = OCFS2_I(inode);
5841+
struct ocfs2_inline_data *idata = &di->id2.i_data;
5842+
5843+
/* ip_lock is held while both copies of the dyn_features flags are updated. */
spin_lock(&oi->ip_lock);
5844+
oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
5845+
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5846+
spin_unlock(&oi->ip_lock);
5847+
5848+
/*
5849+
* We clear the entire i_data structure here so that all
5850+
* fields can be properly initialized.
5851+
*/
5852+
ocfs2_zero_dinode_id2(inode, di);
5853+
5854+
/* Must come after the zeroing above, which also clears id_count. */
idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
5855+
}
5856+
5857+
/*
 * Convert an inode from inline data to an extent list.
 *
 * If the inode has a non-zero i_size, one cluster is reserved and
 * claimed, pages covering the start of the file are grabbed, the inline
 * data is read into the first page, and the pages are mapped and
 * dirtied. In all cases OCFS2_INLINE_DATA_FL is then cleared (in memory
 * and in the dinode), the id2 area is zeroed and an empty, depth-0
 * extent list is set up in its place. When there was data, the claimed
 * cluster is finally inserted into the new extent list.
 *
 * @inode: inode being converted.
 * @di_bh: buffer head whose b_data is the inode's ocfs2_dinode.
 *
 * Returns the status of the last step performed; failures from the
 * helpers are logged with mlog_errno() and returned unchanged, and
 * -ENOMEM is returned if the page pointer array cannot be allocated.
 *
 * NOTE(review): nothing visible in this function gives back the cluster
 * obtained from ocfs2_claim_clusters() if a later step fails -- confirm
 * whether that is handled elsewhere.
 */
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
5858+
struct buffer_head *di_bh)
5859+
{
5860+
int ret, i, has_data, num_pages = 0;
5861+
handle_t *handle;
5862+
/* 'block' is only assigned and only read when has_data is set. */
u64 uninitialized_var(block);
5863+
struct ocfs2_inode_info *oi = OCFS2_I(inode);
5864+
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5865+
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5866+
/* Points into the same id2 area that currently holds the inline data. */
struct ocfs2_extent_list *el = &di->id2.i_list;
5867+
struct ocfs2_alloc_context *data_ac = NULL;
5868+
struct page **pages = NULL;
5869+
loff_t end = osb->s_clustersize;
5870+
5871+
/* An empty file needs no cluster and no page work, only the format switch. */
has_data = i_size_read(inode) ? 1 : 0;
5872+
5873+
if (has_data) {
5874+
pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
5875+
sizeof(struct page *), GFP_NOFS);
5876+
if (pages == NULL) {
5877+
ret = -ENOMEM;
5878+
mlog_errno(ret);
5879+
goto out;
5880+
}
5881+
5882+
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
5883+
if (ret) {
5884+
mlog_errno(ret);
5885+
goto out;
5886+
}
5887+
}
5888+
5889+
handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
5890+
if (IS_ERR(handle)) {
5891+
ret = PTR_ERR(handle);
5892+
mlog_errno(ret);
5893+
goto out_unlock;
5894+
}
5895+
5896+
ret = ocfs2_journal_access(handle, inode, di_bh,
5897+
OCFS2_JOURNAL_ACCESS_WRITE);
5898+
if (ret) {
5899+
mlog_errno(ret);
5900+
goto out_commit;
5901+
}
5902+
5903+
if (has_data) {
5904+
u32 bit_off, num;
5905+
unsigned int page_end;
5906+
u64 phys;
5907+
5908+
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
5909+
&num);
5910+
if (ret) {
5911+
mlog_errno(ret);
5912+
goto out_commit;
5913+
}
5914+
5915+
/*
5916+
* Save two copies, one for insert, and one that can
5917+
* be changed by ocfs2_map_and_dirty_page() below.
5918+
*/
5919+
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
5920+
5921+
/*
5922+
* Non sparse file systems zero on extend, so no need
5923+
* to do that now.
5924+
*/
5925+
if (!ocfs2_sparse_alloc(osb) &&
5926+
PAGE_CACHE_SIZE < osb->s_clustersize)
5927+
end = PAGE_CACHE_SIZE;
5928+
5929+
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
5930+
if (ret) {
5931+
mlog_errno(ret);
5932+
goto out_commit;
5933+
}
5934+
5935+
/*
5936+
* This should populate the 1st page for us and mark
5937+
* it up to date.
5938+
*/
5939+
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
5940+
if (ret) {
5941+
mlog_errno(ret);
5942+
goto out_commit;
5943+
}
5944+
5945+
/* page_end is the smaller of the page size and the cluster size. */
page_end = PAGE_CACHE_SIZE;
5946+
if (PAGE_CACHE_SIZE > osb->s_clustersize)
5947+
page_end = osb->s_clustersize;
5948+
5949+
/*
 * The 'i > 0' argument is non-zero for every page after the first;
 * presumably it asks for those pages to be zeroed -- TODO confirm
 * against ocfs2_map_and_dirty_page().
 */
for (i = 0; i < num_pages; i++)
5950+
ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
5951+
pages[i], i > 0, &phys);
5952+
}
5953+
5954+
/* Drop the inline-data flag in memory and on disk under ip_lock. */
spin_lock(&oi->ip_lock);
5955+
oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
5956+
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5957+
spin_unlock(&oi->ip_lock);
5958+
5959+
/* Wipes the inline bytes; any data was copied to pages[0] above. */
ocfs2_zero_dinode_id2(inode, di);
5960+
5961+
/* Initialize an empty extent list in the space just cleared. */
el->l_tree_depth = 0;
5962+
el->l_next_free_rec = 0;
5963+
el->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
5964+
5965+
ocfs2_journal_dirty(handle, di_bh);
5966+
5967+
if (has_data) {
5968+
/*
5969+
* An error at this point should be extremely rare. If
5970+
* this proves to be false, we could always re-build
5971+
* the in-inode data from our pages.
5972+
*/
5973+
ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
5974+
0, block, 1, 0, NULL);
5975+
if (ret) {
5976+
mlog_errno(ret);
5977+
goto out_commit;
5978+
}
5979+
5980+
inode->i_blocks = ocfs2_inode_sector_count(inode);
5981+
}
5982+
5983+
out_commit:
5984+
ocfs2_commit_trans(osb, handle);
5985+
5986+
out_unlock:
5987+
if (data_ac)
5988+
ocfs2_free_alloc_context(data_ac);
5989+
5990+
out:
5991+
/* num_pages is still 0 if we failed before any pages were grabbed. */
if (pages) {
5992+
ocfs2_unlock_and_free_pages(pages, num_pages);
5993+
kfree(pages);
5994+
}
5995+
5996+
return ret;
5997+
}
5998+
58295999
/*
58306000
* It is expected, that by the time you call this function,
58316001
* inode->i_size and fe->i_size have been adjusted.
@@ -6051,6 +6221,81 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
60516221
return status;
60526222
}
60536223

6224+
/*
6225+
* 'start' is inclusive, 'end' is not.
6226+
*/
6227+
/*
 * Zero the byte range [start, end) of an inode's inline data and,
 * optionally, shrink the file to 'start'.
 *
 * @inode: inode whose data is stored inline.
 * @di_bh: buffer head whose b_data is the inode's ocfs2_dinode.
 * @start: first byte offset to zero.
 * @end:   offset one past the last byte to zero; clamped to i_size.
 * @trunc: if non-zero, i_size (in memory and in the dinode) is set to
 *         'start' after zeroing.
 *
 * After clamping, 'start' must be strictly less than 'end'; otherwise
 * BUG_ON() fires. ctime and mtime are updated on success.
 *
 * Returns -EROFS (after ocfs2_error()) if the in-memory flags, on-disk
 * flags, or superblock do not all indicate inline data; otherwise the
 * status of starting the transaction / obtaining journal access.
 */
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
6228+
unsigned int start, unsigned int end, int trunc)
6229+
{
6230+
int ret;
6231+
unsigned int numbytes;
6232+
handle_t *handle;
6233+
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6234+
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6235+
struct ocfs2_inline_data *idata = &di->id2.i_data;
6236+
6237+
/* Never zero past the current end of file. */
if (end > i_size_read(inode))
6238+
end = i_size_read(inode);
6239+
6240+
/* Callers must pass a non-empty range that begins inside the file. */
BUG_ON(start >= end);
6241+
6242+
/* All three sources must agree that this inode carries inline data. */
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
6243+
!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
6244+
!ocfs2_supports_inline_data(osb)) {
6245+
ocfs2_error(inode->i_sb,
6246+
"Inline data flags for inode %llu don't agree! "
6247+
"Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
6248+
(unsigned long long)OCFS2_I(inode)->ip_blkno,
6249+
le16_to_cpu(di->i_dyn_features),
6250+
OCFS2_I(inode)->ip_dyn_features,
6251+
osb->s_feature_incompat);
6252+
ret = -EROFS;
6253+
goto out;
6254+
}
6255+
6256+
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6257+
if (IS_ERR(handle)) {
6258+
ret = PTR_ERR(handle);
6259+
mlog_errno(ret);
6260+
goto out;
6261+
}
6262+
6263+
ret = ocfs2_journal_access(handle, inode, di_bh,
6264+
OCFS2_JOURNAL_ACCESS_WRITE);
6265+
if (ret) {
6266+
mlog_errno(ret);
6267+
goto out_commit;
6268+
}
6269+
6270+
/* Clear the requested bytes directly in the dinode's inline area. */
numbytes = end - start;
6271+
memset(idata->id_data + start, 0, numbytes);
6272+
6273+
/*
6274+
* No need to worry about the data page here - it's been
6275+
* truncated already and inline data doesn't need it for
6276+
* pushing zero's to disk, so we'll let readpage pick it up
6277+
* later.
6278+
*/
6279+
if (trunc) {
6280+
i_size_write(inode, start);
6281+
di->i_size = cpu_to_le64(start);
6282+
}
6283+
6284+
inode->i_blocks = ocfs2_inode_sector_count(inode);
6285+
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
6286+
6287+
/* Mirror the new timestamps into the on-disk inode. */
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
6288+
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
6289+
6290+
ocfs2_journal_dirty(handle, di_bh);
6291+
6292+
out_commit:
6293+
ocfs2_commit_trans(osb, handle);
6294+
6295+
out:
6296+
return ret;
6297+
}
6298+
60546299
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
60556300
{
60566301
/*

fs/ocfs2/alloc.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
6262
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
6363
}
6464

65+
/* Flag the inode as inline-data and initialize the dinode's inline data area. */
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
66+
/* Switch an inline-data inode over to an extent list; returns 0 or an error code. */
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
67+
struct buffer_head *di_bh);
68+
6569
int ocfs2_truncate_log_init(struct ocfs2_super *osb);
6670
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
6771
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
@@ -115,6 +119,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
115119
struct inode *inode,
116120
struct buffer_head *fe_bh,
117121
struct ocfs2_truncate_context *tc);
122+
/*
 * Zero inline data in [start, end); 'start' is inclusive, 'end' is not.
 * A non-zero 'trunc' also sets the file size to 'start'.
 */
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
123+
unsigned int start, unsigned int end, int trunc);
118124

119125
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
120126
u32 cpos, struct buffer_head **leaf_bh);

0 commit comments

Comments
 (0)