Skip to content

Commit 24c19ef

Browse files
author
Mark Fasheh
committed
ocfs2: Remove i_generation from inode lock names
OCFS2 puts inode meta data in the "lock value block" (LVB) provided by the DLM. Typically, i_generation is encoded in the lock name so that a deleted inode and a new one in the same block don't share the same LVB. Unfortunately, that scheme means that the read in ocfs2_read_locked_inode() is potentially thrown away as soon as the meta data lock is taken - we cannot encode the lock name without first knowing i_generation, which requires a disk read. This patch encodes i_generation in the inode meta data LVB, and removes the value from the inode meta data lock name. This way, the read can be covered by a lock, and at the same time we can distinguish between an up-to-date and a stale LVB. This will help cold-cache stat(2) performance in particular. Since this patch changes the protocol version, we take the opportunity to do a minor re-organization of two of the LVB fields. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
1 parent f9e2d82 commit 24c19ef

File tree

10 files changed

+170
-53
lines changed

10 files changed

+170
-53
lines changed

fs/ocfs2/cluster/tcp_internal.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,17 @@
4444
* locking semantics of the file system using the protocol. It should
4545
* be somewhere else, I'm sure, but right now it isn't.
4646
*
47+
* New in version 4:
48+
* - Remove i_generation from lock names for better stat performance.
49+
*
4750
* New in version 3:
4851
* - Replace dentry votes with a cluster lock
4952
*
5053
* New in version 2:
5154
* - full 64 bit i_size in the metadata lock lvbs
5255
* - introduction of "rw" lock and pushing meta/data locking down
5356
*/
54-
#define O2NET_PROTOCOL_VERSION 3ULL
57+
#define O2NET_PROTOCOL_VERSION 4ULL
5558
struct o2net_handshake {
5659
__be64 protocol_version;
5760
__be64 connector_id;

fs/ocfs2/dlmglue.c

+36-6
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
320320

321321
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
322322
enum ocfs2_lock_type type,
323+
unsigned int generation,
323324
struct inode *inode)
324325
{
325326
struct ocfs2_lock_res_ops *ops;
@@ -341,7 +342,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
341342
};
342343

343344
ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
344-
inode->i_generation, res->l_name);
345+
generation, res->l_name);
345346
ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
346347
}
347348

@@ -1173,17 +1174,19 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
11731174

11741175
int ocfs2_create_new_lock(struct ocfs2_super *osb,
11751176
struct ocfs2_lock_res *lockres,
1176-
int ex)
1177+
int ex,
1178+
int local)
11771179
{
11781180
int level = ex ? LKM_EXMODE : LKM_PRMODE;
11791181
unsigned long flags;
1182+
int lkm_flags = local ? LKM_LOCAL : 0;
11801183

11811184
spin_lock_irqsave(&lockres->l_lock, flags);
11821185
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
11831186
lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
11841187
spin_unlock_irqrestore(&lockres->l_lock, flags);
11851188

1186-
return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL);
1189+
return ocfs2_lock_create(osb, lockres, level, lkm_flags);
11871190
}
11881191

11891192
/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1212,19 +1215,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
12121215
* on a resource which has an invalid one -- we'll set it
12131216
* valid when we release the EX. */
12141217

1215-
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1);
1218+
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
12161219
if (ret) {
12171220
mlog_errno(ret);
12181221
goto bail;
12191222
}
12201223

1221-
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1);
1224+
/*
1225+
* We don't want to use LKM_LOCAL on a meta data lock as they
1226+
* don't use a generation in their lock names.
1227+
*/
1228+
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
12221229
if (ret) {
12231230
mlog_errno(ret);
12241231
goto bail;
12251232
}
12261233

1227-
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1);
1234+
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
12281235
if (ret) {
12291236
mlog_errno(ret);
12301237
goto bail;
@@ -1413,6 +1420,16 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
14131420

14141421
lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
14151422

1423+
/*
1424+
* Invalidate the LVB of a deleted inode - this way other
1425+
* nodes are forced to go to disk and discover the new inode
1426+
* status.
1427+
*/
1428+
if (oi->ip_flags & OCFS2_INODE_DELETED) {
1429+
lvb->lvb_version = 0;
1430+
goto out;
1431+
}
1432+
14161433
lvb->lvb_version = OCFS2_LVB_VERSION;
14171434
lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
14181435
lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
@@ -1429,6 +1446,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
14291446
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
14301447
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
14311448

1449+
out:
14321450
mlog_meta_lvb(0, lockres);
14331451

14341452
mlog_exit_void();
@@ -1727,6 +1745,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
17271745
wait_event(osb->recovery_event,
17281746
ocfs2_node_map_is_empty(osb, &osb->recovery_map));
17291747

1748+
/*
1749+
* We only see this flag if we're being called from
1750+
* ocfs2_read_locked_inode(). It means we're locking an inode
1751+
* which hasn't been populated yet, so clear the refresh flag
1752+
* and let the caller handle it.
1753+
*/
1754+
if (inode->i_state & I_NEW) {
1755+
status = 0;
1756+
ocfs2_complete_lock_res_refresh(lockres, 0);
1757+
goto bail;
1758+
}
1759+
17301760
/* This is fun. The caller may want a bh back, or it may
17311761
* not. ocfs2_meta_lock_update definitely wants one in, but
17321762
* may or may not read one, depending on what's in the

fs/ocfs2/dlmglue.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@
3232
#define OCFS2_LVB_VERSION 4
3333

3434
struct ocfs2_meta_lvb {
35-
__be16 lvb_reserved0;
36-
__u8 lvb_reserved1;
3735
__u8 lvb_version;
36+
__u8 lvb_reserved0;
37+
__be16 lvb_reserved1;
3838
__be32 lvb_iclusters;
3939
__be32 lvb_iuid;
4040
__be32 lvb_igid;
@@ -62,13 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
6262
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
6363
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
6464
enum ocfs2_lock_type type,
65+
unsigned int generation,
6566
struct inode *inode);
6667
void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
6768
u64 parent, struct inode *inode);
6869
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
6970
int ocfs2_create_new_inode_locks(struct inode *inode);
7071
int ocfs2_create_new_lock(struct ocfs2_super *osb,
71-
struct ocfs2_lock_res *lockres, int ex);
72+
struct ocfs2_lock_res *lockres, int ex, int local);
7273
int ocfs2_drop_inode_locks(struct inode *inode);
7374
int ocfs2_data_lock_full(struct inode *inode,
7475
int write,

fs/ocfs2/export.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
5858
return ERR_PTR(-ESTALE);
5959
}
6060

61-
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
61+
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
6262

6363
if (IS_ERR(inode)) {
6464
mlog_errno(PTR_ERR(inode));
@@ -115,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
115115
goto bail_unlock;
116116
}
117117

118-
inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
118+
inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
119119
if (IS_ERR(inode)) {
120120
mlog(ML_ERROR, "Unable to create inode %llu\n",
121121
(unsigned long long)blkno);

0 commit comments

Comments
 (0)