Skip to content

Commit 1c5bd76

Browse files
committed
pNFS: Enable layoutreturn operation for return-on-close
Amend the pNFS return-on-close helper functions to enable sending the layoutreturn op in CLOSE/DELEGRETURN. This closes a potential race between CLOSE/DELEGRETURN and parallel OPEN calls to the same file, and allows the client and the server to agree on whether or not there is an outstanding layout.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
1 parent 828ed9e commit 1c5bd76

File tree

3 files changed

+96
-118
lines changed

3 files changed

+96
-118
lines changed

fs/nfs/nfs4proc.c

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3052,7 +3052,8 @@ static void nfs4_free_closedata(void *data)
30523052
struct super_block *sb = calldata->state->inode->i_sb;
30533053

30543054
if (calldata->lr.roc)
3055-
pnfs_roc_release(calldata->state->inode);
3055+
pnfs_roc_release(&calldata->lr.arg, &calldata->lr.res,
3056+
calldata->res.lr_ret);
30563057
nfs4_put_open_state(calldata->state);
30573058
nfs_free_seqid(calldata->arg.seqid);
30583059
nfs4_put_state_owner(sp);
@@ -3103,9 +3104,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
31033104
switch (task->tk_status) {
31043105
case 0:
31053106
res_stateid = &calldata->res.stateid;
3106-
if (calldata->lr.roc)
3107-
pnfs_roc_set_barrier(state->inode,
3108-
calldata->lr.roc_barrier);
31093107
renew_lease(server, calldata->timestamp);
31103108
break;
31113109
case -NFS4ERR_ADMIN_REVOKED:
@@ -3181,7 +3179,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
31813179
goto out_no_action;
31823180
}
31833181

3184-
if (!calldata->arg.lr_args && nfs4_wait_on_layoutreturn(inode, task)) {
3182+
if (!calldata->lr.roc && nfs4_wait_on_layoutreturn(inode, task)) {
31853183
nfs_release_seqid(calldata->arg.seqid);
31863184
goto out_wait;
31873185
}
@@ -3195,8 +3193,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
31953193
else
31963194
calldata->arg.bitmask = NULL;
31973195
}
3198-
if (calldata->lr.roc)
3199-
pnfs_roc_get_barrier(inode, &calldata->lr.roc_barrier);
32003196

32013197
calldata->arg.share_access =
32023198
nfs4_map_atomic_open_share(NFS_SERVER(inode),
@@ -3223,13 +3219,6 @@ static const struct rpc_call_ops nfs4_close_ops = {
32233219
.rpc_release = nfs4_free_closedata,
32243220
};
32253221

3226-
static bool nfs4_roc(struct inode *inode)
3227-
{
3228-
if (!nfs_have_layout(inode))
3229-
return false;
3230-
return pnfs_roc(inode);
3231-
}
3232-
32333222
/*
32343223
* It is possible for data to be read/written from a mem-mapped file
32353224
* after the sys_close call (which hits the vfs layer as a flush).
@@ -3281,7 +3270,12 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
32813270
calldata->res.seqid = calldata->arg.seqid;
32823271
calldata->res.server = server;
32833272
calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
3284-
calldata->lr.roc = nfs4_roc(state->inode);
3273+
calldata->lr.roc = pnfs_roc(state->inode,
3274+
&calldata->lr.arg, &calldata->lr.res, msg.rpc_cred);
3275+
if (calldata->lr.roc) {
3276+
calldata->arg.lr_args = &calldata->lr.arg;
3277+
calldata->res.lr_res = &calldata->lr.res;
3278+
}
32853279
nfs_sb_active(calldata->inode->i_sb);
32863280

32873281
msg.rpc_argp = &calldata->arg;
@@ -5676,8 +5670,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
56765670
}
56775671
}
56785672
data->rpc_status = task->tk_status;
5679-
if (data->lr.roc && data->rpc_status == 0)
5680-
pnfs_roc_set_barrier(data->inode, data->lr.roc_barrier);
56815673
}
56825674

56835675
static void nfs4_delegreturn_release(void *calldata)
@@ -5687,7 +5679,8 @@ static void nfs4_delegreturn_release(void *calldata)
56875679

56885680
if (inode) {
56895681
if (data->lr.roc)
5690-
pnfs_roc_release(inode);
5682+
pnfs_roc_release(&data->lr.arg, &data->lr.res,
5683+
data->res.lr_ret);
56915684
nfs_iput_and_deactive(inode);
56925685
}
56935686
kfree(calldata);
@@ -5699,13 +5692,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
56995692

57005693
d_data = (struct nfs4_delegreturndata *)data;
57015694

5702-
if (!d_data->args.lr_args &&
5703-
nfs4_wait_on_layoutreturn(d_data->inode, task))
5695+
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
57045696
return;
57055697

5706-
if (d_data->lr.roc)
5707-
pnfs_roc_get_barrier(d_data->inode, &d_data->lr.roc_barrier);
5708-
57095698
nfs4_setup_sequence(d_data->res.server,
57105699
&d_data->args.seq_args,
57115700
&d_data->res.seq_res,
@@ -5756,8 +5745,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
57565745
data->timestamp = jiffies;
57575746
data->rpc_status = 0;
57585747
data->inode = nfs_igrab_and_active(inode);
5759-
if (data->inode)
5760-
data->lr.roc = nfs4_roc(inode);
5748+
if (data->inode) {
5749+
data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res,
5750+
cred);
5751+
if (data->lr.roc) {
5752+
data->args.lr_args = &data->lr.arg;
5753+
data->res.lr_res = &data->lr.res;
5754+
}
5755+
}
57615756

57625757
task_setup_data.callback_data = data;
57635758
msg.rpc_argp = &data->args;

fs/nfs/pnfs.c

Lines changed: 62 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,20 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
984984

985985
}
986986

987+
static void
988+
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
989+
u32 seq)
990+
{
991+
if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
992+
iomode = IOMODE_ANY;
993+
lo->plh_return_iomode = iomode;
994+
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
995+
if (seq != 0) {
996+
WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
997+
lo->plh_return_seq = seq;
998+
}
999+
}
1000+
9871001
static bool
9881002
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
9891003
nfs4_stateid *stateid,
@@ -1188,17 +1202,22 @@ pnfs_commit_and_return_layout(struct inode *inode)
11881202
return ret;
11891203
}
11901204

1191-
bool pnfs_roc(struct inode *ino)
1205+
bool pnfs_roc(struct inode *ino,
1206+
struct nfs4_layoutreturn_args *args,
1207+
struct nfs4_layoutreturn_res *res,
1208+
const struct rpc_cred *cred)
11921209
{
11931210
struct nfs_inode *nfsi = NFS_I(ino);
11941211
struct nfs_open_context *ctx;
11951212
struct nfs4_state *state;
11961213
struct pnfs_layout_hdr *lo;
1197-
struct pnfs_layout_segment *lseg, *tmp;
1214+
struct pnfs_layout_segment *lseg, *next;
11981215
nfs4_stateid stateid;
1199-
LIST_HEAD(tmp_list);
1200-
bool found = false, layoutreturn = false, roc = false;
1216+
enum pnfs_iomode iomode = 0;
1217+
bool layoutreturn = false, roc = false;
12011218

1219+
if (!nfs_have_layout(ino))
1220+
return false;
12021221
spin_lock(&ino->i_lock);
12031222
lo = nfsi->layout;
12041223
if (!lo || !pnfs_layout_is_valid(lo) ||
@@ -1217,83 +1236,63 @@ bool pnfs_roc(struct inode *ino)
12171236
}
12181237

12191238

1220-
list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) {
1239+
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
12211240
/* If we are sending layoutreturn, invalidate all valid lsegs */
1222-
if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
1223-
mark_lseg_invalid(lseg, &tmp_list);
1224-
found = true;
1225-
}
1241+
if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
1242+
continue;
1243+
/*
1244+
* Note: mark lseg for return so pnfs_layout_remove_lseg
1245+
* doesn't invalidate the layout for us.
1246+
*/
1247+
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
1248+
if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
1249+
continue;
1250+
pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
12261251
}
12271252

1228-
/* always send layoutreturn if being marked so */
1229-
if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
1230-
layoutreturn = pnfs_prepare_layoutreturn(lo,
1231-
&stateid, NULL);
1232-
if (layoutreturn)
1233-
goto out_noroc;
1234-
}
1253+
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
1254+
goto out_noroc;
12351255

12361256
/* ROC in two conditions:
12371257
* 1. there are ROC lsegs
12381258
* 2. we don't send layoutreturn
12391259
*/
1240-
if (found) {
1241-
/* lo ref dropped in pnfs_roc_release() */
1242-
pnfs_get_layout_hdr(lo);
1243-
roc = true;
1244-
}
1260+
/* lo ref dropped in pnfs_roc_release() */
1261+
layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
1262+
/* If the creds don't match, we can't compound the layoutreturn */
1263+
if (!layoutreturn || cred != lo->plh_lc_cred)
1264+
goto out_noroc;
1265+
1266+
roc = layoutreturn;
1267+
pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
1268+
res->lrs_present = 0;
1269+
layoutreturn = false;
12451270

12461271
out_noroc:
12471272
spin_unlock(&ino->i_lock);
1248-
pnfs_free_lseg_list(&tmp_list);
12491273
pnfs_layoutcommit_inode(ino, true);
12501274
if (layoutreturn)
1251-
pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
1275+
pnfs_send_layoutreturn(lo, &stateid, iomode, true);
12521276
return roc;
12531277
}
12541278

1255-
void pnfs_roc_release(struct inode *ino)
1279+
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
1280+
struct nfs4_layoutreturn_res *res,
1281+
int ret)
12561282
{
1257-
struct pnfs_layout_hdr *lo;
1283+
struct pnfs_layout_hdr *lo = args->layout;
1284+
const nfs4_stateid *arg_stateid = NULL;
1285+
const nfs4_stateid *res_stateid = NULL;
12581286

1259-
spin_lock(&ino->i_lock);
1260-
lo = NFS_I(ino)->layout;
1261-
pnfs_clear_layoutreturn_waitbit(lo);
1262-
if (atomic_dec_and_test(&lo->plh_refcount)) {
1263-
pnfs_detach_layout_hdr(lo);
1264-
spin_unlock(&ino->i_lock);
1265-
pnfs_free_layout_hdr(lo);
1266-
} else
1267-
spin_unlock(&ino->i_lock);
1268-
}
1269-
1270-
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
1271-
{
1272-
struct pnfs_layout_hdr *lo;
1273-
1274-
spin_lock(&ino->i_lock);
1275-
lo = NFS_I(ino)->layout;
1276-
if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
1277-
lo->plh_barrier = barrier;
1278-
spin_unlock(&ino->i_lock);
1279-
trace_nfs4_layoutreturn_on_close(ino, 0);
1280-
}
1281-
1282-
void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
1283-
{
1284-
struct nfs_inode *nfsi = NFS_I(ino);
1285-
struct pnfs_layout_hdr *lo;
1286-
u32 current_seqid;
1287-
1288-
spin_lock(&ino->i_lock);
1289-
lo = nfsi->layout;
1290-
current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
1291-
1292-
/* Since close does not return a layout stateid for use as
1293-
* a barrier, we choose the worst-case barrier.
1294-
*/
1295-
*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
1296-
spin_unlock(&ino->i_lock);
1287+
if (ret == 0) {
1288+
arg_stateid = &args->stateid;
1289+
if (res->lrs_present)
1290+
res_stateid = &res->stateid;
1291+
}
1292+
pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
1293+
res_stateid);
1294+
pnfs_put_layout_hdr(lo);
1295+
trace_nfs4_layoutreturn_on_close(args->inode, 0);
12971296
}
12981297

12991298
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
@@ -1931,20 +1930,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
19311930
return ERR_PTR(-EAGAIN);
19321931
}
19331932

1934-
static void
1935-
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
1936-
u32 seq)
1937-
{
1938-
if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
1939-
iomode = IOMODE_ANY;
1940-
lo->plh_return_iomode = iomode;
1941-
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
1942-
if (seq != 0) {
1943-
WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
1944-
lo->plh_return_seq = seq;
1945-
}
1946-
}
1947-
19481933
/**
19491934
* pnfs_mark_matching_lsegs_return - Free or return matching layout segments
19501935
* @lo: pointer to layout header

fs/nfs/pnfs.h

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,13 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
271271
u32 seq);
272272
int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
273273
struct list_head *lseg_list);
274-
bool pnfs_roc(struct inode *ino);
275-
void pnfs_roc_release(struct inode *ino);
276-
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
277-
void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier);
274+
bool pnfs_roc(struct inode *ino,
275+
struct nfs4_layoutreturn_args *args,
276+
struct nfs4_layoutreturn_res *res,
277+
const struct rpc_cred *cred);
278+
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
279+
struct nfs4_layoutreturn_res *res,
280+
int ret);
278281
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task);
279282
void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
280283
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
@@ -666,23 +669,18 @@ pnfs_layoutcommit_outstanding(struct inode *inode)
666669

667670

668671
static inline bool
669-
pnfs_roc(struct inode *ino)
672+
pnfs_roc(struct inode *ino,
673+
struct nfs4_layoutreturn_args *args,
674+
struct nfs4_layoutreturn_res *res,
675+
const struct rpc_cred *cred)
670676
{
671677
return false;
672678
}
673679

674680
static inline void
675-
pnfs_roc_release(struct inode *ino)
676-
{
677-
}
678-
679-
static inline void
680-
pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
681-
{
682-
}
683-
684-
static inline void
685-
pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
681+
pnfs_roc_release(struct nfs4_layoutreturn_args *args,
682+
struct nfs4_layoutreturn_res *res,
683+
int ret)
686684
{
687685
}
688686

0 commit comments

Comments
 (0)