Skip to content

Commit

Permalink
Merge tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux
Browse files Browse the repository at this point in the history
Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Trond made a change to the server's tcp logic that allows a fast
     client to better take advantage of high bandwidth networks, but may
     increase the risk that a single client could starve other clients;
     a new sunrpc.svc_rpc_per_connection_limit parameter should help
     mitigate this in the (hopefully unlikely) event this becomes a
     problem in practice.

   - Tom Haynes added a minimal flex-layout pnfs server, which is of no
     use in production for now--don't build it unless you're doing
     client testing or further server development"

* tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd: remove some dead code in nfsd_create_locked()
  nfsd: drop unnecessary MAY_EXEC check from create
  nfsd: clean up bad-type check in nfsd_create_locked
  nfsd: remove unnecessary positive-dentry check
  nfsd: reorganize nfsd_create
  nfsd: check d_can_lookup in fh_verify of directories
  nfsd: remove redundant zero-length check from create
  nfsd: Make creates return EEXIST instead of EACCES
  SUNRPC: Detect immediate closure of accepted sockets
  SUNRPC: accept() may return sockets that are still in SYN_RECV
  nfsd: allow nfsd to advertise multiple layout types
  nfsd: Close race between nfsd4_release_lockowner and nfsd4_lock
  nfsd/blocklayout: Make sure calculate signature/designator length aligned
  xfs: abstract block export operations from nfsd layouts
  SUNRPC: Remove unused callback xpo_adjust_wspace()
  SUNRPC: Change TCP socket space reservation
  SUNRPC: Add a server side per-connection limit
  SUNRPC: Micro optimisation for svc_data_ready
  SUNRPC: Call the default socket callbacks instead of open coding
  SUNRPC: lock the socket while detaching it
  ...
  • Loading branch information
torvalds committed Aug 4, 2016
2 parents d58b0d9 + 2b11885 commit a71e360
Show file tree
Hide file tree
Showing 37 changed files with 784 additions and 335 deletions.
6 changes: 6 additions & 0 deletions Documentation/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3877,6 +3877,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
using these two parameters to set the minimum and
maximum port values.

sunrpc.svc_rpc_per_connection_limit=
[NFS,SUNRPC]
Limit the number of requests that the server will
process in parallel from a single connection.
The default value is 0 (no limit).

sunrpc.pool_mode=
[NFS]
Control how the NFS server code allocates CPUs to
Expand Down
6 changes: 6 additions & 0 deletions fs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ config FS_POSIX_ACL
config EXPORTFS
tristate

config EXPORTFS_BLOCK_OPS
bool "Enable filesystem export operations for block IO"
help
This option enables the export operations for a filesystem to support
external block IO.

config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
Expand Down
19 changes: 19 additions & 0 deletions fs/nfsd/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ config NFSD_BLOCKLAYOUT
bool "NFSv4.1 server support for pNFS block layouts"
depends on NFSD_V4 && BLOCK
select NFSD_PNFS
select EXPORTFS_BLOCK_OPS
help
This option enables support for the exporting pNFS block layouts
in the kernel's NFS server. The pNFS block layout enables NFS
Expand All @@ -102,6 +103,7 @@ config NFSD_SCSILAYOUT
bool "NFSv4.1 server support for pNFS SCSI layouts"
depends on NFSD_V4 && BLOCK
select NFSD_PNFS
select EXPORTFS_BLOCK_OPS
help
This option enables support for the exporting pNFS SCSI layouts
in the kernel's NFS server. The pNFS SCSI layout enables NFS
Expand All @@ -111,6 +113,23 @@ config NFSD_SCSILAYOUT

If unsure, say N.

config NFSD_FLEXFILELAYOUT
bool "NFSv4.1 server support for pNFS Flex File layouts"
depends on NFSD_V4
select NFSD_PNFS
help
This option enables support for exporting pNFS Flex File
layouts in the kernel's NFS server. The pNFS Flex File layout
enables NFS clients to directly perform I/O to NFSv3 devices
accessible to both the server and the clients. See
draft-ietf-nfsv4-flex-files for more details.

Warning, this server implements the bare minimum functionality
to be a flex file server - it is for testing the client,
not for use in production.

If unsure, say N.

config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY
Expand Down
1 change: 1 addition & 0 deletions fs/nfsd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
2 changes: 2 additions & 0 deletions fs/nfsd/blocklayout.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb,

static __be32
nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
Expand Down Expand Up @@ -355,6 +356,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,

static __be32
nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
Expand Down
4 changes: 2 additions & 2 deletions fs/nfsd/blocklayoutxdr.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)

switch (b->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
len = 4 + 4 + 8 + 4 + b->simple.sig_len;
len = 4 + 4 + 8 + 4 + (XDR_QUADLEN(b->simple.sig_len) << 2);
p = xdr_reserve_space(xdr, len);
if (!p)
return -ETOOSMALL;
Expand All @@ -55,7 +55,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break;
case PNFS_BLOCK_VOLUME_SCSI:
len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
len = 4 + 4 + 4 + 4 + (XDR_QUADLEN(b->scsi.designator_len) << 2) + 8;
p = xdr_reserve_space(xdr, len);
if (!p)
return -ETOOSMALL;
Expand Down
14 changes: 12 additions & 2 deletions fs/nfsd/export.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
new->ex_layout_type = 0;
new->ex_layout_types = 0;
new->ex_uuid = NULL;
new->cd = item->cd;
}
Expand All @@ -731,7 +731,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
item->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = item->ex_fslocs.migrated;
item->ex_fslocs.migrated = 0;
new->ex_layout_type = item->ex_layout_type;
new->ex_layout_types = item->ex_layout_types;
new->ex_nflavors = item->ex_nflavors;
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
Expand Down Expand Up @@ -954,6 +954,16 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
return 0;
}

/* If the compound op contains a spo_must_allowed op,
* it will be sent with integrity/protection which
* will have to be expressly allowed on mounts that
* don't support it
*/

if (nfsd4_spo_must_allow(rqstp))
return 0;

return nfserr_wrongsec;
}

Expand Down
2 changes: 1 addition & 1 deletion fs/nfsd/export.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct svc_export {
struct nfsd4_fs_locations ex_fslocs;
uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
enum pnfs_layouttype ex_layout_type;
u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
};
Expand Down
133 changes: 133 additions & 0 deletions fs/nfsd/flexfilelayout.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
*
* The following implements a super-simple flex-file server
* where the NFSv4.1 mds is also the ds. And the storage is
* the same. I.e., writing to the mds via a NFSv4.1 WRITE
* goes to the same location as the NFSv3 WRITE.
*/
#include <linux/slab.h>

#include <linux/nfsd/debug.h>

#include <linux/sunrpc/addr.h>

#include "flexfilelayoutxdr.h"
#include "pnfs.h"

#define NFSDDBG_FACILITY NFSDDBG_PNFS

static __be32
nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
u32 device_generation = 0;
int error;
uid_t u;

struct pnfs_ff_layout *fl;

/*
* The super simple flex file server has 1 mirror, 1 data server,
* and 1 file handle. So instead of 4 allocs, do 1 for now.
* Zero it out for the stateid - don't want junk in there!
*/
error = -ENOMEM;
fl = kzalloc(sizeof(*fl), GFP_KERNEL);
if (!fl)
goto out_error;
args->lg_content = fl;

/*
* Avoid layout commit, try to force the I/O to the DS,
* and for fun, cause all IOMODE_RW layout segments to
* effectively be WRITE only.
*/
fl->flags = FF_FLAGS_NO_LAYOUTCOMMIT | FF_FLAGS_NO_IO_THRU_MDS |
FF_FLAGS_NO_READ_IO;

/* Do not allow a IOMODE_READ segment to have write pemissions */
if (seg->iomode == IOMODE_READ) {
u = from_kuid(&init_user_ns, inode->i_uid) + 1;
fl->uid = make_kuid(&init_user_ns, u);
} else
fl->uid = inode->i_uid;
fl->gid = inode->i_gid;

error = nfsd4_set_deviceid(&fl->deviceid, fhp, device_generation);
if (error)
goto out_error;

fl->fh.size = fhp->fh_handle.fh_size;
memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);

/* Give whole file layout segments */
seg->offset = 0;
seg->length = NFS4_MAX_UINT64;

dprintk("GET: 0x%llx:0x%llx %d\n", seg->offset, seg->length,
seg->iomode);
return 0;

out_error:
seg->length = 0;
return nfserrno(error);
}

/*
 * Fill in the device address returned for a flex-file GETDEVICEINFO.
 *
 * @sb:    unused here
 * @rqstp: request being served; rq_daddr supplies the advertised
 *         address and svc_max_payload() the rsize/wsize
 * @clp:   unused here
 * @gdp:   GETDEVICEINFO arguments; gd_device is pointed at the newly
 *         allocated pnfs_ff_device_addr
 *
 * The single data server is advertised as version 3, minor version 0,
 * not tightly coupled, at "<address>.<port high byte>.<port low byte>"
 * with netid "tcp" for AF_INET and "tcp6" for everything else.
 *
 * Returns 0 on success, or nfserrno(-ENOMEM) if the allocation fails.
 *
 * NOTE(review): the rpc_ntop() result is not checked and the snprintf()
 * return value is stored as addr_len without clamping; confirm that
 * FF_ADDR_LEN always fits the formatted string and that rq_daddr is
 * always AF_INET or AF_INET6.
 */
static __be32
nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
		struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp)
{
	char host[INET6_ADDRSTRLEN];
	struct pnfs_ff_device_addr *dev;
	u16 port;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return nfserrno(-ENOMEM);
	gdp->gd_device = dev;

	dev->version = 3;
	dev->minor_version = 0;
	dev->tightly_coupled = false;

	dev->rsize = svc_max_payload(rqstp);
	dev->wsize = dev->rsize;

	rpc_ntop((struct sockaddr *)&rqstp->rq_daddr, host, sizeof(host));

	switch (rqstp->rq_daddr.ss_family) {
	case AF_INET:
		port = ntohs(((struct sockaddr_in *)&rqstp->rq_daddr)->sin_port);
		snprintf(dev->netaddr.netid, FF_NETID_LEN + 1, "tcp");
		dev->netaddr.netid_len = 3;
		break;
	default:
		port = ntohs(((struct sockaddr_in6 *)&rqstp->rq_daddr)->sin6_port);
		snprintf(dev->netaddr.netid, FF_NETID_LEN + 1, "tcp6");
		dev->netaddr.netid_len = 4;
		break;
	}

	/* Address string with the port appended as two decimal octets */
	dev->netaddr.addr_len =
		snprintf(dev->netaddr.addr, FF_ADDR_LEN + 1,
			 "%s.%hhu.%hhu", host, port >> 8, port & 0xff);

	return 0;
}

/*
 * Layout operations for the flex-file layout type: the LAYOUTGET and
 * GETDEVICEINFO handlers defined in this file, their XDR encoders, and
 * the device-id notification types (delete and change) this layout
 * advertises.
 */
const struct nfsd4_layout_ops ff_layout_ops = {
	.proc_layoutget		= nfsd4_ff_proc_layoutget,
	.encode_layoutget	= nfsd4_ff_encode_layoutget,
	.proc_getdeviceinfo	= nfsd4_ff_proc_getdeviceinfo,
	.encode_getdeviceinfo	= nfsd4_ff_encode_getdeviceinfo,
	.notify_types		= NOTIFY_DEVICEID4_DELETE |
				  NOTIFY_DEVICEID4_CHANGE,
};
115 changes: 115 additions & 0 deletions fs/nfsd/flexfilelayoutxdr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
*/
#include <linux/sunrpc/svc.h>
#include <linux/nfs4.h>

#include "nfsd.h"
#include "flexfilelayoutxdr.h"

#define NFSDDBG_FACILITY NFSDDBG_PNFS

/*
 * Scratch buffer for a uid or gid rendered as a decimal string before it
 * is XDR-encoded by nfsd4_ff_encode_layoutget().
 */
struct ff_idmap {
/*
 * Formatted with "%u"; 11 bytes presumably covers the 10 digits of a
 * 32-bit value plus the terminating NUL - confirm the id width.
 */
char buf[11];
/* Length of the string in buf, as returned by sprintf() */
int len;
};

/*
 * XDR-encode the flex-file layout body for a LAYOUTGET reply.
 *
 * @xdr: stream to encode into
 * @lgp: LAYOUTGET result; lg_content points at the pnfs_ff_layout
 *       to encode
 *
 * The body is emitted as one length-prefixed blob holding: the stripe
 * unit, one mirror with one data server (device id, efficiency, stateid,
 * one file handle, uid and gid as decimal strings), the layout flags and
 * the stats collect hint.
 *
 * Returns 0 on success, or nfserr_toosmall if the stream has no room.
 */
__be32
nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
		struct nfsd4_layoutget *lgp)
{
	struct pnfs_ff_layout *fl = lgp->lg_content;
	int len, mirror_len, ds_len, fh_len, uid_len, gid_len;
	__be32 *p;

	/*
	 * Unlike nfsd4_encode_user, we know these will
	 * always be stringified.
	 */
	struct ff_idmap uid;
	struct ff_idmap gid;

	uid.len = sprintf(uid.buf, "%u", from_kuid(&init_user_ns, fl->uid));
	gid.len = sprintf(gid.buf, "%u", from_kgid(&init_user_ns, fl->gid));

	/*
	 * The length word written below must equal the number of bytes
	 * actually encoded after it, so every variable-length opaque is
	 * counted as its 4-byte length plus its data rounded up to a
	 * 4-byte boundary, exactly as xdr_encode_opaque() emits it.
	 */
	fh_len = 4 + (XDR_QUADLEN(fl->fh.size) << 2);
	uid_len = 4 + (XDR_QUADLEN(uid.len) << 2);
	gid_len = 4 + (XDR_QUADLEN(gid.len) << 2);

	/* One data server */
	ds_len = (XDR_QUADLEN(sizeof(struct nfsd4_deviceid)) << 2) +
		 4 +		/* efficiency */
		 4 +		/* stateid generation */
		 (XDR_QUADLEN(sizeof(stateid_opaque_t)) << 2) +
		 4 +		/* file handle count */
		 fh_len + uid_len + gid_len;

	/* Data server count followed by the single data server */
	mirror_len = 4 + ds_len;

	/*
	 * The layout segment: stripe unit (8), mirror count (4),
	 * flags (4) and stats collect hint (4) around the mirror.
	 */
	len = 20 + mirror_len;

	p = xdr_reserve_space(xdr, sizeof(__be32) + len);
	if (!p)
		return nfserr_toosmall;

	*p++ = cpu_to_be32(len);
	p = xdr_encode_hyper(p, 0);		/* stripe unit of 1 */

	*p++ = cpu_to_be32(1);			/* single mirror */
	*p++ = cpu_to_be32(1);			/* single data server */

	p = xdr_encode_opaque_fixed(p, &fl->deviceid,
			sizeof(struct nfsd4_deviceid));

	*p++ = cpu_to_be32(1);			/* efficiency */

	*p++ = cpu_to_be32(fl->stateid.si_generation);
	p = xdr_encode_opaque_fixed(p, &fl->stateid.si_opaque,
				    sizeof(stateid_opaque_t));

	*p++ = cpu_to_be32(1);			/* single file handle */
	p = xdr_encode_opaque(p, fl->fh.data, fl->fh.size);

	p = xdr_encode_opaque(p, uid.buf, uid.len);
	p = xdr_encode_opaque(p, gid.buf, gid.len);

	*p++ = cpu_to_be32(fl->flags);
	*p++ = cpu_to_be32(0);			/* No stats collect hint */

	return 0;
}

/*
 * XDR-encode the flex-file device address for a GETDEVICEINFO reply.
 *
 * @xdr: stream to encode into
 * @gdp: GETDEVICEINFO result; gd_device points at the
 *       pnfs_ff_device_addr to encode
 *
 * The body is emitted as one length-prefixed blob holding a single
 * network address (netid and address strings) followed by a single
 * version entry (version, minor version, rsize, wsize, tightly coupled).
 *
 * Returns 0 on success, or nfserr_resource if the stream has no room.
 */
__be32
nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
		struct nfsd4_getdeviceinfo *gdp)
{
	struct pnfs_ff_device_addr *da = gdp->gd_device;
	int len;
	int ver_len;
	int addr_len;
	__be32 *p;

	/*
	 * The length word written below must equal the number of bytes
	 * actually encoded after it, so each string is counted as its
	 * 4-byte length plus its data rounded up to a 4-byte boundary,
	 * exactly as xdr_encode_opaque() emits it.
	 */
	addr_len = 4 + (XDR_QUADLEN(da->netaddr.netid_len) << 2) +
		   4 + (XDR_QUADLEN(da->netaddr.addr_len) << 2);

	/* version, minor version, rsize, wsize, tightly coupled */
	ver_len = 20;

	/* netaddr count + netaddr, version count + version entry */
	len = 4 + ver_len + 4 + addr_len;

	p = xdr_reserve_space(xdr, len + sizeof(__be32));
	if (!p)
		return nfserr_resource;

	/*
	 * Fill in the overall length of the device address body, then
	 * the list of network addresses.
	 */
	*p++ = cpu_to_be32(len);
	*p++ = cpu_to_be32(1);			/* 1 netaddr */
	p = xdr_encode_opaque(p, da->netaddr.netid, da->netaddr.netid_len);
	p = xdr_encode_opaque(p, da->netaddr.addr, da->netaddr.addr_len);

	*p++ = cpu_to_be32(1);			/* 1 versions */

	*p++ = cpu_to_be32(da->version);
	*p++ = cpu_to_be32(da->minor_version);
	*p++ = cpu_to_be32(da->rsize);
	*p++ = cpu_to_be32(da->wsize);
	*p++ = cpu_to_be32(da->tightly_coupled);

	return 0;
}
Loading

0 comments on commit a71e360

Please sign in to comment.