Commit 6dea0737 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - support the NFSv4.2 SEEK operation (allowing clients to support
     SEEK_HOLE/SEEK_DATA), thanks to Anna.
   - end the grace period early in a number of cases, mitigating a
     long-standing annoyance, thanks to Jeff
   - improve SMP scalability, thanks to Trond"

* 'for-3.18' of git://linux-nfs.org/~bfields/linux: (55 commits)
  nfsd: eliminate "to_delegation" define
  NFSD: Implement SEEK
  NFSD: Add generic v4.2 infrastructure
  svcrdma: advertise the correct max payload
  nfsd: introduce nfsd4_callback_ops
  nfsd: split nfsd4_callback initialization and use
  nfsd: introduce a generic nfsd4_cb
  nfsd: remove nfsd4_callback.cb_op
  nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence
  nfsd: fix nfsd4_cb_recall_done error handling
  nfsd4: clarify how grace period ends
  nfsd4: stop grace_time update at end of grace period
  nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients
  nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls
  nfsd: serialize nfsdcltrack upcalls for a particular client
  nfsd: pass extra info in env vars to upcalls to allow for early grace period end
  nfsd: add a v4_end_grace file to /proc/fs/nfsd
  lockd: add a /proc/fs/lockd/nlm_end_grace file
  nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE
  nfsd: remove redundant boot_time parm from grace_done client tracking op
  ...
parents 25641c0c 34549ab0
......@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
source "fs/nfs/Kconfig"
source "fs/nfsd/Kconfig"
config GRACE_PERIOD
tristate
config LOCKD
tristate
depends on FILE_LOCKING
select GRACE_PERIOD
config LOCKD_V4
bool
......@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
config NFS_COMMON
bool
depends on NFSD || NFS_FS
depends on NFSD || NFS_FS || LOCKD
default y
source "net/sunrpc/Kconfig"
......
......@@ -5,6 +5,7 @@
obj-$(CONFIG_LOCKD) += lockd.o
lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
svcshare.o svcproc.o svcsubs.o mon.o xdr.o
lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs-$(CONFIG_PROC_FS) += procfs.o
lockd-objs := $(lockd-objs-y)
......@@ -11,7 +11,6 @@ struct lockd_net {
struct delayed_work grace_period_end;
struct lock_manager lockd_manager;
struct list_head grace_list;
spinlock_t nsm_clnt_lock;
unsigned int nsm_users;
......
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include "netns.h"
#include "procfs.h"
/*
* We only allow strings that start with 'Y', 'y', or '1'.
*/
static ssize_t
nlm_end_grace_write(struct file *file, const char __user *buf, size_t size,
loff_t *pos)
{
char *data;
struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
lockd_net_id);
if (size < 1)
return -EINVAL;
data = simple_transaction_get(file, buf, size);
if (IS_ERR(data))
return PTR_ERR(data);
switch(data[0]) {
case 'Y':
case 'y':
case '1':
locks_end_grace(&ln->lockd_manager);
break;
default:
return -EINVAL;
}
return size;
}
static ssize_t
nlm_end_grace_read(struct file *file, char __user *buf, size_t size,
loff_t *pos)
{
struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
lockd_net_id);
char resp[3];
resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N';
resp[1] = '\n';
resp[2] = '\0';
return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp));
}
static const struct file_operations lockd_end_grace_operations = {
.write = nlm_end_grace_write,
.read = nlm_end_grace_read,
.llseek = default_llseek,
.release = simple_transaction_release,
.owner = THIS_MODULE,
};
int __init
lockd_create_procfs(void)
{
struct proc_dir_entry *entry;
entry = proc_mkdir("fs/lockd", NULL);
if (!entry)
return -ENOMEM;
entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry,
&lockd_end_grace_operations);
if (!entry) {
remove_proc_entry("fs/lockd", NULL);
return -ENOMEM;
}
return 0;
}
void __exit
lockd_remove_procfs(void)
{
remove_proc_entry("fs/lockd/nlm_end_grace", NULL);
remove_proc_entry("fs/lockd", NULL);
}
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#ifndef _LOCKD_PROCFS_H
#define _LOCKD_PROCFS_H
#include <linux/kconfig.h>
#if IS_ENABLED(CONFIG_PROC_FS)
int lockd_create_procfs(void);
void lockd_remove_procfs(void);
#else
static inline int
lockd_create_procfs(void)
{
return 0;
}
static inline void
lockd_remove_procfs(void)
{
return;
}
#endif /* IS_ENABLED(CONFIG_PROC_FS) */
#endif /* _LOCKD_PROCFS_H */
......@@ -36,6 +36,7 @@
#include <linux/nfs.h>
#include "netns.h"
#include "procfs.h"
#define NLMDBG_FACILITY NLMDBG_SVC
#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
......@@ -304,13 +305,16 @@ static int lockd_start_svc(struct svc_serv *serv)
svc_sock_update_bufs(serv);
serv->sv_maxconn = nlm_max_connections;
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name);
nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error);
goto out_task;
}
nlmsvc_rqst->rq_task = nlmsvc_task;
wake_up_process(nlmsvc_task);
dprintk("lockd_up: service started\n");
return 0;
......@@ -581,7 +585,7 @@ static int lockd_init_net(struct net *net)
struct lockd_net *ln = net_generic(net, lockd_net_id);
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->grace_list);
INIT_LIST_HEAD(&ln->lockd_manager.list);
spin_lock_init(&ln->nsm_clnt_lock);
return 0;
}
......@@ -615,8 +619,15 @@ static int __init init_nlm(void)
err = register_pernet_subsys(&lockd_net_ops);
if (err)
goto err_pernet;
err = lockd_create_procfs();
if (err)
goto err_procfs;
return 0;
err_procfs:
unregister_pernet_subsys(&lockd_net_ops);
err_pernet:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
......@@ -629,6 +640,7 @@ static void __exit exit_nlm(void)
{
/* FIXME: delete all NLM clients */
nlm_shutdown_hosts();
lockd_remove_procfs();
unregister_pernet_subsys(&lockd_net_ops);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
......
......@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->serv = serv;
cb_info->rqst = rqstp;
cb_info->task = kthread_run(callback_svc, cb_info->rqst,
cb_info->task = kthread_create(callback_svc, cb_info->rqst,
"nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task);
......@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->task = NULL;
return ret;
}
rqstp->rq_task = cb_info->task;
wake_up_process(cb_info->task);
dprintk("nfs_callback_up: service started\n");
return 0;
}
......
......@@ -3,5 +3,6 @@
#
obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
nfs_acl-objs := nfsacl.o
obj-$(CONFIG_GRACE_PERIOD) += grace.o
/*
* Common code for control of lockd and nfsv4 grace periods.
*
* Transplanted from lockd code
*/
#include <linux/module.h>
#include <linux/lockd/bind.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
#include "netns.h"
static int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
/**
* locks_start_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for
*
* A grace period is a period during which locks should not be given
......@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock);
*
* This function is called to start a grace period.
*/
void locks_start_grace(struct net *net, struct lock_manager *lm)
void
locks_start_grace(struct net *net, struct lock_manager *lm)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
struct list_head *grace_list = net_generic(net, grace_net_id);
spin_lock(&grace_lock);
list_add(&lm->list, &ln->grace_list);
list_add(&lm->list, grace_list);
spin_unlock(&grace_lock);
}
EXPORT_SYMBOL_GPL(locks_start_grace);
/**
* locks_end_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for
*
* Call this function to state that the given lock manager is ready to
......@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace);
* Note that callers count on it being safe to call this more than once,
* and the second call should be a no-op.
*/
void locks_end_grace(struct lock_manager *lm)
void
locks_end_grace(struct lock_manager *lm)
{
spin_lock(&grace_lock);
list_del_init(&lm->list);
......@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
* to answer ordinary lock requests, and when they should accept only
* lock reclaims.
*/
int locks_in_grace(struct net *net)
int
locks_in_grace(struct net *net)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
struct list_head *grace_list = net_generic(net, grace_net_id);
return !list_empty(&ln->grace_list);
return !list_empty(grace_list);
}
EXPORT_SYMBOL_GPL(locks_in_grace);
static int __net_init
grace_init_net(struct net *net)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
INIT_LIST_HEAD(grace_list);
return 0;
}
static void __net_exit
grace_exit_net(struct net *net)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
BUG_ON(!list_empty(grace_list));
}
static struct pernet_operations grace_net_ops = {
.init = grace_init_net,
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
};
static int __init
init_grace(void)
{
return register_pernet_subsys(&grace_net_ops);
}
static void __exit
exit_grace(void)
{
unregister_pernet_subsys(&grace_net_ops);
}
MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
MODULE_LICENSE("GPL");
module_init(init_grace)
module_exit(exit_grace)
......@@ -71,6 +71,7 @@ config NFSD_V4
select FS_POSIX_ACL
select SUNRPC_GSS
select CRYPTO
select GRACE_PERIOD
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
......@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL
If you do not wish to enable fine-grained security labels SELinux or
Smack policies on NFSv4 files, say N.
WARNING: there is still a chance of backwards-incompatible protocol changes.
For now we recommend "Y" only for developers and testers.
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
depends on NFSD_V4 && DEBUG_KERNEL
......
......@@ -18,7 +18,6 @@
* is much larger than a sockaddr_in6.
*/
struct svc_cacherep {
struct hlist_node c_hash;
struct list_head c_lru;
unsigned char c_state, /* unused, inprog, done */
......
......@@ -1145,6 +1145,7 @@ static struct flags {
{ NFSEXP_ALLSQUASH, {"all_squash", ""}},
{ NFSEXP_ASYNC, {"async", "sync"}},
{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
{ NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
{ NFSEXP_NOHIDE, {"nohide", ""}},
{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
......
......@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
attr = &argp->attrs;
/* Get the directory inode */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
/* Unfudge the mode bits */
attr->ia_mode &= ~S_IFMT;
if (!(attr->ia_valid & ATTR_MODE)) {
......@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
resp->buflen = resp->count;
resp->rqstp = rqstp;
offset = argp->cookie;
nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
RETURN_STATUS(nfserr_notsupp);
nfserr = nfsd_readdir(rqstp, &resp->fh,
&offset,
&resp->common,
......
......@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
/* Index of predefined Linux callback client operations */
enum {
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_SEQUENCE,
};
struct nfs4_cb_compound_hdr {
/* args */
u32 ident; /* minorversion 0 only */
......@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
const struct nfsd4_callback *cb)
{
const struct nfs4_delegation *args = cb->cb_op;
const struct nfs4_delegation *dp = cb_to_delegation(cb);
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_minorversion,
......@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_recall4args(xdr, args, &hdr);
encode_cb_recall4args(xdr, dp, &hdr);
encode_cb_nops(&hdr);
}
......@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
static struct workqueue_struct *callback_wq;
static void run_nfsd4_cb(struct nfsd4_callback *cb)
{
queue_work(callback_wq, &cb->cb_work);
}
static void do_probe_callback(struct nfs4_client *clp)
{
struct nfsd4_callback *cb = &clp->cl_cb_null;
cb->cb_op = NULL;
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
cb->cb_msg.rpc_argp = NULL;
cb->cb_msg.rpc_resp = NULL;
cb->cb_ops = &nfsd4_cb_probe_ops;
run_nfsd4_cb(cb);
}
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
......@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
{
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
do_probe_callback(clp);
nfsd4_run_cb(&clp->cl_cb_null);
}
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
......@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
/* We're done looking into the sequence information */
task->tk_msg.rpc_resp = NULL;
}
}
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
nfsd4_cb_done(task, calldata);
if (current_rpc_client != task->tk_client) {
if (clp->cl_cb_client != task->tk_client) {
/* We're shutting down or changing cl_cb_client; leave
* it to nfsd4_process_cb_update to restart the call if
* necessary. */
......@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
if (cb->cb_done)
return;
switch (task->tk_status) {
switch (cb->cb_ops->done(cb, task)) {
case 0:
cb->cb_done = true;
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
* before open reply granting delegation */
case 1:
break;
default:
case -1:
/* Network partition? */
nfsd4_mark_cb_down(clp, task->tk_status);
break;
default:
BUG();
}
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
}
nfsd4_mark_cb_down(clp, task->tk_status);
cb->cb_done = true;
}
static void nfsd4_cb_recall_release(void *calldata)
static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
if (cb->cb_done) {
spin_lock(&clp->cl_lock);
list_del(&cb->cb_per_client);
spin_unlock(&clp->cl_lock);
nfs4_put_stid(&dp->dl_stid);
cb->cb_ops->release(cb);
}
}
static const struct rpc_call_ops nfsd4_cb_recall_ops = {
static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_call_prepare = nfsd4_cb_prepare,
.rpc_call_done = nfsd4_cb_recall_done,
.rpc_release = nfsd4_cb_recall_release,
.rpc_call_done = nfsd4_cb_done,
.rpc_release = nfsd4_cb_release,
};
int nfsd4_create_callback_queue(void)
......@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop:
*/
do_probe_callback(clp);
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
}
static void nfsd4_release_cb(struct nfsd4_callback *cb)
{
if (cb->cb_ops->rpc_release)
cb->cb_ops->rpc_release(cb);
}
/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
......@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
/* Yay, the callback channel's back! Restart any callbacks: */
list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
run_nfsd4_cb(cb);
queue_work(callback_wq, &cb->cb_work);
}
static void
nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
nfsd4_release_cb(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
return;
}