Commit f72caf7e authored by Linus Torvalds

Merge branch 'for-2.6.35' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.35' of git://linux-nfs.org/~bfields/linux: (45 commits)
  Revert "nfsd4: distinguish expired from stale stateids"
  nfsd: safer initialization order in find_file()
  nfs4: minor callback code simplification, comment
  NFSD: don't report compiled-out versions as present
  nfsd4: implement reclaim_complete
  nfsd4: nfsd4_destroy_session must set callback client under the state lock
  nfsd4: keep a reference count on client while in use
  nfsd4: mark_client_expired
  nfsd4: introduce nfs4_client.cl_refcount
  nfsd4: refactor expire_client
  nfsd4: extend the client_lock to cover cl_lru
  nfsd4: use list_move in move_to_confirmed
  nfsd4: fold release_session into expire_client
  nfsd4: rename sessionid_lock to client_lock
  nfsd4: fix bare destroy_session null dereference
  nfsd4: use local variable in nfs4svc_encode_compoundres
  nfsd: further comment typos
  sunrpc: centralise most calls to svc_xprt_received
  nfsd4: fix unlikely race in session replay case
  nfsd4: fix filehandle comment
  ...
parents 6a6be470 e4e83ea4
......@@ -137,7 +137,7 @@ NS*| OPENATTR | OPT | | Section 18.17 |
| READ | REQ | | Section 18.22 |
| READDIR | REQ | | Section 18.23 |
| READLINK | OPT | | Section 18.24 |
NS | RECLAIM_COMPLETE | REQ | | Section 18.51 |
| RECLAIM_COMPLETE | REQ | | Section 18.51 |
| RELEASE_LOCKOWNER | MNI | | N/A |
| REMOVE | REQ | | Section 18.25 |
| RENAME | REQ | | Section 18.26 |
......
......@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_cache = {
.alloc = expkey_alloc,
};
static struct svc_expkey *
svc_expkey_lookup(struct svc_expkey *item)
static int
svc_expkey_hash(struct svc_expkey *item)
{
struct cache_head *ch;
int hash = item->ek_fsidtype;
char * cp = (char*)item->ek_fsid;
int len = key_len(item->ek_fsidtype);
......@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *item)
hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
hash &= EXPKEY_HASHMASK;
return hash;
}
static struct svc_expkey *
svc_expkey_lookup(struct svc_expkey *item)
{
struct cache_head *ch;
int hash = svc_expkey_hash(item);
ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
hash);
......@@ -283,13 +290,7 @@ static struct svc_expkey *
svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
{
struct cache_head *ch;
int hash = new->ek_fsidtype;
char * cp = (char*)new->ek_fsid;
int len = key_len(new->ek_fsidtype);
hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
hash &= EXPKEY_HASHMASK;
int hash = svc_expkey_hash(new);
ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
&old->h, hash);
......@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
.alloc = svc_export_alloc,
};
static struct svc_export *
svc_export_lookup(struct svc_export *exp)
static int
svc_export_hash(struct svc_export *exp)
{
struct cache_head *ch;
int hash;
hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
return hash;
}
static struct svc_export *
svc_export_lookup(struct svc_export *exp)
{
struct cache_head *ch;
int hash = svc_export_hash(exp);
ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
hash);
......@@ -759,10 +768,7 @@ static struct svc_export *
svc_export_update(struct svc_export *new, struct svc_export *old)
{
struct cache_head *ch;
int hash;
hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
int hash = svc_export_hash(old);
ch = sunrpc_cache_update(&svc_export_cache, &new->h,
&old->h,
......@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
err = 0;
finish:
kfree(new.ex_pathname);
if (exp)
if (!IS_ERR_OR_NULL(exp))
exp_put(exp);
if (fsid_key && !IS_ERR(fsid_key))
if (!IS_ERR_OR_NULL(fsid_key))
cache_put(&fsid_key->h, &svc_expkey_cache);
path_put(&path);
out_put_clp:
......
......@@ -32,6 +32,7 @@
*/
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include "nfsd.h"
#include "state.h"
......@@ -79,11 +80,6 @@ enum nfs_cb_opnum4 {
cb_sequence_dec_sz + \
op_dec_sz)
struct nfs4_rpc_args {
void *args_op;
struct nfsd4_cb_sequence args_seq;
};
/*
* Generic encode routines from fs/nfs/nfs4xdr.c
*/
......@@ -428,13 +424,19 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
};
/*
 * RPC version descriptor for the callback program: advertises version
 * number 1 and the procedures listed in nfs4_cb_procedures.
 */
static struct rpc_version nfs_cb_version4 = {
/*
* Note on the callback rpc program version number: despite language in rfc
* 5661 section 18.36.3 requiring servers to use 4 in this field, the
* official xdr descriptions for both 4.0 and 4.1 specify version 1, and
* in practice that appears to be what implementations use. The section
* 18.36.3 language is expected to be fixed in an erratum.
*/
.number = 1,
.nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
.procs = nfs4_cb_procedures
};
static struct rpc_version * nfs_cb_version[] = {
NULL,
&nfs_cb_version4,
};
......@@ -456,15 +458,14 @@ static struct rpc_program cb_program = {
static int max_cb_time(void)
{
return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
return max(nfsd4_lease/10, (time_t)1) * HZ;
}
/* Reference counting, callback cleanup, etc., all look racy as heck.
* And why is cb_set an atomic? */
* And why is cl_cb_set an atomic? */
int setup_callback_client(struct nfs4_client *clp)
int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
{
struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
struct rpc_timeout timeparms = {
.to_initval = max_cb_time(),
.to_retries = 0,
......@@ -476,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp)
.timeout = &timeparms,
.program = &cb_program,
.prognumber = cb->cb_prog,
.version = nfs_cb_version[1]->number,
.version = 0,
.authflavor = clp->cl_flavor,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
.client_name = clp->cl_principal,
......@@ -486,7 +487,7 @@ int setup_callback_client(struct nfs4_client *clp)
if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
if (cb->cb_minorversion) {
args.bc_xprt = clp->cl_cb_xprt;
args.bc_xprt = cb->cb_xprt;
args.protocol = XPRT_TRANSPORT_BC_TCP;
}
/* Create RPC client */
......@@ -496,7 +497,7 @@ int setup_callback_client(struct nfs4_client *clp)
PTR_ERR(client));
return PTR_ERR(client);
}
cb->cb_client = client;
nfsd4_set_callback_client(clp, client);
return 0;
}
......@@ -514,8 +515,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
if (task->tk_status)
warn_no_callback_path(clp, task->tk_status);
else
atomic_set(&clp->cl_cb_conn.cb_set, 1);
put_nfs4_client(clp);
atomic_set(&clp->cl_cb_set, 1);
}
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
......@@ -537,7 +537,6 @@ int set_callback_cred(void)
void do_probe_callback(struct nfs4_client *clp)
{
struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
......@@ -545,34 +544,27 @@ void do_probe_callback(struct nfs4_client *clp)
};
int status;
status = rpc_call_async(cb->cb_client, &msg,
status = rpc_call_async(clp->cl_cb_client, &msg,
RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
&nfsd4_cb_probe_ops, (void *)clp);
if (status) {
if (status)
warn_no_callback_path(clp, status);
put_nfs4_client(clp);
}
}
/*
* Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
*/
void
nfsd4_probe_callback(struct nfs4_client *clp)
void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
{
int status;
BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
BUG_ON(atomic_read(&clp->cl_cb_set));
status = setup_callback_client(clp);
status = setup_callback_client(clp, cb);
if (status) {
warn_no_callback_path(clp, status);
return;
}
/* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
do_probe_callback(clp);
}
......@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
}
}
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegation *dp = calldata;
struct nfs4_client *clp = dp->dl_client;
struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
nfsd4_cb_done(task, calldata);
if (current_rpc_client == NULL) {
/* We're shutting down; give up. */
/* XXX: err, or is it ok just to fall through
* and rpc_restart_call? */
return;
}
switch (task->tk_status) {
case -EIO:
/* Network partition? */
atomic_set(&clp->cl_cb_conn.cb_set, 0);
atomic_set(&clp->cl_cb_set, 0);
warn_no_callback_path(clp, task->tk_status);
if (current_rpc_client != task->tk_client) {
/* queue a callback on the new connection: */
nfsd4_cb_recall(dp);
return;
}
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
......@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
break;
default:
/* success, or error we can't handle */
goto done;
return;
}
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
......@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
rpc_restart_call(task);
return;
} else {
atomic_set(&clp->cl_cb_conn.cb_set, 0);
atomic_set(&clp->cl_cb_set, 0);
warn_no_callback_path(clp, task->tk_status);
}
done:
kfree(task->tk_msg.rpc_argp);
}
static void nfsd4_cb_recall_release(void *calldata)
{
struct nfs4_delegation *dp = calldata;
struct nfs4_client *clp = dp->dl_client;
nfs4_put_delegation(dp);
put_nfs4_client(clp);
}
static const struct rpc_call_ops nfsd4_cb_recall_ops = {
......@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
.rpc_release = nfsd4_cb_recall_release,
};
static struct workqueue_struct *callback_wq;
/*
 * Create the single-threaded "nfsd4_callbacks" workqueue and remember it
 * in callback_wq.
 *
 * Returns 0 on success, or -ENOMEM if the workqueue could not be created.
 */
int nfsd4_create_callback_queue(void)
{
	callback_wq = create_singlethread_workqueue("nfsd4_callbacks");

	return callback_wq ? 0 : -ENOMEM;
}
/* Destroy the workqueue currently held in callback_wq. */
void nfsd4_destroy_callback_queue(void)
{
	destroy_workqueue(callback_wq);
}
/*
 * Install @new as @clp's callback RPC client and shut down the client it
 * replaces, if there was one.
 *
 * Must be called under the state lock.
 */
void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
{
	struct rpc_clnt *prev = clp->cl_cb_client;

	clp->cl_cb_client = new;

	/*
	 * Drain the callback workqueue: once this returns, any work that
	 * saw the previous value of cl_cb_client will be gone...
	 */
	flush_workqueue(callback_wq);

	/* ...so the previous client can now be shut down safely. */
	if (prev != NULL)
		rpc_shutdown_client(prev);
}
/*
* called with dp->dl_count inc'ed.
*/
void
nfsd4_cb_recall(struct nfs4_delegation *dp)
static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
{
struct nfs4_client *clp = dp->dl_client;
struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
struct nfs4_rpc_args *args;
struct rpc_clnt *clnt = clp->cl_cb_client;
struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
.rpc_cred = callback_cred
};
int status = -ENOMEM;
int status;
if (clnt == NULL)
return; /* Client is shutting down; give up. */
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args)
goto out;
args->args_op = dp;
msg.rpc_argp = args;
dp->dl_retries = 1;
status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
&nfsd4_cb_recall_ops, dp);
out:
if (status) {
kfree(args);
put_nfs4_client(clp);
if (status)
nfs4_put_delegation(dp);
}
}
/*
 * Run the callback associated with work item @w.
 *
 * XXX: for now, this just sends off a delegation recall.  In future,
 * generalize to handle any sort of callback.
 */
void nfsd4_do_callback_rpc(struct work_struct *w)
{
	struct nfsd4_callback *cb;
	struct nfs4_delegation *deleg;

	/* Walk outward: work_struct -> nfsd4_callback -> nfs4_delegation. */
	cb = container_of(w, struct nfsd4_callback, cb_work);
	deleg = container_of(cb, struct nfs4_delegation, dl_recall);

	_nfsd4_cb_recall(deleg);
}
/* Queue the recall work item embedded in @dp on the callback workqueue. */
void nfsd4_cb_recall(struct nfs4_delegation *dp)
{
	queue_work(callback_wq, &dp->dl_recall.cb_work);
}
......@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[];
static const char *nfsd4_op_name(unsigned opnum);
/*
* Enforce NFSv4.1 COMPOUND ordering rules.
* Enforce NFSv4.1 COMPOUND ordering rules:
*
* TODO:
* - enforce NFS4ERR_NOT_ONLY_OP,
* - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
* Also note, enforced elsewhere:
* - SEQUENCE other than as first op results in
* NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
* - BIND_CONN_TO_SESSION must be the only op in its compound
* (Will be enforced in nfsd4_bind_conn_to_session().)
* - DESTROY_SESSION must be the final operation in a compound, if
* sessionid's in SEQUENCE and DESTROY_SESSION are the same.
* (Enforced in nfsd4_destroy_session().)
*/
static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
{
if (args->minorversion && args->opcnt > 0) {
struct nfsd4_op *op = &args->ops[0];
return (op->status == nfserr_op_illegal) ||
(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
}
return true;
struct nfsd4_op *op = &args->ops[0];
/* These ordering requirements don't apply to NFSv4.0: */
if (args->minorversion == 0)
return nfs_ok;
/* This is weird, but OK, not our problem: */
if (args->opcnt == 0)
return nfs_ok;
if (op->status == nfserr_op_illegal)
return nfs_ok;
if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
return nfserr_op_not_in_session;
if (op->opnum == OP_SEQUENCE)
return nfs_ok;
if (args->opcnt != 1)
return nfserr_not_only_op;
return nfs_ok;
}
/*
......@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
resp->rqstp = rqstp;
resp->cstate.minorversion = args->minorversion;
resp->cstate.replay_owner = NULL;
resp->cstate.session = NULL;
fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
/* Use the deferral mechanism only for NFSv4.0 compounds */
......@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (args->minorversion > nfsd_supported_minorversion)
goto out;
if (!nfs41_op_ordering_ok(args)) {
status = nfs41_check_op_ordering(args);
if (status) {
op = &args->ops[0];
op->status = nfserr_sequence_pos;
op->status = status;
goto encode_op;
}
status = nfs_ok;
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
......@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_SEQUENCE",
},
[OP_RECLAIM_COMPLETE] = {
.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_RECLAIM_COMPLETE",
},
};
static const char *nfsd4_op_name(unsigned opnum)
......
This diff is collapsed.
......@@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
DECODE_TAIL;
}
/*
 * Decode the arguments of a RECLAIM_COMPLETE operation: a single 32-bit
 * value, which is stored in rc->rca_one_fs.
 *
 * NOTE(review): the local declarations, error handling and return value
 * all come from the DECODE_HEAD / READ_BUF / DECODE_TAIL macros, which are
 * defined outside this view -- confirm their behavior there.
 */
static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
{
DECODE_HEAD;
/* presumably makes 4 bytes of argument data available -- verify macro */
READ_BUF(4);
READ32(rc->rca_one_fs);
DECODE_TAIL;
}
static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{
......@@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
};
struct nfsd4_minorversion_ops {
......@@ -1900,7 +1910,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
if ((buflen -= 4) < 0)
goto out_resource;
WRITE32(NFSD_LEASE_TIME);
WRITE32(nfsd4_lease);
}
if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
if ((buflen -= 4) < 0)
......@@ -3307,11 +3317,14 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
iov = &rqstp->rq_res.head[0];
iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
BUG_ON(iov->iov_len > PAGE_SIZE);
if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
resp->cstate.slot->sl_inuse = false;
nfsd4_put_session(resp->cstate.session);
if (nfsd4_has_session(cs)) {
if (cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
cs->slot->sl_inuse = false;
}
/* Renew the clientid on success and on replay */
release_session_client(cs->session);
}
return 1;
}
......
......@@ -46,6 +46,7 @@ enum {
*/
#ifdef CONFIG_NFSD_V4
NFSD_Leasetime,
NFSD_Gracetime,
NFSD_RecoveryDir,
#endif
};
......@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
#endif
......@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_MaxBlkSize] = write_maxblksize,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
[NFSD_RecoveryDir] = write_recoverydir,
#endif
};
......@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
}
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
{
/* if size > 10 seconds, call
* nfs4_reset_lease() then write out the new lease (seconds) as reply
*/
char *mesg = buf;
int rv, lease;
int rv, i;
if (size > 0) {
if (nfsd_serv)
return -EBUSY;
rv = get_int(&mesg, &lease);
rv = get_int(&mesg, &i);
if (rv)
return rv;
if (lease < 10 || lease > 3600)
/*
* Some sanity checking. We don't have a reason for
* these particular numbers, but problems with the
* extremes are:
* - Too short: the briefest network outage may
* cause clients to lose all their locks. Also,
* the frequent polling may be wasteful.
* - Too long: do you really want reboot recovery
* to take more than an hour? Or to make other
* clients wait an hour before being able to
* revoke a dead client's locks?
*/
if (i < 10 || i > 3600)
return -EINVAL;
nfs4_reset_lease(lease);
*time = i;
}
return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
nfs4_lease_time());
return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
}
/*
 * Locked wrapper around __nfsd4_write_time(): holds nfsd_mutex for the
 * duration of the call and hands its result back unchanged.
 */
static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
{
	ssize_t ret;

	mutex_lock(&nfsd_mutex);
	ret = __nfsd4_write_time(file, buf, size, time);
	mutex_unlock(&nfsd_mutex);

	return ret;
}
/**
......@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
*/
static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
{
ssize_t rv;
return nfsd4_write_time(file, buf, size, &nfsd4_lease);
}
mutex_lock(&nfsd_mutex);
rv = __write_leasetime(file, buf, size);
mutex_unlock(&nfsd_mutex);