vfs.c 53.7 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#define MSNFS	/* HACK HACK */
/*
 * File operations used by nfsd. Some of these have been ripped from
 * other parts of the kernel because they weren't exported, others
 * are partial duplicates with added or changed functionality.
 *
 * Note that several functions dget() the dentry upon which they want
 * to act, most notably those that create directory entries. Response
 * dentry's are dput()'d if necessary in the release callback.
 * So if you notice code paths that apparently fail to dput() the
 * dentry, don't worry--they have been taken care of.
 *
 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
 */

#include <linux/fs.h>
#include <linux/file.h>
19
#include <linux/splice.h>
Linus Torvalds's avatar
Linus Torvalds committed
20
21
22
23
#include <linux/fcntl.h>
#include <linux/namei.h>
#include <linux/delay.h>
#include <linux/quotaops.h>
Robert Love's avatar
Robert Love committed
24
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
25
26
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
27
28
29
30
31
32
33
34
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>

#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
#endif /* CONFIG_NFSD_V3 */

35
#ifdef CONFIG_NFSD_V4
Linus Torvalds's avatar
Linus Torvalds committed
36
37
38
39
#include <linux/nfs4_acl.h>
#include <linux/nfsd_idmap.h>
#endif /* CONFIG_NFSD_V4 */

40
41
#include "nfsd.h"
#include "vfs.h"
Linus Torvalds's avatar
Linus Torvalds committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

#define NFSDDBG_FACILITY		NFSDDBG_FILEOP


/*
 * This is a cache of readahead params that help us choose the proper
 * readahead strategy. Initially, we set all readahead parameters to 0
 * and let the VFS handle things.
 * Entries are kept in raparm_hash, a small hash table indexed by a
 * hash of (dev, ino).
 */
struct raparms {
	/* Next entry chained in the same raparm_hash bucket. */
	struct raparms		*p_next;
	/* Number of readers currently using the entry; 0 makes it reusable. */
	unsigned int		p_count;
	/* (p_dev, p_ino) identify the file these parameters belong to. */
	ino_t			p_ino;
	dev_t			p_dev;
	/* Non-zero once p_ra holds readahead state saved after a read. */
	int			p_set;
	struct file_ra_state	p_ra;
	/* Index of the raparm_hash bucket holding this entry. */
	unsigned int		p_hindex;
};

63
64
65
66
67
68
69
70
71
/*
 * One bucket of the readahead-parameter hash: the head of a singly
 * linked list of raparms entries (chained through p_next) together with
 * the spinlock taken while that list is walked or modified.
 */
struct raparm_hbucket {
	struct raparms		*pb_head;
	spinlock_t		pb_lock;
} ____cacheline_aligned_in_smp;

/* The table has 1 << RAPARM_HASH_BITS buckets; hashes are masked with RAPARM_HASH_MASK. */
#define RAPARM_HASH_BITS	4
#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
Linus Torvalds's avatar
Linus Torvalds committed
72
73
74
75

/* 
 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
 * a mount point.
76
 * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
Linus Torvalds's avatar
Linus Torvalds committed
77
78
79
80
81
82
83
84
 *  or nfs_ok having possibly changed *dpp and *expp
 */
int
nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 
		        struct svc_export **expp)
{
	struct svc_export *exp = *expp, *exp2 = NULL;
	struct dentry *dentry = *dpp;
Al Viro's avatar
Al Viro committed
85
86
	struct path path = {.mnt = mntget(exp->ex_path.mnt),
			    .dentry = dget(dentry)};
87
	int err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
88

Al Viro's avatar
Al Viro committed
89
	while (d_mountpoint(path.dentry) && follow_down(&path))
Al Viro's avatar
Al Viro committed
90
		;
Linus Torvalds's avatar
Linus Torvalds committed
91

Al Viro's avatar
Al Viro committed
92
	exp2 = rqst_exp_get_by_name(rqstp, &path);
Linus Torvalds's avatar
Linus Torvalds committed
93
	if (IS_ERR(exp2)) {
94
95
96
97
98
99
100
101
102
103
		err = PTR_ERR(exp2);
		/*
		 * We normally allow NFS clients to continue
		 * "underneath" a mountpoint that is not exported.
		 * The exception is V4ROOT, where no traversal is ever
		 * allowed without an explicit export of the new
		 * directory.
		 */
		if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
			err = 0;
Al Viro's avatar
Al Viro committed
104
		path_put(&path);
Linus Torvalds's avatar
Linus Torvalds committed
105
106
		goto out;
	}
107
108
	if (nfsd_v4client(rqstp) ||
		(exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
Linus Torvalds's avatar
Linus Torvalds committed
109
		/* successfully crossed mount point */
Al Viro's avatar
Al Viro committed
110
		/*
Al Viro's avatar
Al Viro committed
111
112
113
114
		 * This is subtle: path.dentry is *not* on path.mnt
		 * at this point.  The only reason we are safe is that
		 * original mnt is pinned down by exp, so we should
		 * put path *before* putting exp
Al Viro's avatar
Al Viro committed
115
		 */
Al Viro's avatar
Al Viro committed
116
117
		*dpp = path.dentry;
		path.dentry = dentry;
Al Viro's avatar
Al Viro committed
118
		*expp = exp2;
Al Viro's avatar
Al Viro committed
119
		exp2 = exp;
Linus Torvalds's avatar
Linus Torvalds committed
120
	}
Al Viro's avatar
Al Viro committed
121
122
	path_put(&path);
	exp_put(exp2);
Linus Torvalds's avatar
Linus Torvalds committed
123
124
125
126
out:
	return err;
}

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
 * Move *path to the parent directory of where it currently points.
 * While path sits on the root dentry of its mount, follow_up() is tried
 * first; the dentry reference held in path is then exchanged for a
 * reference on its parent.
 */
static void follow_to_parent(struct path *path)
{
	struct dentry *parent;

	while (path->dentry == path->mnt->mnt_root) {
		if (!follow_up(path))
			break;
	}

	parent = dget_parent(path->dentry);
	dput(path->dentry);
	path->dentry = parent;
}

/*
 * Resolve ".." from dparent when that may leave the current export.
 *
 * On success (return 0) *dentryp holds a new dentry reference:
 *  - if rqst_exp_parent() finds an export for the parent path, *exp is
 *    put and replaced by it and *dentryp is the parent dentry;
 *  - if it reports -ENOENT, *exp is left alone and *dentryp is dparent.
 * Any other lookup error is returned as a negative errno with *exp and
 * *dentryp untouched.
 */
static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
{
	struct svc_export *parent_exp;
	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
			    .dentry = dget(dparent)};
	int host_err = 0;

	follow_to_parent(&path);

	parent_exp = rqst_exp_parent(rqstp, &path);
	if (!IS_ERR(parent_exp)) {
		*dentryp = dget(path.dentry);
		exp_put(*exp);
		*exp = parent_exp;
	} else if (PTR_ERR(parent_exp) == -ENOENT) {
		*dentryp = dget(dparent);
	} else {
		host_err = PTR_ERR(parent_exp);
	}

	path_put(&path);
	return host_err;
}

161
162
163
164
/*
 * For nfsd purposes, we treat V4ROOT exports as though there was an
 * export at *every* directory.
 */
165
int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
166
167
168
169
170
171
172
173
{
	if (d_mountpoint(dentry))
		return 1;
	if (!(exp->ex_flags & NFSEXP_V4ROOT))
		return 0;
	return dentry->d_inode != NULL;
}

174
__be32
175
nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
176
		   const char *name, unsigned int len,
177
		   struct svc_export **exp_ret, struct dentry **dentry_ret)
Linus Torvalds's avatar
Linus Torvalds committed
178
179
180
181
{
	struct svc_export	*exp;
	struct dentry		*dparent;
	struct dentry		*dentry;
182
183
	__be32			err;
	int			host_err;
Linus Torvalds's avatar
Linus Torvalds committed
184
185
186
187

	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);

	/* Obtain dentry and export. */
Miklos Szeredi's avatar
Miklos Szeredi committed
188
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
Linus Torvalds's avatar
Linus Torvalds committed
189
190
191
192
193
194
195
196
197
198
199
	if (err)
		return err;

	dparent = fhp->fh_dentry;
	exp  = fhp->fh_export;
	exp_get(exp);

	/* Lookup the name, but don't follow links */
	if (isdotent(name, len)) {
		if (len==1)
			dentry = dget(dparent);
200
		else if (dparent != exp->ex_path.dentry)
Linus Torvalds's avatar
Linus Torvalds committed
201
			dentry = dget_parent(dparent);
202
		else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
Linus Torvalds's avatar
Linus Torvalds committed
203
204
205
			dentry = dget(dparent); /* .. == . just like at / */
		else {
			/* checking mountpoint crossing is very different when stepping up */
206
207
			host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
			if (host_err)
Linus Torvalds's avatar
Linus Torvalds committed
208
209
210
211
212
				goto out_nfserr;
		}
	} else {
		fh_lock(fhp);
		dentry = lookup_one_len(name, dparent, len);
213
		host_err = PTR_ERR(dentry);
Linus Torvalds's avatar
Linus Torvalds committed
214
215
216
217
218
		if (IS_ERR(dentry))
			goto out_nfserr;
		/*
		 * check if we have crossed a mount point ...
		 */
219
		if (nfsd_mountpoint(dentry, exp)) {
220
			if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
Linus Torvalds's avatar
Linus Torvalds committed
221
222
223
224
225
				dput(dentry);
				goto out_nfserr;
			}
		}
	}
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
	*dentry_ret = dentry;
	*exp_ret = exp;
	return 0;

out_nfserr:
	exp_put(exp);
	return nfserrno(host_err);
}

/*
 * Look up one component of a pathname.
 * N.B. After this call _both_ fhp and resfh need an fh_put
 *
 * If the lookup would cross a mountpoint, and the mounted filesystem
 * is exported to the client with NFSEXP_NOHIDE, then the lookup is
 * accepted as it stands and the mounted directory is
 * returned. Otherwise the covered directory is returned.
 * NOTE: this mountpoint crossing is not supported properly by all
 *   clients and is explicitly disallowed for NFSv3
 *      NeilBrown <neilb@cse.unsw.edu.au>
 */
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
249
				unsigned int len, struct svc_fh *resfh)
250
251
252
253
254
255
256
257
{
	struct svc_export	*exp;
	struct dentry		*dentry;
	__be32 err;

	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
	if (err)
		return err;
258
259
260
	err = check_nfsd_access(exp, rqstp);
	if (err)
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
261
262
263
264
265
266
267
	/*
	 * Note: we compose the file handle now, but as the
	 * dentry may be negative, it may need to be updated.
	 */
	err = fh_compose(resfh, exp, dentry, fhp);
	if (!err && !dentry->d_inode)
		err = nfserr_noent;
268
out:
Linus Torvalds's avatar
Linus Torvalds committed
269
270
271
272
273
	dput(dentry);
	exp_put(exp);
	return err;
}

274

Linus Torvalds's avatar
Linus Torvalds committed
275
276
277
278
/*
 * Set various file attributes.
 * N.B. After this call fhp needs an fh_put
 */
279
__be32
Linus Torvalds's avatar
Linus Torvalds committed
280
281
282
283
284
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
	     int check_guard, time_t guardtime)
{
	struct dentry	*dentry;
	struct inode	*inode;
Miklos Szeredi's avatar
Miklos Szeredi committed
285
	int		accmode = NFSD_MAY_SATTR;
Linus Torvalds's avatar
Linus Torvalds committed
286
	int		ftype = 0;
287
288
	__be32		err;
	int		host_err;
Linus Torvalds's avatar
Linus Torvalds committed
289
290
291
	int		size_change = 0;

	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
Miklos Szeredi's avatar
Miklos Szeredi committed
292
		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
Linus Torvalds's avatar
Linus Torvalds committed
293
294
295
296
297
	if (iap->ia_valid & ATTR_SIZE)
		ftype = S_IFREG;

	/* Get inode */
	err = fh_verify(rqstp, fhp, ftype, accmode);
298
	if (err)
Linus Torvalds's avatar
Linus Torvalds committed
299
300
301
302
303
		goto out;

	dentry = fhp->fh_dentry;
	inode = dentry->d_inode;

304
305
306
307
308
309
310
	/* Ignore any mode updates on symlinks */
	if (S_ISLNK(inode->i_mode))
		iap->ia_valid &= ~ATTR_MODE;

	if (!iap->ia_valid)
		goto out;

311
312
	/*
	 * NFSv2 does not differentiate between "set-[ac]time-to-now"
Linus Torvalds's avatar
Linus Torvalds committed
313
314
315
316
317
318
319
320
321
322
323
324
	 * which only requires access, and "set-[ac]time-to-X" which
	 * requires ownership.
	 * So if it looks like it might be "set both to the same time which
	 * is close to now", and if inode_change_ok fails, then we
	 * convert to "set to now" instead of "set to explicit time"
	 *
	 * We only call inode_change_ok as the last test as technically
	 * it is not an interface that we should be using.  It is only
	 * valid if the filesystem does not define it's own i_op->setattr.
	 */
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define	MAX_TOUCH_TIME_ERROR (30*60)
325
326
327
328
329
330
331
332
	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
	    iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
		/*
		 * Looks probable.
		 *
		 * Now just make sure time is in the right ballpark.
		 * Solaris, at least, doesn't seem to care what the time
		 * request is.  We require it be within 30 minutes of now.
Linus Torvalds's avatar
Linus Torvalds committed
333
		 */
334
335
336
337
338
339
340
341
342
343
344
345
		time_t delta = iap->ia_atime.tv_sec - get_seconds();
		if (delta < 0)
			delta = -delta;
		if (delta < MAX_TOUCH_TIME_ERROR &&
		    inode_change_ok(inode, iap) != 0) {
			/*
			 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
			 * This will cause notify_change to set these times
			 * to "now"
			 */
			iap->ia_valid &= ~BOTH_TIME_SET;
		}
Linus Torvalds's avatar
Linus Torvalds committed
346
347
	}
	    
348
349
350
351
	/*
	 * The size case is special.
	 * It changes the file as well as the attributes.
	 */
Linus Torvalds's avatar
Linus Torvalds committed
352
353
	if (iap->ia_valid & ATTR_SIZE) {
		if (iap->ia_size < inode->i_size) {
Miklos Szeredi's avatar
Miklos Szeredi committed
354
355
			err = nfsd_permission(rqstp, fhp->fh_export, dentry,
					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
Linus Torvalds's avatar
Linus Torvalds committed
356
357
358
359
360
361
362
363
			if (err)
				goto out;
		}

		/*
		 * If we are changing the size of the file, then
		 * we need to break all leases.
		 */
364
365
366
367
		host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
		if (host_err == -EWOULDBLOCK)
			host_err = -ETIMEDOUT;
		if (host_err) /* ENOMEM or EWOULDBLOCK */
Linus Torvalds's avatar
Linus Torvalds committed
368
369
			goto out_nfserr;

370
371
		host_err = get_write_access(inode);
		if (host_err)
Linus Torvalds's avatar
Linus Torvalds committed
372
373
374
			goto out_nfserr;

		size_change = 1;
375
376
		host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
		if (host_err) {
Linus Torvalds's avatar
Linus Torvalds committed
377
378
379
			put_write_access(inode);
			goto out_nfserr;
		}
380
		vfs_dq_init(inode);
Linus Torvalds's avatar
Linus Torvalds committed
381
382
	}

383
	/* sanitize the mode change */
Linus Torvalds's avatar
Linus Torvalds committed
384
385
	if (iap->ia_valid & ATTR_MODE) {
		iap->ia_mode &= S_IALLUGO;
386
		iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
387
388
389
	}

	/* Revoke setuid/setgid on chown */
390
391
392
	if (!S_ISDIR(inode->i_mode) &&
	    (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
	     ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
393
394
395
		iap->ia_valid |= ATTR_KILL_PRIV;
		if (iap->ia_valid & ATTR_MODE) {
			/* we're setting mode too, just clear the s*id bits */
396
			iap->ia_mode &= ~S_ISUID;
397
398
399
400
401
			if (iap->ia_mode & S_IXGRP)
				iap->ia_mode &= ~S_ISGID;
		} else {
			/* set ATTR_KILL_* bits and let VFS handle it */
			iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
402
		}
Linus Torvalds's avatar
Linus Torvalds committed
403
404
405
406
407
408
409
410
411
	}

	/* Change the attributes. */

	iap->ia_valid |= ATTR_CTIME;

	err = nfserr_notsync;
	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
		fh_lock(fhp);
412
413
		host_err = notify_change(dentry, iap);
		err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
414
415
416
417
418
419
420
421
422
423
424
		fh_unlock(fhp);
	}
	if (size_change)
		put_write_access(inode);
	if (!err)
		if (EX_ISSYNC(fhp->fh_export))
			write_inode_now(inode, 1);
out:
	return err;

out_nfserr:
425
	err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
426
427
428
	goto out;
}

429
430
431
432
433
434
#if defined(CONFIG_NFSD_V2_ACL) || \
    defined(CONFIG_NFSD_V3_ACL) || \
    defined(CONFIG_NFSD_V4)
static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
{
	ssize_t buflen;
435
	ssize_t ret;
436
437
438
439

	buflen = vfs_getxattr(dentry, key, NULL, 0);
	if (buflen <= 0)
		return buflen;
Linus Torvalds's avatar
Linus Torvalds committed
440

441
442
443
444
	*buf = kmalloc(buflen, GFP_KERNEL);
	if (!*buf)
		return -ENOMEM;

445
446
447
448
	ret = vfs_getxattr(dentry, key, *buf, buflen);
	if (ret < 0)
		kfree(*buf);
	return ret;
449
450
451
452
}
#endif

#if defined(CONFIG_NFSD_V4)
Linus Torvalds's avatar
Linus Torvalds committed
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
static int
set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
{
	int len;
	size_t buflen;
	char *buf = NULL;
	int error = 0;

	buflen = posix_acl_xattr_size(pacl->a_count);
	buf = kmalloc(buflen, GFP_KERNEL);
	error = -ENOMEM;
	if (buf == NULL)
		goto out;

	len = posix_acl_to_xattr(pacl, buf, buflen);
	if (len < 0) {
		error = len;
		goto out;
	}

473
	error = vfs_setxattr(dentry, key, buf, len, 0);
Linus Torvalds's avatar
Linus Torvalds committed
474
475
476
477
478
out:
	kfree(buf);
	return error;
}

479
__be32
Linus Torvalds's avatar
Linus Torvalds committed
480
481
482
nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
    struct nfs4_acl *acl)
{
483
484
	__be32 error;
	int host_error;
Linus Torvalds's avatar
Linus Torvalds committed
485
486
487
488
489
490
	struct dentry *dentry;
	struct inode *inode;
	struct posix_acl *pacl = NULL, *dpacl = NULL;
	unsigned int flags = 0;

	/* Get inode */
Miklos Szeredi's avatar
Miklos Szeredi committed
491
	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
Linus Torvalds's avatar
Linus Torvalds committed
492
	if (error)
493
		return error;
Linus Torvalds's avatar
Linus Torvalds committed
494
495
496
497
498
499

	dentry = fhp->fh_dentry;
	inode = dentry->d_inode;
	if (S_ISDIR(inode->i_mode))
		flags = NFS4_ACL_DIR;

500
501
	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
	if (host_error == -EINVAL) {
502
		return nfserr_attrnotsupp;
503
	} else if (host_error < 0)
Linus Torvalds's avatar
Linus Torvalds committed
504
505
		goto out_nfserr;

506
507
	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
	if (host_error < 0)
508
		goto out_release;
Linus Torvalds's avatar
Linus Torvalds committed
509

510
	if (S_ISDIR(inode->i_mode))
511
		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
Linus Torvalds's avatar
Linus Torvalds committed
512

513
out_release:
Linus Torvalds's avatar
Linus Torvalds committed
514
515
516
	posix_acl_release(pacl);
	posix_acl_release(dpacl);
out_nfserr:
517
	if (host_error == -EOPNOTSUPP)
518
		return nfserr_attrnotsupp;
519
	else
520
		return nfserrno(host_error);
Linus Torvalds's avatar
Linus Torvalds committed
521
522
523
524
525
}

static struct posix_acl *
_get_posix_acl(struct dentry *dentry, char *key)
{
526
	void *buf = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
527
	struct posix_acl *pacl = NULL;
528
	int buflen;
Linus Torvalds's avatar
Linus Torvalds committed
529

530
531
532
533
534
	buflen = nfsd_getxattr(dentry, key, &buf);
	if (!buflen)
		buflen = -ENODATA;
	if (buflen <= 0)
		return ERR_PTR(buflen);
Linus Torvalds's avatar
Linus Torvalds committed
535
536
537
538
539
540
541
542
543
544
545
546
547
548

	pacl = posix_acl_from_xattr(buf, buflen);
	kfree(buf);
	return pacl;
}

int
nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
{
	struct inode *inode = dentry->d_inode;
	int error = 0;
	struct posix_acl *pacl = NULL, *dpacl = NULL;
	unsigned int flags = 0;

549
	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
Linus Torvalds's avatar
Linus Torvalds committed
550
551
552
553
554
555
556
557
558
	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
	if (IS_ERR(pacl)) {
		error = PTR_ERR(pacl);
		pacl = NULL;
		goto out;
	}

	if (S_ISDIR(inode->i_mode)) {
559
		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
Linus Torvalds's avatar
Linus Torvalds committed
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
			dpacl = NULL;
		else if (IS_ERR(dpacl)) {
			error = PTR_ERR(dpacl);
			dpacl = NULL;
			goto out;
		}
		flags = NFS4_ACL_DIR;
	}

	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
	if (IS_ERR(*acl)) {
		error = PTR_ERR(*acl);
		*acl = NULL;
	}
 out:
	posix_acl_release(pacl);
	posix_acl_release(dpacl);
	return error;
}

#endif /* defined(CONFIG_NFSD_V4) */

#ifdef CONFIG_NFSD_V3
/*
 * Check server access rights to a file system object
 */
/*
 * One row of an ACCESS translation table: "access" is an NFS3_ACCESS_*
 * bit a client may query, "how" is the set of NFSD_MAY_* flags passed to
 * nfsd_permission() to answer it.  Tables end with an all-zero row.
 */
struct accessmap {
	u32		access;
	int		how;
};
static struct accessmap	nfs3_regaccess[] = {
Miklos Szeredi's avatar
Miklos Szeredi committed
592
593
594
595
    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
    {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_TRUNC	},
    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE			},
Linus Torvalds's avatar
Linus Torvalds committed
596
597
598
599
600

    {	0,			0				}
};

static struct accessmap	nfs3_diraccess[] = {
Miklos Szeredi's avatar
Miklos Szeredi committed
601
602
603
604
605
    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
    {	NFS3_ACCESS_LOOKUP,	NFSD_MAY_EXEC			},
    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_EXEC|NFSD_MAY_WRITE	},
    {	NFS3_ACCESS_DELETE,	NFSD_MAY_REMOVE			},
Linus Torvalds's avatar
Linus Torvalds committed
606
607
608
609
610
611
612
613
614
615
616
617

    {	0,			0				}
};

static struct accessmap	nfs3_anyaccess[] = {
	/* Some clients - Solaris 2.6 at least, make an access call
	 * to the server to check for access for things like /dev/null
	 * (which really, the server doesn't care about).  So
	 * We provide simple access checking for them, looking
	 * mainly at mode bits, and we make sure to ignore read-only
	 * filesystem checks
	 */
Miklos Szeredi's avatar
Miklos Szeredi committed
618
619
620
621
    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
    {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
Linus Torvalds's avatar
Linus Torvalds committed
622
623
624
625

    {	0,			0				}
};

626
__be32
Linus Torvalds's avatar
Linus Torvalds committed
627
628
629
630
631
632
nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
{
	struct accessmap	*map;
	struct svc_export	*export;
	struct dentry		*dentry;
	u32			query, result = 0, sresult = 0;
633
	__be32			error;
Linus Torvalds's avatar
Linus Torvalds committed
634

Miklos Szeredi's avatar
Miklos Szeredi committed
635
	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
Linus Torvalds's avatar
Linus Torvalds committed
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
	if (error)
		goto out;

	export = fhp->fh_export;
	dentry = fhp->fh_dentry;

	if (S_ISREG(dentry->d_inode->i_mode))
		map = nfs3_regaccess;
	else if (S_ISDIR(dentry->d_inode->i_mode))
		map = nfs3_diraccess;
	else
		map = nfs3_anyaccess;


	query = *access;
	for  (; map->access; map++) {
		if (map->access & query) {
653
			__be32 err2;
Linus Torvalds's avatar
Linus Torvalds committed
654
655
656

			sresult |= map->access;

657
			err2 = nfsd_permission(rqstp, export, dentry, map->how);
Linus Torvalds's avatar
Linus Torvalds committed
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
			switch (err2) {
			case nfs_ok:
				result |= map->access;
				break;
				
			/* the following error codes just mean the access was not allowed,
			 * rather than an error occurred */
			case nfserr_rofs:
			case nfserr_acces:
			case nfserr_perm:
				/* simply don't "or" in the access bit. */
				break;
			default:
				error = err2;
				goto out;
			}
		}
	}
	*access = result;
	if (supported)
		*supported = sresult;

 out:
	return error;
}
#endif /* CONFIG_NFSD_V3 */



/*
 * Open an existing file or directory.
 * The access argument indicates the type of open (read/write/lock)
 * N.B. After this call fhp needs an fh_put
 */
692
__be32
Linus Torvalds's avatar
Linus Torvalds committed
693
694
695
696
697
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
			int access, struct file **filp)
{
	struct dentry	*dentry;
	struct inode	*inode;
698
699
700
	int		flags = O_RDONLY|O_LARGEFILE;
	__be32		err;
	int		host_err;
Linus Torvalds's avatar
Linus Torvalds committed
701

702
703
	validate_process_creds();

Linus Torvalds's avatar
Linus Torvalds committed
704
705
706
707
708
	/*
	 * If we get here, then the client has already done an "open",
	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
	 * in case a chmod has now revoked permission.
	 */
Miklos Szeredi's avatar
Miklos Szeredi committed
709
	err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
Linus Torvalds's avatar
Linus Torvalds committed
710
711
712
713
714
715
716
717
718
719
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	inode = dentry->d_inode;

	/* Disallow write access to files with the append-only bit set
	 * or any access when mandatory locking enabled
	 */
	err = nfserr_perm;
Miklos Szeredi's avatar
Miklos Szeredi committed
720
	if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
Linus Torvalds's avatar
Linus Torvalds committed
721
		goto out;
722
723
724
725
726
727
	/*
	 * We must ignore files (but only files) which might have mandatory
	 * locks on them because there is no way to know if the accesser has
	 * the lock.
	 */
	if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
Linus Torvalds's avatar
Linus Torvalds committed
728
729
730
731
732
733
734
735
736
		goto out;

	if (!inode->i_fop)
		goto out;

	/*
	 * Check to see if there are any leases on this file.
	 * This may block while leases are broken.
	 */
Miklos Szeredi's avatar
Miklos Szeredi committed
737
	host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
738
739
740
	if (host_err == -EWOULDBLOCK)
		host_err = -ETIMEDOUT;
	if (host_err) /* NOMEM or WOULDBLOCK */
Linus Torvalds's avatar
Linus Torvalds committed
741
742
		goto out_nfserr;

Miklos Szeredi's avatar
Miklos Szeredi committed
743
744
	if (access & NFSD_MAY_WRITE) {
		if (access & NFSD_MAY_READ)
745
746
747
			flags = O_RDWR|O_LARGEFILE;
		else
			flags = O_WRONLY|O_LARGEFILE;
Linus Torvalds's avatar
Linus Torvalds committed
748

749
		vfs_dq_init(inode);
Linus Torvalds's avatar
Linus Torvalds committed
750
	}
751
	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
752
			    flags, current_cred());
Linus Torvalds's avatar
Linus Torvalds committed
753
	if (IS_ERR(*filp))
754
		host_err = PTR_ERR(*filp);
755
756
	host_err = ima_path_check(&(*filp)->f_path,
				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
Linus Torvalds's avatar
Linus Torvalds committed
757
out_nfserr:
758
	err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
759
out:
760
	validate_process_creds();
Linus Torvalds's avatar
Linus Torvalds committed
761
762
763
764
765
766
767
768
769
770
771
772
	return err;
}

/*
 * Close a file.
 * Hands filp to fput(); the caller must not use filp afterwards.
 */
void
nfsd_close(struct file *filp)
{
	fput(filp);
}

773
774
775
776
777
778
779
780
781
782
783
784
/*
 * Sync a file
 * As this calls fsync (not fdatasync) there is no need for a write_inode
 * after it.
 */
static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
			      const struct file_operations *fop)
{
	struct inode *inode = dp->d_inode;
	int (*fsync) (struct file *, struct dentry *, int);
	int err;

785
	err = filemap_write_and_wait(inode->i_mapping);
786
787
788
789
790
	if (err == 0 && fop && (fsync = fop->fsync))
		err = fsync(filp, dp, 0);
	return err;
}

791
static int
Linus Torvalds's avatar
Linus Torvalds committed
792
793
nfsd_sync(struct file *filp)
{
794
795
796
797
798
799
800
801
        int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
	mutex_lock(&inode->i_mutex);
	err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
	mutex_unlock(&inode->i_mutex);

	return err;
Linus Torvalds's avatar
Linus Torvalds committed
802
803
}

804
int
805
nfsd_sync_dir(struct dentry *dp)
Linus Torvalds's avatar
Linus Torvalds committed
806
{
807
	return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
Linus Torvalds's avatar
Linus Torvalds committed
808
809
810
811
812
813
814
815
816
817
818
819
}

/*
 * Obtain the readahead parameters for the file
 * specified by (dev, ino).
 *
 * Looks the file up in its raparm_hash bucket.  On a miss, an entry of
 * that bucket with p_count == 0 is taken over for the file (the one
 * found last during the walk); if the bucket has none, NULL is returned.
 * The returned entry has been moved to the head of its bucket and has
 * its p_count raised by one.
 */
static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
	struct raparm_hbucket *bucket;
	struct raparms *ra;
	struct raparms **link;
	struct raparms **idle = NULL;
	unsigned int hash;
	int depth = 0;

	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
	bucket = &raparm_hash[hash];

	spin_lock(&bucket->pb_lock);

	/* Walk the chain; ra ends up NULL when the file is not cached. */
	for (link = &bucket->pb_head; (ra = *link) != NULL; link = &ra->p_next) {
		if (ra->p_ino == ino && ra->p_dev == dev)
			break;
		depth++;
		if (ra->p_count == 0)
			idle = link;
	}

	if (!ra) {
		/* Miss: account it in the statistics slot past the real depths. */
		depth = nfsdstats.ra_size*11/10;
		if (!idle) {
			spin_unlock(&bucket->pb_lock);
			return NULL;
		}
		/* Recycle the unused entry for this file. */
		link = idle;
		ra = *idle;
		ra->p_dev = dev;
		ra->p_ino = ino;
		ra->p_set = 0;
		ra->p_hindex = hash;
	}

	/* Move the entry to the front of its bucket unless already there. */
	if (link != &bucket->pb_head) {
		*link = ra->p_next;
		ra->p_next = bucket->pb_head;
		bucket->pb_head = ra;
	}
	ra->p_count++;
	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
	spin_unlock(&bucket->pb_lock);

	return ra;
}

/*
858
859
860
 * Grab and keep cached pages associated with a file in the svc_rqst
 * so that they can be passed to the network sendmsg/sendpage routines
 * directly. They will be released after the sending has completed.
Linus Torvalds's avatar
Linus Torvalds committed
861
862
 */
static int
863
864
nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		  struct splice_desc *sd)
Linus Torvalds's avatar
Linus Torvalds committed
865
{
866
	struct svc_rqst *rqstp = sd->u.data;
867
	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
868
869
870
	struct page *page = buf->page;
	size_t size;
	int ret;
Linus Torvalds's avatar
Linus Torvalds committed
871

872
	ret = buf->ops->confirm(pipe, buf);
873
874
875
876
	if (unlikely(ret))
		return ret;

	size = sd->len;
Linus Torvalds's avatar
Linus Torvalds committed
877
878
879

	if (rqstp->rq_res.page_len == 0) {
		get_page(page);
880
881
882
		put_page(*pp);
		*pp = page;
		rqstp->rq_resused++;
883
		rqstp->rq_res.page_base = buf->offset;
Linus Torvalds's avatar
Linus Torvalds committed
884
		rqstp->rq_res.page_len = size;
885
	} else if (page != pp[-1]) {
Linus Torvalds's avatar
Linus Torvalds committed
886
		get_page(page);
887
888
		if (*pp)
			put_page(*pp);
889
890
		*pp = page;
		rqstp->rq_resused++;
Linus Torvalds's avatar
Linus Torvalds committed
891
		rqstp->rq_res.page_len += size;
892
	} else
Linus Torvalds's avatar
Linus Torvalds committed
893
894
895
896
897
		rqstp->rq_res.page_len += size;

	return size;
}

898
899
900
901
902
903
/*
 * Splice callback: drains the pipe through __splice_from_pipe(), using
 * nfsd_splice_actor() as the per-buffer actor, and returns its result.
 */
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
				    struct splice_desc *sd)
{
	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}

904
905
906
907
908
909
910
911
912
/*
 * Non-zero when the export behind ffhp has NFSEXP_MSNFS set; always 0
 * when the file is built without MSNFS.
 */
static inline int svc_msnfs(struct svc_fh *ffhp)
{
#ifndef MSNFS
	return 0;
#else
	return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS);
#endif
}

913
static __be32
Linus Torvalds's avatar
Linus Torvalds committed
914
915
916
917
918
919
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
	struct inode *inode;
	struct raparms	*ra;
	mm_segment_t	oldfs;
920
921
	__be32		err;
	int		host_err;
Linus Torvalds's avatar
Linus Torvalds committed
922
923

	err = nfserr_perm;
924
	inode = file->f_path.dentry->d_inode;
925
926

	if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
Linus Torvalds's avatar
Linus Torvalds committed
927
928
929
930
931
932
933
934
		goto out;

	/* Get readahead parameters */
	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);

	if (ra && ra->p_set)
		file->f_ra = ra->p_ra;

935
936
937
938
939
940
941
942
	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
		struct splice_desc sd = {
			.len		= 0,
			.total_len	= *count,
			.pos		= offset,
			.u.data		= rqstp,
		};

943
		rqstp->rq_resused = 1;
944
		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
Linus Torvalds's avatar
Linus Torvalds committed
945
946
947
	} else {
		oldfs = get_fs();
		set_fs(KERNEL_DS);
948
		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
Linus Torvalds's avatar
Linus Torvalds committed
949
950
951
952
953
		set_fs(oldfs);
	}

	/* Write back readahead params */
	if (ra) {
954
955
		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
		spin_lock(&rab->pb_lock);
Linus Torvalds's avatar
Linus Torvalds committed
956
957
958
		ra->p_ra = file->f_ra;
		ra->p_set = 1;
		ra->p_count--;
959
		spin_unlock(&rab->pb_lock);
Linus Torvalds's avatar
Linus Torvalds committed
960
961
	}

962
963
964
	if (host_err >= 0) {
		nfsdstats.io_read += host_err;
		*count = host_err;
Linus Torvalds's avatar
Linus Torvalds committed
965
		err = 0;
966
		fsnotify_access(file->f_path.dentry);
Linus Torvalds's avatar
Linus Torvalds committed
967
	} else 
968
		err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
969
970
971
972
out:
	return err;
}

973
974
975
static void kill_suid(struct dentry *dentry)
{
	struct iattr	ia;
976
	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
977

978
	mutex_lock(&dentry->d_inode->i_mutex);
979
	notify_change(dentry, &ia);
980
	mutex_unlock(&dentry->d_inode->i_mutex);
981
982
}

983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
/*
 * Gathered writes: if another process is currently writing to the file,
 * there is a high chance that it is another nfsd (triggered by a bulk
 * write from a client's biod). Rather than syncing the file with each
 * write request, we sleep for 10 msec first.
 *
 * I don't know if this roughly approximates C. Juszczak's idea of
 * gathered writes, but it's a nice and simple solution (IMHO), and it
 * seems to work:-)
 *
 * Note: we do this only in the NFSv2 case, since v3 and higher have a
 * better tool (separate unstable writes and commits) for solving this
 * problem.
 *
 * The inode and device of the file handled last are remembered in
 * function-local statics; a write to that same file is treated like a
 * concurrent writer.  Returns the result of nfsd_sync() if the inode was
 * dirty after the (optional) sleep, otherwise 0.
 */
static int wait_for_concurrent_writes(struct file *file)
{
	static ino_t last_ino;
	static dev_t last_dev;
	struct inode *inode = file->f_path.dentry->d_inode;
	int defer;
	int err = 0;

	defer = atomic_read(&inode->i_writecount) > 1;
	if (!defer)
		defer = (last_ino == inode->i_ino &&
			 last_dev == inode->i_sb->s_dev);

	if (defer) {
		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
		msleep(10);
		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
	}

	if (inode->i_state & I_DIRTY) {
		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
		err = nfsd_sync(file);
	}

	last_ino = inode->i_ino;
	last_dev = inode->i_sb->s_dev;
	return err;
}

1020
static __be32
Linus Torvalds's avatar
Linus Torvalds committed
1021
1022
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
				loff_t offset, struct kvec *vec, int vlen,
1023
				unsigned long *cnt, int *stablep)
Linus Torvalds's avatar
Linus Torvalds committed
1024
1025
1026
1027
1028
{
	struct svc_export	*exp;
	struct dentry		*dentry;
	struct inode		*inode;
	mm_segment_t		oldfs;
1029
1030
	__be32			err = 0;
	int			host_err;
Linus Torvalds's avatar
Linus Torvalds committed
1031
	int			stable = *stablep;
1032
	int			use_wgather;
Linus Torvalds's avatar
Linus Torvalds committed
1033

1034
#ifdef MSNFS
Linus Torvalds's avatar
Linus Torvalds committed
1035
1036
1037
	err = nfserr_perm;

	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1038
		(!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
Linus Torvalds's avatar
Linus Torvalds committed
1039
1040
1041
		goto out;
#endif

1042
	dentry = file->f_path.dentry;
Linus Torvalds's avatar
Linus Torvalds committed
1043
1044
1045
1046
1047
1048
1049
1050
	inode = dentry->d_inode;
	exp   = fhp->fh_export;

	/*
	 * Request sync writes if
	 *  -	the sync export option has been set, or
	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
	 *  -   The file system doesn't support fsync().
1051
	 * When NFSv2 gathered writes have been configured for this volume,
Linus Torvalds's avatar
Linus Torvalds committed
1052
1053
	 * flushing the data to disk is handled separately below.
	 */
1054
	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
Linus Torvalds's avatar
Linus Torvalds committed
1055

1056
	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
Linus Torvalds's avatar
Linus Torvalds committed
1057
1058
1059
1060
1061
1062
	       stable = 2;
	       *stablep = 2; /* FILE_SYNC */
	}

	if (!EX_ISSYNC(exp))
		stable = 0;
1063
	if (stable && !use_wgather) {
1064
		spin_lock(&file->f_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1065
		file->f_flags |= O_SYNC;
1066
1067
		spin_unlock(&file->f_lock);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1068
1069
1070

	/* Write the data. */
	oldfs = get_fs(); set_fs(KERNEL_DS);
1071
	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
Linus Torvalds's avatar
Linus Torvalds committed
1072
	set_fs(oldfs);
1073
1074
1075
1076
1077
	if (host_err < 0)
		goto out_nfserr;
	*cnt = host_err;
	nfsdstats.io_write += host_err;
	fsnotify_modify(file->f_path.dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1078
1079

	/* clear setuid/setgid flag after write */
1080
	if (inode->i_mode & (S_ISUID | S_ISGID))
1081
		kill_suid(dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1082

1083
	if (stable && use_wgather)
1084
		host_err = wait_for_concurrent_writes(file);
Linus Torvalds's avatar
Linus Torvalds committed
1085

1086
out_nfserr:
1087
	dprintk("nfsd: write complete host_err=%d\n", host_err);
1088
	if (host_err >= 0)
Linus Torvalds's avatar
Linus Torvalds committed
1089
		err = 0;
1090
	else
1091
		err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
1092
1093
1094
1095
1096
1097
1098
1099
1100
out:
	return err;
}

/*
 * Read data from a file. count must contain the requested read count
 * on entry. On return, *count contains the number of bytes actually read.
 * N.B. After this call fhp needs an fh_put
 */
1101
__be32
Linus Torvalds's avatar
Linus Torvalds committed
1102
1103
1104
1105
nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
		loff_t offset, struct kvec *vec, int vlen,
		unsigned long *count)
{
1106
	__be32		err;
Linus Torvalds's avatar
Linus Torvalds committed
1107
1108

	if (file) {
1109
		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
Miklos Szeredi's avatar
Miklos Szeredi committed
1110
				NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
Linus Torvalds's avatar
Linus Torvalds committed
1111
1112
1113
1114
		if (err)
			goto out;
		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
	} else {
Miklos Szeredi's avatar
Miklos Szeredi committed
1115
		err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
Linus Torvalds's avatar
Linus Torvalds committed
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
		if (err)
			goto out;
		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
		nfsd_close(file);
	}
out:
	return err;
}

/*
 * Write data to a file.
 * The stable flag requests synchronous writes.
 * N.B. After this call fhp needs an fh_put
 */
1130
__be32
Linus Torvalds's avatar
Linus Torvalds committed
1131
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1132
		loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
Linus Torvalds's avatar
Linus Torvalds committed
1133
1134
		int *stablep)
{
1135
	__be32			err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1136
1137

	if (file) {
1138
		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
Miklos Szeredi's avatar
Miklos Szeredi committed
1139
				NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
Linus Torvalds's avatar
Linus Torvalds committed
1140
1141
1142
1143
1144
		if (err)
			goto out;
		err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
				stablep);
	} else {
Miklos Szeredi's avatar
Miklos Szeredi committed
1145
		err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
Linus Torvalds's avatar
Linus Torvalds committed
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
		if (err)
			goto out;

		if (cnt)
			err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
					     cnt, stablep);
		nfsd_close(file);
	}
out:
	return err;
}

#ifdef CONFIG_NFSD_V3
/*
 * Commit all pending writes to stable storage.
 * Strictly speaking, we could sync just the indicated file region here,
 * but there's currently no way we can ask the VFS to do so.
 *
 * Unfortunately we cannot lock the file to make sure we return full WCC
 * data to the client, as locking happens lower down in the filesystem.
 */
1167
__be32
Linus Torvalds's avatar
Linus Torvalds committed
1168
1169
1170
1171
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
               loff_t offset, unsigned long count)
{
	struct file	*file;
1172
	__be32		err;
Linus Torvalds's avatar
Linus Torvalds committed
1173
1174
1175
1176

	if ((u64)count > ~(u64)offset)
		return nfserr_inval;

Miklos Szeredi's avatar
Miklos Szeredi committed
1177
1178
	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
	if (err)
Linus Torvalds's avatar
Linus Torvalds committed
1179
1180
1181
		return err;
	if (EX_ISSYNC(fhp->fh_export)) {
		if (file->f_op && file->f_op->fsync) {
1182
			err = nfserrno(nfsd_sync(file));
Linus Torvalds's avatar
Linus Torvalds committed
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
		} else {
			err = nfserr_notsupp;
		}
	}

	nfsd_close(file);
	return err;
}
#endif /* CONFIG_NFSD_V3 */

1193
static __be32
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
			struct iattr *iap)
{
	/*
	 * Mode has already been set earlier in create:
	 */
	iap->ia_valid &= ~ATTR_MODE;
	/*
	 * Setting uid/gid works only for root.  Irix appears to
	 * send along the gid on create when it tries to implement
	 * setgid directories via NFS:
	 */
1206
	if (current_fsuid() != 0)
1207
1208
1209
1210
1211
1212
		iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
	if (iap->ia_valid)
		return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
	return 0;
}

1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
/*
 * HPUX clients sometimes create a file with mode 000 and set its size
 * to 0 in the same request.  On some file systems setting the size to 0
 * fails the permission check, because it requires WRITE permission while
 * the mode is 000.  Since a newly created file already has size 0, the
 * resize (to 0) is simply dropped from the attribute set.
 *
 * Call this only after vfs_create() has been called.
 */
static void
nfsd_check_ignore_resizing(struct iattr *iap)
{
	if (!(iap->ia_valid & ATTR_SIZE))
		return;
	if (iap->ia_size != 0)
		return;

	iap->ia_valid &= ~ATTR_SIZE;
}

Linus Torvalds's avatar
Linus Torvalds committed
1228
1229
1230
1231
1232
1233
1234
1235
/*
 * Create a file (regular, directory, device, fifo); UNIX sockets 
 * not yet implemented.
 * If the response fh has been verified, the parent directory should
 * already be locked. Note that the parent directory is left locked.
 *
 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
 */
1236
__be32
Linus Torvalds's avatar
Linus Torvalds committed
1237
1238
1239
1240
1241
1242
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dchild = NULL;
	struct inode	*dirp;
1243
	__be32		err;
1244
	__be32		err2;
1245
	int		host_err;
Linus Torvalds's avatar
Linus Torvalds committed
1246
1247
1248
1249
1250
1251
1252
1253

	err = nfserr_perm;
	if (!flen)
		goto out;
	err = nfserr_exist;
	if (isdotent(fname, flen))
		goto out;

Miklos Szeredi's avatar
Miklos Szeredi committed
1254
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
Linus Torvalds's avatar
Linus Torvalds committed
1255
1256
1257
1258
1259
1260
1261
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	dirp = dentry->d_inode;

	err = nfserr_notdir;
Al Viro's avatar
Al Viro committed
1262
	if (!dirp->i_op->lookup)
Linus Torvalds's avatar
Linus Torvalds committed
1263
1264
1265
1266
1267
1268
1269
		goto out;
	/*
	 * Check whether the response file handle has been verified yet.
	 * If it has, the parent directory should already be locked.
	 */
	if (!resfhp->fh_dentry) {
		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1270
		fh_lock_nested(fhp, I_MUTEX_PARENT);
Linus Torvalds's avatar
Linus Torvalds committed
1271
		dchild = lookup_one_len(fname, dentry, flen);
1272
		host_err = PTR_ERR(dchild);
Linus Torvalds's avatar
Linus Torvalds committed
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
		if (IS_ERR(dchild))
			goto out_nfserr;
		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
		if (err)
			goto out;
	} else {
		/* called from nfsd_proc_create */
		dchild = dget(resfhp->fh_dentry);
		if (!fhp->fh_locked) {
			/* not actually possible */
			printk(KERN_ERR
				"nfsd_create: parent %s/%s not locked!\n",
				dentry->d_parent->d_name.name,
				dentry->d_name.name);
Al Viro's avatar
Al Viro committed
1287
			err = nfserr_io;
Linus Torvalds's avatar
Linus Torvalds committed
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
			goto out;
		}
	}
	/*
	 * Make sure the child dentry is still negative ...
	 */
	err = nfserr_exist;
	if (dchild->d_inode) {
		dprintk("nfsd_create: dentry %s/%s not negative!\n",
			dentry->d_name.name, dchild->d_name.name);
		goto out; 
	}

	if (!(iap->ia_valid & ATTR_MODE))
		iap->ia_mode = 0;
	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;

1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
	err = nfserr_inval;
	if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
		printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
		       type);
		goto out;
	}

	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
	if (host_err)
		goto out_nfserr;

Linus Torvalds's avatar
Linus Torvalds committed
1316
1317
1318
	/*
	 * Get the dir op function pointer.
	 */
1319
	err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1320
1321
	switch (type) {
	case S_IFREG:
1322
		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1323
1324
		if (!host_err)
			nfsd_check_ignore_resizing(iap);
Linus Torvalds's avatar
Linus Torvalds committed
1325
1326
		break;
	case S_IFDIR:
1327
		host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
Linus Torvalds's avatar
Linus Torvalds committed
1328
1329
1330
1331
1332
		break;
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
1333
		host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
Linus Torvalds's avatar
Linus Torvalds committed
1334
1335
		break;
	}
1336
1337
	if (host_err < 0) {
		mnt_drop_write(fhp->fh_export->ex_path.mnt);
Linus Torvalds's avatar
Linus Torvalds committed
1338
		goto out_nfserr;
1339
	}
Linus Torvalds's avatar
Linus Torvalds committed
1340
1341

	if (EX_ISSYNC(fhp->fh_export)) {
1342
		err = nfserrno(nfsd_sync_dir(dentry));
Linus Torvalds's avatar
Linus Torvalds committed
1343
1344
1345
		write_inode_now(dchild->d_inode, 1);
	}

1346
1347
1348
	err2 = nfsd_create_setattr(rqstp, resfhp, iap);
	if (err2)
		err = err2;
1349
	mnt_drop_write(fhp->fh_export->ex_path.mnt);
Linus Torvalds's avatar
Linus Torvalds committed
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
	/*
	 * Update the file handle to get the new inode info.
	 */
	if (!err)
		err = fh_update(resfhp);
out:
	if (dchild && !IS_ERR(dchild))
		dput(dchild);
	return err;

out_nfserr:
1361
	err = nfserrno(host_err);
Linus Torvalds's avatar
Linus Torvalds committed
1362
1363
1364
1365
1366
1367
1368
	goto out;
}

#ifdef CONFIG_NFSD_V3
/*
 * NFSv3 version of nfsd_create
 */
1369
__be32
Linus Torvalds's avatar
Linus Torvalds committed
1370
1371
1372
nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		struct svc_fh *resfhp, int createmode, u32 *verifier,
1373
	        int *truncp, int *created)
Linus Torvalds's avatar
Linus Torvalds committed
1374
1375
1376
{
	struct dentry	*dentry, *dchild = NULL;
	struct inode	*dirp;
1377
	__be32		err;
1378
	__be32		err2;