/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/aio.h>
#include <linux/falloc.h>

static const struct file_operations fuse_direct_io_file_operations;

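/*
 * Send FUSE_OPEN or FUSE_OPENDIR to the userspace server and collect
 * the reply (file handle and open flags) in *outargp.
 */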
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	struct fuse_req *req;
	int err;

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outargp);
	req->out.args[0].value = outargp;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	return err;
}

struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
	struct fuse_file *ff;

	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
	if (unlikely(!ff))
		return NULL;

	ff->fc = fc;
	ff->reserved_req = fuse_request_alloc(0);
	if (unlikely(!ff->reserved_req)) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	atomic_set(&ff->count, 0);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	spin_lock(&fc->lock);
	ff->kh = ++fc->khctr;
	spin_unlock(&fc->lock);

	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	fuse_request_free(ff->reserved_req);
	kfree(ff);
}

struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	atomic_inc(&ff->count);
	return ff;
}

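/*
 * Workqueue callback used to defer the path_put() for a RELEASE reply,
 * so a single-threaded fuseblk server cannot deadlock on the resulting
 * DESTROY (see the comment in fuse_release_end() below).
 */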
static void fuse_release_async(struct work_struct *work)
{
	struct fuse_req *req;
	struct fuse_conn *fc;
	struct path path;

	req = container_of(work, struct fuse_req, misc.release.work);
	path = req->misc.release.path;
	fc = get_fuse_conn(path.dentry->d_inode);

	fuse_put_request(fc, req);
	path_put(&path);
}

static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
	if (fc->destroy_req) {
		/*
		 * If this is a fuseblk mount, then it's possible that
		 * releasing the path will result in releasing the
		 * super block and sending the DESTROY request.  If
		 * the server is single threaded, this would hang.
		 * For this reason do the path_put() in a separate
		 * thread.
		 */
		atomic_inc(&req->count);
		INIT_WORK(&req->misc.release.work, fuse_release_async);
		schedule_work(&req->misc.release.work);
	} else {
		path_put(&req->misc.release.path);
	}
}

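/*
 * Drop a reference to the file.  On the last put, the prepared RELEASE
 * request is sent: synchronously if @sync is set, as a background
 * request otherwise, or simply dropped if the server does not
 * implement open.
 */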
static void fuse_file_put(struct fuse_file *ff, bool sync)
{
	if (atomic_dec_and_test(&ff->count)) {
		struct fuse_req *req = ff->reserved_req;

		if (ff->fc->no_open) {
			/*
			 * Drop the release request when client does not
			 * implement 'open'
			 */
			req->background = 0;
			path_put(&req->misc.release.path);
			fuse_put_request(ff->fc, req);
		} else if (sync) {
			req->background = 0;
			fuse_request_send(ff->fc, req);
			path_put(&req->misc.release.path);
			fuse_put_request(ff->fc, req);
		} else {
			req->end = fuse_release_end;
			req->background = 1;
			fuse_request_send_background(ff->fc, req);
		}
		kfree(ff);
	}
}

int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

	ff = fuse_file_alloc(fc);
	if (!ff)
		return -ENOMEM;

	ff->fh = 0;
	ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
	if (!fc->no_open || isdir) {
		struct fuse_open_out outarg;
		int err;

		err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
		if (!err) {
			ff->fh = outarg.fh;
			ff->open_flags = outarg.open_flags;

		} else if (err != -ENOSYS || isdir) {
			fuse_file_free(ff);
			return err;
		} else {
			fc->no_open = 1;
		}
	}

	if (isdir)
		ff->open_flags &= ~FOPEN_DIRECT_IO;

	ff->nodeid = nodeid;
	file->private_data = fuse_file_get(ff);

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);

static void fuse_link_write_file(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff = file->private_data;
	/*
	 * file may be written through mmap, so chain it onto the
	 * inode's write_files list
	 */
	spin_lock(&fc->lock);
	if (list_empty(&ff->write_entry))
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fc->lock);
}

void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_DIRECT_IO)
		file->f_op = &fuse_direct_io_file_operations;
	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, 0);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
		if (fc->writeback_cache)
			file_update_time(file);
	}
	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;
	bool lock_inode = (file->f_flags & O_TRUNC) &&
			  fc->atomic_o_trunc &&
			  fc->writeback_cache;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	if (lock_inode)
		mutex_lock(&inode->i_mutex);

	err = fuse_do_open(fc, get_node_id(inode), file, isdir);

	if (!err)
		fuse_finish_open(inode, file);

	if (lock_inode)
		mutex_unlock(&inode->i_mutex);

	return err;
}

static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
	struct fuse_conn *fc = ff->fc;
	struct fuse_req *req = ff->reserved_req;
	struct fuse_release_in *inarg = &req->misc.release.in;

	spin_lock(&fc->lock);
	list_del(&ff->write_entry);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_all(&ff->poll_wait);

	inarg->fh = ff->fh;
	inarg->flags = flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_release_in);
	req->in.args[0].value = inarg;
}

void fuse_release_common(struct file *file, int opcode)
{
	struct fuse_file *ff;
	struct fuse_req *req;

	ff = file->private_data;
	if (unlikely(!ff))
		return;

	req = ff->reserved_req;
	fuse_prepare_release(ff, file->f_flags, opcode);

	if (ff->flock) {
		struct fuse_release_in *inarg = &req->misc.release.in;
		inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
						       (fl_owner_t) file);
	}
	/* Hold vfsmount and dentry until release is finished */
	path_get(&file->f_path);
	req->misc.release.path = file->f_path;

	/*
	 * Normally this will send the RELEASE request, however if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount;
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fc->destroy_req != NULL);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see fuse_vma_close() for !writeback_cache case */
	if (fc->writeback_cache)
		write_inode_now(inode, 1);

	fuse_release_common(file, FUSE_RELEASE);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_file *ff, int flags)
{
	WARN_ON(atomic_read(&ff->count) > 1);
	fuse_prepare_release(ff, flags, FUSE_RELEASE);
	ff->reserved_req->force = 1;
	ff->reserved_req->background = 0;
	fuse_request_send(ff->fc, ff->reserved_req);
	fuse_put_request(ff->fc, ff->reserved_req);
	kfree(ff);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

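	/* 32 XTEA cycles; 0x9E3779B9 is the golden-ratio-derived key schedule constant */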
	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

/*
 * Check if any page in a range is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
				   pgoff_t idx_to)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	bool found = false;

	spin_lock(&fc->lock);
	list_for_each_entry(req, &fi->writepages, writepages_entry) {
		pgoff_t curr_index;

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (idx_from < curr_index + req->num_pages &&
		    curr_index <= idx_to) {
			found = true;
			break;
		}
	}
	spin_unlock(&fc->lock);

	return found;
}

static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	return fuse_range_is_writeback(inode, index, index);
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
	return 0;
}

/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

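/*
 * Flush method: write back dirty data, wait for pending writepages,
 * then send a FUSE_FLUSH request carrying the caller's lock owner id.
 */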
static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_flush_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_flush)
		return 0;

	err = write_inode_now(inode, 1);
	if (err)
		return err;

	mutex_lock(&inode->i_mutex);
	fuse_sync_writes(inode);
	mutex_unlock(&inode->i_mutex);

	req = fuse_get_req_nofail_nopages(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fc, id);
	req->in.h.opcode = FUSE_FLUSH;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->force = 1;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		fc->no_flush = 1;
		err = 0;
	}
	return err;
}

int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int isdir)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_fsync_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	mutex_lock(&inode->i_mutex);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (err)
		goto out;

	fuse_sync_writes(inode);
	err = sync_inode_metadata(inode, 1);
	if (err)
		goto out;

	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
		goto out;

	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? 1 : 0;
	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		if (isdir)
			fc->no_fsyncdir = 1;
		else
			fc->no_fsync = 1;
		err = 0;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return err;
}

static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	return fuse_fsync_common(file, start, end, datasync, 0);
}

void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
		    size_t count, int opcode)
{
	struct fuse_read_in *inarg = &req->misc.read.in;
	struct fuse_file *ff = file->private_data;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	inarg->flags = file->f_flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_read_in);
	req->in.args[0].value = inarg;
	req->out.argvar = 1;
	req->out.numargs = 1;
	req->out.args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_req *req, int write)
{
	unsigned i;

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (write)
			set_page_dirty_lock(page);
		put_page(page);
	}
}

/**
 * In case of a short read, the caller sets 'pos' to the position of the
 * actual end of the fuse request in the IO request.  Otherwise, if
 * bytes_requested == bytes_transferred or rw == WRITE, the caller sets
 * 'pos' to -1.
 *
 * An example:
 * User requested DIO read of 64K. It was split into two 32K fuse requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ? : err;
	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
		io->bytes = pos;

	left = --io->reqs;
	spin_unlock(&io->lock);

	if (!left) {
		long res;

		if (io->err)
			res = io->err;
		else if (io->bytes >= 0 && io->write)
			res = -EIO;
		else {
			res = io->bytes < 0 ? io->size : io->bytes;

			if (!is_sync_kiocb(io->iocb)) {
				struct inode *inode = file_inode(io->iocb->ki_filp);
				struct fuse_conn *fc = get_fuse_conn(inode);
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fc->lock);
				fi->attr_version = ++fc->attr_version;
				spin_unlock(&fc->lock);
			}
		}

		aio_complete(io->iocb, res, 0);
		kfree(io);
	}
}

static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_io_priv *io = req->io;
	ssize_t pos = -1;

	fuse_release_user_pages(req, !io->write);

	if (io->write) {
		if (req->misc.write.in.size != req->misc.write.out.size)
			pos = req->misc.write.in.offset - io->offset +
				req->misc.write.out.size;
	} else {
		if (req->misc.read.in.size != req->out.args[0].size)
			pos = req->misc.read.in.offset - io->offset +
				req->out.args[0].size;
	}

	fuse_aio_complete(io, req->out.h.error, pos);
}

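/*
 * Account num_bytes against the parent fuse_io_priv, attach the AIO
 * completion callback and submit the request in the background.
 */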
static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
		size_t num_bytes, struct fuse_io_priv *io)
{
	spin_lock(&io->lock);
	io->size += num_bytes;
	io->reqs++;
	spin_unlock(&io->lock);

	req->io = io;
	req->end = fuse_aio_complete_req;

	__fuse_get_request(req);
	fuse_request_send_background(fc, req);

	return num_bytes;
}

static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
			     loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;

	fuse_read_fill(req, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		struct fuse_read_in *inarg = &req->misc.read.in;

		inarg->read_flags |= FUSE_READ_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->out.args[0].size;
}

static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size &&
	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, size);
	}
	spin_unlock(&fc->lock);
}

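/*
 * A short READ reply means EOF was hit.  With writeback cache the tail
 * of the request's pages is zero-filled (data beyond the hole may still
 * be dirty in the page cache); otherwise i_size is shrunk to the point
 * where the read ended.
 */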
static void fuse_short_read(struct fuse_req *req, struct inode *inode,
			    u64 attr_ver)
{
	size_t num_read = req->out.args[0].size;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fc->writeback_cache) {
		/*
		 * A hole in the file.  Data after the hole is already in
		 * the page cache but has not reached the client fs yet,
		 * so the hole is not present there.
		 */
		int i;
		int start_idx = num_read >> PAGE_CACHE_SHIFT;
		size_t off = num_read & (PAGE_CACHE_SIZE - 1);

		for (i = start_idx; i < req->num_pages; i++) {
			zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
			off = 0;
		}
	} else {
		loff_t pos = page_offset(req->pages[0]) + num_read;
		fuse_read_update_size(inode, pos, attr_ver);
	}
}

static int fuse_do_readpage(struct file *file, struct page *page)
{
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct inode *inode = page->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	size_t num_read;
	loff_t pos = page_offset(page);
	size_t count = PAGE_CACHE_SIZE;
	u64 attr_ver;
	int err;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	attr_ver = fuse_get_attr_version(fc);

	req->out.page_zeroing = 1;
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = count;
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	err = req->out.h.error;

	if (!err) {
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (num_read < count)
			fuse_short_read(req, inode, attr_ver);

		SetPageUptodate(page);
	}

	fuse_put_request(fc, req);

	return err;
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	err = fuse_do_readpage(file, page);
	fuse_invalidate_atime(inode);
 out:
	unlock_page(page);
	return err;
}

static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	size_t count = req->misc.read.in.size;
	size_t num_read = req->out.args[0].size;
	struct address_space *mapping = NULL;

	for (i = 0; mapping == NULL && i < req->num_pages; i++)
		mapping = req->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF. If file size is larger, truncate it
		 */
		if (!req->out.h.error && num_read < count)
			fuse_short_read(req, inode, req->misc.read.attr_ver);

		fuse_invalidate_atime(inode);
	}

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (!req->out.h.error)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		page_cache_release(page);
	}
	if (req->ff)
		fuse_file_put(req->ff, false);
}

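/*
 * Submit an assembled READ request covering req->num_pages pages: in
 * the background if the server supports asynchronous reads, otherwise
 * synchronously with inline completion.
 */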
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	loff_t pos = page_offset(req->pages[0]);
	size_t count = req->num_pages << PAGE_CACHE_SHIFT;

	req->out.argpages = 1;
	req->out.page_zeroing = 1;
	req->out.page_replace = 1;
	fuse_read_fill(req, file, pos, count, FUSE_READ);
	req->misc.read.attr_ver = fuse_get_attr_version(fc);
	if (fc->async_read) {
		req->ff = fuse_file_get(ff);
		req->end = fuse_readpages_end;
		fuse_request_send_background(fc, req);
	} else {
		fuse_request_send(fc, req);
		fuse_readpages_end(fc, req);
		fuse_put_request(fc, req);
	}
}

struct fuse_fill_data {
	struct fuse_req *req;
	struct file *file;
	struct inode *inode;
	unsigned nr_pages;
};

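/*
 * readpages() fill callback: add the page to the current request, or
 * flush the request and start a new one if it is full, would exceed
 * fc->max_read, or the page is not contiguous with the previous one.
 */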
static int fuse_readpages_fill(void *_data, struct page *page)
{
	struct fuse_fill_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	fuse_wait_on_page_writeback(inode, page->index);

	if (req->num_pages &&
	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
		int nr_alloc = min_t(unsigned, data->nr_pages,
				     FUSE_MAX_PAGES_PER_REQ);
		fuse_send_readpages(req, data->file);
		if (fc->async_read)
			req = fuse_get_req_for_background(fc, nr_alloc);
		else
			req = fuse_get_req(fc, nr_alloc);

		data->req = req;
		if (IS_ERR(req)) {
			unlock_page(page);
			return PTR_ERR(req);
		}
	}
Maxim Patlasov's avatar
Maxim Patlasov committed
887
888
889
890
891
892

	if (WARN_ON(req->num_pages >= req->max_pages)) {
		fuse_put_request(fc, req);
		return -EIO;
	}

	page_cache_get(page);
	req->pages[req->num_pages] = page;
	req->page_descs[req->num_pages].length = PAGE_SIZE;
	req->num_pages++;
	data->nr_pages--;
	return 0;
}

static int fuse_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_data data;
	int err;
	int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.file = file;
	data.inode = inode;
	if (fc->async_read)
		data.req = fuse_get_req_for_background(fc, nr_alloc);
	else
		data.req = fuse_get_req(fc, nr_alloc);
	data.nr_pages = nr_pages;
	err = PTR_ERR(data.req);
	if (IS_ERR(data.req))
		goto out;

	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
	if (!err) {
		if (data.req->num_pages)
			fuse_send_readpages(data.req, file);
		else
			fuse_put_request(fc, data.req);
	}
out:
	return err;
}

static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/*
	 * In auto invalidate mode, always update attributes on read.
	 * Otherwise, only update if we attempt to read past EOF (to ensure
	 * i_size is up to date).
	 */
	if (fc->auto_inval_data ||
	    (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
		int err;
		err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
		if (err)
			return err;
	}

	return generic_file_aio_read(iocb, iov, nr_segs, pos);
}

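/*
 * Fill in a FUSE_WRITE request.  Servers speaking protocol minor < 9
 * get the shorter compat variant of struct fuse_write_in.
 */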
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
			    loff_t pos, size_t count)
{
	struct fuse_write_in *inarg = &req->misc.write.in;
	struct fuse_write_out *outarg = &req->misc.write.out;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	req->in.h.opcode = FUSE_WRITE;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 2;
	if (ff->fc->minor < 9)
		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		req->in.args[0].size = sizeof(struct fuse_write_in);
	req->in.args[0].value = inarg;
	req->in.args[1].size = count;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(struct fuse_write_out);
	req->out.args[0].value = outarg;
}

static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
			      loff_t pos, size_t count, fl_owner_t owner)
{
	struct file *file = io->file;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_write_in *inarg = &req->misc.write.in;

	fuse_write_fill(req, ff, pos, count);
	inarg->flags = file->f_flags;
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}

	if (io->async)
		return fuse_async_req_send(fc, req, count, io);

	fuse_request_send(fc, req);
	return req->misc.write.out.size;