file.c 33.4 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
38
39
#include "tree-log.h"
#include "locking.h"
40
#include "compat.h"
Chris Mason's avatar
Chris Mason committed
41
42


Chris Mason's avatar
Chris Mason committed
43
44
45
/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
46
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
47
48
					 int write_bytes,
					 struct page **prepared_pages,
49
					 const char __user *buf)
Chris Mason's avatar
Chris Mason committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

Chris Mason's avatar
Chris Mason committed
77
78
79
/*
 * unlocks pages after btrfs_file_write is done with them
 */
80
static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
Chris Mason's avatar
Chris Mason committed
81
82
83
84
85
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
Chris Mason's avatar
Chris Mason committed
86
87
88
89
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here
		 */
Chris Mason's avatar
Chris Mason committed
90
		ClearPageChecked(pages[i]);
Chris Mason's avatar
Chris Mason committed
91
92
93
94
95
96
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

Chris Mason's avatar
Chris Mason committed
97
98
99
100
101
102
103
104
/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
105
static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
Chris Mason's avatar
Chris Mason committed
106
107
108
109
110
111
112
113
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
114
	int i;
115
	struct inode *inode = fdentry(file)->d_inode;
116
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
117
118
	u64 hint_byte;
	u64 num_bytes;
119
120
121
122
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);
Chris Mason's avatar
Chris Mason committed
123

124
	start_pos = pos & ~((u64)root->sectorsize - 1);
125
126
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
Chris Mason's avatar
Chris Mason committed
127

128
129
	end_of_last_block = start_pos + num_bytes - 1;

130
	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
Chris Mason's avatar
Chris Mason committed
131
	trans = btrfs_join_transaction(root, 1);
132
133
134
135
136
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
137
	hint_byte = 0;
138

139
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
140

141
142
143
	/* check for reserved extents on each page, we don't want
	 * to reset the delalloc bit on things that already have
	 * extents reserved.
144
	 */
145
146
147
148
149
150
	btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
151
152
153
154
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		btrfs_update_inode(trans, root, inode);
Chris Mason's avatar
Chris Mason committed
155
	}
156
	err = btrfs_end_transaction(trans, root);
157
out_unlock:
158
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
Chris Mason's avatar
Chris Mason committed
159
160
161
	return err;
}

Chris Mason's avatar
Chris Mason committed
162
163
164
165
/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
166
167
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
168
169
{
	struct extent_map *em;
170
171
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
172
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
173
	u64 len = end - start + 1;
174
175
	int ret;
	int testend = 1;
176
	unsigned long flags;
177
	int compressed = 0;
178

179
	WARN_ON(end < start);
180
	if (end == (u64)-1) {
181
		len = (u64)-1;
182
183
		testend = 0;
	}
184
	while (1) {
185
186
187
188
189
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

190
		spin_lock(&em_tree->lock);
191
		em = lookup_extent_mapping(em_tree, start, len);
192
193
		if (!em) {
			spin_unlock(&em_tree->lock);
194
			break;
195
		}
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			spin_unlock(&em_tree->lock);
			if (em->start <= start &&
			    (!testend || em->start + em->len >= start + len)) {
				free_extent_map(em);
				break;
			}
			if (start < em->start) {
				len = em->start - start;
			} else {
				len = start + len - (em->start + em->len);
				start = em->start + em->len;
			}
			free_extent_map(em);
			continue;
		}
213
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
214
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
215
		remove_extent_mapping(em_tree, em);
216
217
218
219
220

		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
221
			split->orig_start = em->orig_start;
222
			split->block_start = em->block_start;
223
224
225
226
227
228

			if (compressed)
				split->block_len = em->block_len;
			else
				split->block_len = split->len;

229
			split->bdev = em->bdev;
230
			split->flags = flags;
231
232
233
234
235
236
237
238
239
240
241
242
243
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
244
			split->flags = flags;
245

246
247
248
			if (compressed) {
				split->block_len = em->block_len;
				split->block_start = em->block_start;
249
				split->orig_start = em->orig_start;
250
251
252
			} else {
				split->block_len = split->len;
				split->block_start = em->block_start + diff;
253
				split->orig_start = split->start;
254
			}
255
256
257
258
259
260

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
261
262
		spin_unlock(&em_tree->lock);

263
264
265
266
267
		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
268
269
270
271
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
272
273
274
	return 0;
}

Chris Mason's avatar
Chris Mason committed
275
276
277
278
279
280
281
282
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
Chris Mason's avatar
Chris Mason committed
283
284
285
 *
 * inline_limit is used to tell this code which offsets in the file to keep
 * if they contain inline extents.
Chris Mason's avatar
Chris Mason committed
286
 */
287
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
Chris Mason's avatar
Chris Mason committed
288
		       struct btrfs_root *root, struct inode *inode,
289
290
		       u64 start, u64 end, u64 locked_end,
		       u64 inline_limit, u64 *hint_byte)
Chris Mason's avatar
Chris Mason committed
291
{
292
293
	u64 extent_end = 0;
	u64 search_start = start;
294
	u64 ram_bytes = 0;
295
	u64 disk_bytenr = 0;
296
	u64 orig_locked_end = locked_end;
Chris Mason's avatar
Chris Mason committed
297
298
	u8 compression;
	u8 encryption;
299
	u16 other_encoding = 0;
300
	struct extent_buffer *leaf;
Chris Mason's avatar
Chris Mason committed
301
302
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
303
304
305
306
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
Chris Mason's avatar
Chris Mason committed
307
	int bookend;
Yan Zheng's avatar
Yan Zheng committed
308
	int found_type = 0;
Chris Mason's avatar
Chris Mason committed
309
310
	int found_extent;
	int found_inline;
Chris Mason's avatar
Chris Mason committed
311
	int recow;
312
	int ret;
Chris Mason's avatar
Chris Mason committed
313

314
	inline_limit = 0;
315
	btrfs_drop_extent_cache(inode, start, end - 1, 0);
316

Chris Mason's avatar
Chris Mason committed
317
318
319
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
320
	while (1) {
Chris Mason's avatar
Chris Mason committed
321
		recow = 0;
Chris Mason's avatar
Chris Mason committed
322
323
324
325
326
327
328
329
330
331
332
333
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
334
next_slot:
Chris Mason's avatar
Chris Mason committed
335
336
337
338
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
Chris Mason's avatar
Chris Mason committed
339
340
		compression = 0;
		encryption = 0;
Chris Mason's avatar
Chris Mason committed
341
		extent = NULL;
342
		leaf = path->nodes[0];
Chris Mason's avatar
Chris Mason committed
343
		slot = path->slots[0];
344
		ret = 0;
345
		btrfs_item_key_to_cpu(leaf, &key, slot);
346
347
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
Chris Mason's avatar
Chris Mason committed
348
349
			goto out;
		}
350
351
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
Chris Mason's avatar
Chris Mason committed
352
353
			goto out;
		}
Chris Mason's avatar
Chris Mason committed
354
		if (recow) {
355
			search_start = max(key.offset, start);
Chris Mason's avatar
Chris Mason committed
356
357
			continue;
		}
358
359
360
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
361
			found_type = btrfs_file_extent_type(leaf, extent);
362
363
364
365
366
367
			compression = btrfs_file_extent_compression(leaf,
								    extent);
			encryption = btrfs_file_extent_encryption(leaf,
								  extent);
			other_encoding = btrfs_file_extent_other_encoding(leaf,
								  extent);
Yan Zheng's avatar
Yan Zheng committed
368
369
			if (found_type == BTRFS_FILE_EXTENT_REG ||
			    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
370
371
372
373
374
375
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

376
				extent_end = key.offset +
377
				     btrfs_file_extent_num_bytes(leaf, extent);
378
379
				ram_bytes = btrfs_file_extent_ram_bytes(leaf,
								extent);
380
381
382
383
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				found_inline = 1;
				extent_end = key.offset +
384
				     btrfs_file_extent_inline_len(leaf, extent);
385
386
387
			}
		} else {
			extent_end = search_start;
Chris Mason's avatar
Chris Mason committed
388
389
390
		}

		/* we found nothing we can drop */
391
392
393
394
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
395
			nritems = btrfs_header_nritems(leaf);
396
397
398
399
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
Chris Mason's avatar
Chris Mason committed
400
				recow = 1;
401
402
403
404
			} else {
				path->slots[0]++;
			}
			goto next_slot;
Chris Mason's avatar
Chris Mason committed
405
406
		}

407
		if (end <= extent_end && start >= key.offset && found_inline)
408
			*hint_byte = EXTENT_MAP_INLINE;
Zheng Yan's avatar
Zheng Yan committed
409
410
411
412

		if (found_extent) {
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
413
		}
Zheng Yan's avatar
Zheng Yan committed
414

Chris Mason's avatar
Chris Mason committed
415
		if (end < extent_end && end >= key.offset) {
416
			bookend = 1;
417
			if (found_inline && start <= key.offset)
418
				keep = 1;
Chris Mason's avatar
Chris Mason committed
419
		}
Yan Zheng's avatar
Yan Zheng committed
420

421
422
423
424
425
426
427
428
429
430
431
432
433
		if (bookend && found_extent) {
			if (locked_end < extent_end) {
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
				if (!ret) {
					btrfs_release_path(root, path);
					lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
					locked_end = extent_end;
					continue;
				}
Yan Zheng's avatar
Yan Zheng committed
434
435
				locked_end = extent_end;
			}
436
437
438
439
			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
					   disk_bytenr,
440
441
442
443
					   le64_to_cpu(old.disk_num_bytes), 0,
					   root->root_key.objectid,
					   key.objectid, key.offset -
					   le64_to_cpu(old.offset));
444
445
				BUG_ON(ret);
			}
Yan Zheng's avatar
Yan Zheng committed
446
447
448
449
450
451
452
453
		}

		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

Chris Mason's avatar
Chris Mason committed
454
455
456
457
458
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
459
			WARN_ON(start & (root->sectorsize - 1));
Chris Mason's avatar
Chris Mason committed
460
			if (found_extent) {
461
462
463
464
465
466
467
468
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
469
470
					inode_sub_bytes(inode, old_num -
							new_num);
Chris Mason's avatar
Chris Mason committed
471
				}
472
473
				btrfs_set_file_extent_num_bytes(leaf,
							extent, new_num);
474
				btrfs_mark_buffer_dirty(leaf);
475
476
477
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
478
479
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
480
						   inline_limit - key.offset);
481
482
				inode_sub_bytes(inode, extent_end -
						inline_limit);
Chris Mason's avatar
Chris Mason committed
483
484
485
486
487
488
				btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
				if (!compression && !encryption) {
					btrfs_truncate_item(trans, root, path,
							    new_size, 1);
				}
Chris Mason's avatar
Chris Mason committed
489
490
491
492
			}
		}
		/* delete the entire extent */
		if (!keep) {
493
494
495
			if (found_inline)
				inode_sub_bytes(inode, extent_end -
						key.offset);
Chris Mason's avatar
Chris Mason committed
496
			ret = btrfs_del_item(trans, root, path);
497
			/* TODO update progress marker and return */
Chris Mason's avatar
Chris Mason committed
498
499
			BUG_ON(ret);
			extent = NULL;
Zheng Yan's avatar
Zheng Yan committed
500
501
			btrfs_release_path(root, path);
			/* the extent will be freed later */
Chris Mason's avatar
Chris Mason committed
502
		}
503
		if (bookend && found_inline && start <= key.offset) {
504
505
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
506
						   extent_end - end);
507
			inode_sub_bytes(inode, end - key.offset);
Chris Mason's avatar
Chris Mason committed
508
509
510
511
512
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
			if (!compression && !encryption)
				ret = btrfs_truncate_item(trans, root, path,
							  new_size, 0);
Zheng Yan's avatar
Zheng Yan committed
513
			BUG_ON(ret);
514
		}
Chris Mason's avatar
Chris Mason committed
515
516
517
518
519
520
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
521

Chris Mason's avatar
Chris Mason committed
522
			btrfs_release_path(root, path);
523
			path->leave_spinning = 1;
Chris Mason's avatar
Chris Mason committed
524
525
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
Zheng Yan's avatar
Zheng Yan committed
526
			BUG_ON(ret);
527

528
529
530
531
532
533
			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

534
535
536
537
538
539
			btrfs_set_file_extent_compression(leaf, extent,
							  compression);
			btrfs_set_file_extent_encryption(leaf, extent,
							 encryption);
			btrfs_set_file_extent_other_encoding(leaf, extent,
							     other_encoding);
540
			btrfs_set_file_extent_offset(leaf, extent,
541
542
543
544
545
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
546
547
548
549
550
551
552
553
554

			/*
			 * set the ram bytes to the size of the full extent
			 * before splitting.  This is a worst case flag,
			 * but its the best we can do because we don't know
			 * how splitting affects compression
			 */
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							ram_bytes);
Yan Zheng's avatar
Yan Zheng committed
555
			btrfs_set_file_extent_type(leaf, extent, found_type);
556

557
			btrfs_unlock_up_safe(path, 1);
Chris Mason's avatar
Chris Mason committed
558
			btrfs_mark_buffer_dirty(path->nodes[0]);
559
			btrfs_set_lock_blocking(path->nodes[0]);
Zheng Yan's avatar
Zheng Yan committed
560

561
			path->leave_spinning = 0;
Zheng Yan's avatar
Zheng Yan committed
562
			btrfs_release_path(root, path);
563
			if (disk_bytenr != 0)
564
				inode_add_bytes(inode, extent_end - end);
Zheng Yan's avatar
Zheng Yan committed
565
566
567
		}

		if (found_extent && !keep) {
568
			u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
Zheng Yan's avatar
Zheng Yan committed
569

570
			if (old_disk_bytenr != 0) {
571
572
				inode_sub_bytes(inode,
						le64_to_cpu(old.num_bytes));
Zheng Yan's avatar
Zheng Yan committed
573
				ret = btrfs_free_extent(trans, root,
574
						old_disk_bytenr,
Zheng Yan's avatar
Zheng Yan committed
575
						le64_to_cpu(old.disk_num_bytes),
576
577
578
						0, root->root_key.objectid,
						key.objectid, key.offset -
						le64_to_cpu(old.offset));
Zheng Yan's avatar
Zheng Yan committed
579
				BUG_ON(ret);
580
				*hint_byte = old_disk_bytenr;
Zheng Yan's avatar
Zheng Yan committed
581
582
583
584
			}
		}

		if (search_start >= end) {
Chris Mason's avatar
Chris Mason committed
585
586
587
588
589
590
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
591
592
593
	if (locked_end > orig_locked_end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
			      locked_end - 1, GFP_NOFS);
Yan Zheng's avatar
Yan Zheng committed
594
	}
Chris Mason's avatar
Chris Mason committed
595
596
597
	return ret;
}

Yan Zheng's avatar
Yan Zheng committed
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
/*
 * Check whether the item in @slot of @leaf is a plain regular file extent
 * of inode @objectid that points at disk extent @bytenr and can be merged
 * with a neighbour.
 *
 * A non-zero *start or *end on entry is a constraint: the candidate must
 * begin (respectively end) exactly there.  On success both are set to the
 * candidate's file range and 1 is returned; otherwise 0 is returned and
 * neither is touched.
 */
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key found;
	u64 item_end;

	/* the slot has to exist in this leaf */
	if (slot < 0)
		return 0;
	if (slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &found, slot);
	if (found.objectid != objectid)
		return 0;
	if (found.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	/* only unencoded regular extents on the same disk extent qualify */
	item = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, item) != BTRFS_FILE_EXTENT_REG)
		return 0;
	if (btrfs_file_extent_disk_bytenr(leaf, item) != bytenr)
		return 0;
	if (btrfs_file_extent_compression(leaf, item))
		return 0;
	if (btrfs_file_extent_encryption(leaf, item))
		return 0;
	if (btrfs_file_extent_other_encoding(leaf, item))
		return 0;

	item_end = found.offset + btrfs_file_extent_num_bytes(leaf, item);
	if (*start && *start != found.offset)
		return 0;
	if (*end && *end != item_end)
		return 0;

	*start = found.offset;
	*end = item_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 *
 * The range must lie inside a single pre-allocated extent; anything else
 * trips a BUG_ON.  Where possible the written part is merged with a
 * neighbouring regular extent that points at the same disk extent.
 *
 * The caller is expected to hold the io tree lock up to @end; when the
 * extent runs past @end the lock is extended here and the extra part is
 * dropped again before returning.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * negative error returned by btrfs_search_slot().
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct inode *inode, u64 start, u64 end)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split = start;
	u64 locked_end = end;
	int extent_type;
	int split_end = 1;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	/*
	 * first pass splits at start; the second pass (split == end) looks
	 * up the item that covers the byte just before end
	 */
	key.objectid = inode->i_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	if (split == start)
		key.offset = split;
	else
		key.offset = split - 1;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	/* on error the path does not point at a usable leaf */
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	BUG_ON(key.objectid != inode->i_ino ||
	       key.type != BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(leaf, fi);
	BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	BUG_ON(key.offset > start || extent_end < end);

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	/* file offset that corresponds to the start of the disk extent */
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);

	/* nothing to cut off in front, go straight to the tail split */
	if (key.offset == start)
		split = end;

	if (key.offset == start && extent_end == end) {
		/* the whole item is written: convert it, merging if possible */
		int del_nr = 0;
		int del_slot = 0;
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			extent_end = other_end;
			del_slot = path->slots[0] + 1;
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						inode->i_ino, orig_offset);
			BUG_ON(ret);
		}
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			key.offset = other_start;
			del_slot = path->slots[0];
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						inode->i_ino, orig_offset);
			BUG_ON(ret);
		}
		split_end = 0;
		if (del_nr == 0) {
			btrfs_set_file_extent_type(leaf, fi,
						   BTRFS_FILE_EXTENT_REG);
			goto done;
		}

		/* the item just before the deleted ones absorbs the range */
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		BUG_ON(ret);
		goto release;
	} else if (split == start) {
		/* the lock has to cover the whole extent being split */
		if (locked_end < extent_end) {
			ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
			if (!ret) {
				/* wait for the lock without holding the path */
				btrfs_release_path(root, path);
				lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
				locked_end = extent_end;
				goto again;
			}
			locked_end = extent_end;
		}
		/* shrink the existing item to the part in front of start */
		btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
	} else  {
		/* move the existing item so it begins at the split point */
		BUG_ON(key.offset != start);
		key.offset = split;
		btrfs_set_file_extent_offset(leaf, fi, key.offset -
					     orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
		btrfs_set_item_key_safe(trans, root, path, &key);
		extent_end = split;
	}

	if (extent_end == end) {
		split_end = 0;
		extent_type = BTRFS_FILE_EXTENT_REG;
	}
	if (extent_end == end && split == start) {
		/* try to hand the written range to the following item */
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]++;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			key.offset = split;
			btrfs_set_item_key_safe(trans, root, path, &key);
			btrfs_set_file_extent_offset(leaf, fi, key.offset -
						     orig_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - split);
			goto done;
		}
	}
	if (extent_end == end && split == end) {
		/* try to hand the written range to the preceding item */
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]--;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
							other_start);
			goto done;
		}
	}

	btrfs_mark_buffer_dirty(leaf);

	/* no merge possible: the new item needs its own reference */
	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
				   root->root_key.objectid,
				   inode->i_ino, orig_offset);
	BUG_ON(ret);
	btrfs_release_path(root, path);

	key.offset = start;
	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
	BUG_ON(ret);

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
	btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_compression(leaf, fi, 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
done:
	btrfs_mark_buffer_dirty(leaf);

release:
	btrfs_release_path(root, path);
	if (split_end && split == start) {
		/* the front is done, come back for the tail */
		split = end;
		goto again;
	}
	/* ret may still hold a positive try_lock_extent() result */
	ret = 0;
out:
	if (locked_end > end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
			      GFP_NOFS);
	}
	btrfs_free_path(path);
	return ret;
}

Chris Mason's avatar
Chris Mason committed
835
/*
Chris Mason's avatar
Chris Mason committed
836
837
838
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
Chris Mason's avatar
Chris Mason committed
839
 */
840
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
841
842
843
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
Chris Mason's avatar
Chris Mason committed
844
845
846
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
847
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
848
	int err = 0;
849
	u64 start_pos;
850
	u64 last_pos;
851

852
	start_pos = pos & ~((u64)root->sectorsize - 1);
853
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
Chris Mason's avatar
Chris Mason committed
854

Yan Zheng's avatar
Yan Zheng committed
855
856
857
858
859
860
	if (start_pos > inode->i_size) {
		err = btrfs_cont_expand(inode, start_pos);
		if (err)
			return err;
	}

Chris Mason's avatar
Chris Mason committed
861
	memset(pages, 0, num_pages * sizeof(struct page *));
862
again:
Chris Mason's avatar
Chris Mason committed
863
864
865
866
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
867
			BUG_ON(1);
Chris Mason's avatar
Chris Mason committed
868
		}
Chris Mason's avatar
Chris Mason committed
869
		wait_on_page_writeback(pages[i]);
Chris Mason's avatar
Chris Mason committed
870
	}
871
	if (start_pos < inode->i_size) {
872
		struct btrfs_ordered_extent *ordered;
873
874
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
875
876
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    last_pos - 1);
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

894
895
896
		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
897
898
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
899
	}
900
	for (i = 0; i < num_pages; i++) {
901
		clear_page_dirty_for_io(pages[i]);
902
903
904
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
Chris Mason's avatar
Chris Mason committed
905
906
907
908
909
910
911
	return 0;
}

static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
912
913
914
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
Chris Mason's avatar
Chris Mason committed
915
	int ret = 0;
916
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
917
	struct btrfs_root *root = BTRFS_I(inode)->root;
918
919
	struct page **pages = NULL;
	int nrptrs;
Chris Mason's avatar
Chris Mason committed
920
921
922
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
923
924
925
926
	int will_write;

	will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
		      (file->f_flags & O_DIRECT));
927
928
929

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
Chris Mason's avatar
Chris Mason committed
930
931
	pinned[0] = NULL;
	pinned[1] = NULL;
932

Chris Mason's avatar
Chris Mason committed
933
	pos = *ppos;
934
935
	start_pos = pos;

Chris Mason's avatar
Chris Mason committed
936
937
938
939
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
940
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
941
	if (count == 0)
942
		goto out_nolock;
943

944
	err = file_remove_suid(file);
Chris Mason's avatar
Chris Mason committed
945
	if (err)
946
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
947
948
	file_update_time(file);

949
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
Chris Mason's avatar
Chris Mason committed
950
951

	mutex_lock(&inode->i_mutex);
952
	BTRFS_I(inode)->sequence++;
Chris Mason's avatar
Chris Mason committed
953
954
955
956
957
958
959
960
961
962
963
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
Chris Mason's avatar
Chris Mason committed
964
			ret = btrfs_readpage(NULL, pinned[0]);
Chris Mason's avatar
Chris Mason committed
965
966
967
968
969
970
971
972
973
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
Chris Mason's avatar
Chris Mason committed
974
			ret = btrfs_readpage(NULL, pinned[1]);
Chris Mason's avatar
Chris Mason committed
975
976
977
978
979
980
981
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

982
	while (count > 0) {
Chris Mason's avatar
Chris Mason committed
983
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
984
985
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
986
					 offset);
Chris Mason's avatar
Chris Mason committed
987
988
989
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

990
		WARN_ON(num_pages > nrptrs);
991
		memset(pages, 0, sizeof(struct page *) * nrptrs);
992

993
		ret = btrfs_check_data_free_space(root, inode, write_bytes);
994
995
996
		if (ret)
			goto out;

Chris Mason's avatar
Chris Mason committed
997
998
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
999
				    write_bytes);
1000
1001
1002
		if (ret) {
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
1003
			goto out;
1004
		}
Chris Mason's avatar
Chris Mason committed
1005
1006
1007

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
1008
		if (ret) {
1009
1010
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
1011
1012
1013
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}
Chris Mason's avatar
Chris Mason committed
1014
1015
1016
1017

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
1018
1019
1020
		if (ret) {
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
1021
			goto out;
1022
		}
Chris Mason's avatar
Chris Mason committed
1023

1024
1025
1026
		if (will_write) {
			btrfs_fdatawrite_range(inode->i_mapping, pos,
					       pos + write_bytes - 1,
1027
					       WB_SYNC_ALL);
1028
1029
1030
1031
1032
1033
1034
1035
1036
		} else {
			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
							   num_pages);
			if (num_pages <
			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
				btrfs_btree_balance_dirty(root, 1);
			btrfs_throttle(root);
		}

Chris Mason's avatar
Chris Mason committed
1037
1038
1039
1040
1041
1042
1043
1044
		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		cond_resched();
	}
out:
1045
	mutex_unlock(&inode->i_mutex);
1046
1047
	if (ret)
		err = ret;
1048

1049
out_nolock:
1050
	kfree(pages);
Chris Mason's avatar
Chris Mason committed
1051
1052
1053
1054
1055
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;
1056

1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
	/*
	 * we want to make sure fsync finds this change
	 * but we haven't joined a transaction running right now.
	 *
	 * Later on, someone is sure to update the inode and get the
	 * real transid recorded.
	 *
	 * We set last_trans now to the fs_info generation + 1,
	 * this will either be one more than the running transaction
	 * or the generation used for the next transaction if there isn't
	 * one running right now.
	 */
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;

1071
	if (num_written > 0 && will_write) {
1072
1073
		struct btrfs_trans_handle *trans;

1074
1075
		err = btrfs_wait_ordered_range(inode, start_pos, num_written);
		if (err)
1076
			num_written = err;
1077

1078
1079
1080
1081
1082
		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
			trans = btrfs_start_transaction(root, 1);
			ret = btrfs_log_dentry_safe(trans, root,
						    file->f_dentry);
			if (ret == 0) {
1083
1084
1085
1086
1087
				ret = btrfs_sync_log(trans, root);
				if (ret == 0)
					btrfs_end_transaction(trans, root);
				else
					btrfs_commit_transaction(trans, root);
1088
1089
1090
1091
1092
1093
1094
1095
			} else {
				btrfs_commit_transaction(trans, root);
			}
		}
		if (file->f_flags & O_DIRECT) {
			invalidate_mapping_pages(inode->i_mapping,
			      start_pos >> PAGE_CACHE_SHIFT,
			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1096
		}
1097
	}
Chris Mason's avatar
Chris Mason committed
1098
1099
1100
1101
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

1102
int btrfs_release_file(struct inode *inode, struct file *filp)
1103
{
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
	/*
	 * ordered_data_close is set by settattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (BTRFS_I(inode)->ordered_data_close) {
		BTRFS_I(inode)->ordered_data_close = 0;
		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
			filemap_flush(inode->i_mapping);
	}
Sage Weil's avatar
Sage Weil committed
1116
1117
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);
1118
1119
1120
	return 0;
}

Chris Mason's avatar
Chris Mason committed
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates in
 * the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
1132
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
Chris Mason's avatar
Chris Mason committed
1133
1134
1135
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
1136
	int ret = 0;
Chris Mason's avatar
Chris Mason committed
1137
1138
1139
	struct btrfs_trans_handle *trans;

	/*
1140
1141
	 * check the transaction that last modified this inode
	 * and see if its already been committed
Chris Mason's avatar
Chris Mason committed
1142
	 */
1143
1144
	if (!BTRFS_I(inode)->last_trans)
		goto out;
1145

1146
1147
1148
1149
1150
1151
1152
1153
1154
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

Yan Zheng's avatar
Yan Zheng committed
1155
	root->log_batch++;
1156
1157
	filemap_fdatawrite(inode->i_mapping);
	btrfs_wait_ordered_range(inode, 0, (u64)-1);
Yan Zheng's avatar
Yan Zheng committed
1158
	root->log_batch++;
1159

1160
1161
	if (datasync && !(inode->i_state & I_DIRTY_PAGES))
		goto out;
1162
	/*
1163
1164
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
1165
	if (file && file->private_data)
Sage Weil's avatar
Sage Weil committed
1166
1167
		btrfs_ioctl_trans_end(file);

Chris Mason's avatar
Chris Mason committed
1168
1169
1170
1171
1172
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
1173

1174
	ret = btrfs_log_dentry_safe(trans, root, dentry);
1175
	if (ret