file.c 29.7 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
32
#include <linux/version.h>
Chris Mason's avatar
Chris Mason committed
33
34
35
36
37
38
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
39
40
#include "tree-log.h"
#include "locking.h"
41
#include "compat.h"
Chris Mason's avatar
Chris Mason committed
42
43


44
45
46
47
/*
 * Copy write_bytes bytes of user data from buf into the pages in
 * prepared_pages.  The first page is filled starting at the in-page
 * offset of pos, every following page starting at offset zero.
 *
 * Returns 0 when every chunk was copied, or -EFAULT as soon as
 * __copy_from_user() returns non-zero for a chunk.
 */
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
					 int write_bytes,
					 struct page **prepared_pages,
					 const char __user * buf)
{
	long uncopied = 0;
	int pg_off = pos & (PAGE_CACHE_SIZE - 1);
	int idx = 0;

	while (idx < num_pages && write_bytes > 0) {
		struct page *dst = prepared_pages[idx];
		size_t chunk = min_t(size_t,
				     PAGE_CACHE_SIZE - pg_off, write_bytes);

		/* touch the user buffer up front; the result is not checked */
		fault_in_pages_readable(buf, chunk);

		/* map the page, copy the user data in, then flush and unmap */
		kmap(dst);
		uncopied = __copy_from_user(page_address(dst) + pg_off,
					    buf, chunk);
		flush_dcache_page(dst);
		kunmap(dst);

		buf += chunk;
		write_bytes -= chunk;

		if (uncopied)
			return -EFAULT;

		/* only the first page starts at a non-zero offset */
		pg_off = 0;
		idx++;
	}
	return 0;
}

75
/*
 * Unlock and release up to num_pages pages from the array, stopping at the
 * first NULL slot.  Each page has its Checked bit cleared, is unlocked,
 * marked accessed and then has its page cache reference dropped.
 */
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t idx;

	for (idx = 0; idx < num_pages && pages[idx]; idx++) {
		struct page *pg = pages[idx];

		ClearPageChecked(pg);
		unlock_page(pg);
		mark_page_accessed(pg);
		page_cache_release(pg);
	}
}

88
/*
 * Store 'size' bytes of file data for 'inode', beginning at file offset
 * 'offset', as inline extent data in the btree.  The bytes are copied out
 * of 'pages', starting 'page_offset' bytes into pages[0].
 *
 * If an inline extent item already sits at the search key (or is the item
 * just before it for the same inode), that item is extended as needed and
 * any gap between its old end and 'offset' is zero filled.  Otherwise a
 * new inline extent item is inserted.
 *
 * Returns 0 on success or a negative errno.  An existing item at the key
 * that is not an inline extent is reported with -EIO, and so is running
 * out of pages before 'size' bytes were copied.
 */
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode,
				u64 offset, size_t size,
				struct page **pages, size_t page_offset,
				int num_pages)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct page *page;
	u32 datasize;
	int err = 0;
	int ret;
	int i;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	btrfs_set_trans_block_group(trans, inode);

	key.objectid = inode->i_ino;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}
	if (ret == 1) {
		struct btrfs_key found_key;

		/* no exact match: see if the previous item is our inline extent */
		if (path->slots[0] == 0)
			goto insert;

		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid != inode->i_ino)
			goto insert;

		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto insert;

		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			goto insert;
		}
		/* continue with the key of the item we are going to extend */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		ret = 0;
	}
	if (ret == 0) {
		u32 found_size;
		u64 found_end;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * ret is 0 here, so it must not be used as the error
			 * code: nothing was written and the caller has to
			 * see a failure.
			 */
			err = -EIO;
			btrfs_print_leaf(root, leaf);
			printk("found wasn't inline offset %Lu inode %lu\n",
			       offset, inode->i_ino);
			goto fail;
		}
		found_size = btrfs_file_extent_inline_len(leaf,
					  btrfs_item_nr(leaf, path->slots[0]));
		found_end = key.offset + found_size;

		if (found_end < offset + size) {
			/* search again, asking for room to grow the item */
			btrfs_release_path(root, path);
			ret = btrfs_search_slot(trans, root, &key, path,
						offset + size - found_end, 1);
			BUG_ON(ret != 0);

			ret = btrfs_extend_item(trans, root, path,
						offset + size - found_end);
			if (ret) {
				err = ret;
				goto fail;
			}
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			inode->i_blocks += (offset + size - found_end) >> 9;
		}
		if (found_end < offset) {
			/* zero the gap between the old data and the new data */
			ptr = btrfs_file_extent_inline_start(ei) + found_size;
			memset_extent_buffer(leaf, 0, ptr, offset - found_end);
		}
	} else {
insert:
		btrfs_release_path(root, path);
		datasize = offset + size - key.offset;
		inode->i_blocks += datasize >> 9;
		datasize = btrfs_file_extent_calc_inline_size(datasize);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret) {
			err = ret;
			printk("got bad ret %d\n", ret);
			goto fail;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, ei, trans->transid);
		btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	}
	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;

	i = 0;
	while (size > 0) {
		size_t cur_size;

		/*
		 * check the index before touching pages[i], and stop instead
		 * of reading past the end of the array
		 */
		if (i >= num_pages) {
			printk("i %d num_pages %d\n", i, num_pages);
			err = -EIO;
			break;
		}
		page = pages[i];
		kaddr = kmap_atomic(page, KM_USER0);
		cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
		kunmap_atomic(kaddr, KM_USER0);
		/* only the first page is entered at a non-zero offset */
		page_offset = 0;
		ptr += cur_size;
		size -= cur_size;
		i++;
	}
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_free_path(path);
	return err;
}

231
/*
 * After the user data has been copied into 'pages', record the write of
 * 'write_bytes' bytes at file position 'pos':
 *
 *  - if the file currently ends before the sector aligned start of the
 *    write, a hole extent is inserted to cover the gap
 *  - the data is then either marked delalloc with the pages set dirty, or
 *    stored directly in the btree as an inline extent
 *  - i_size is pushed forward and the inode item updated when the write
 *    extends the file
 *
 * The incoming 'trans' value is not used; the function joins a transaction
 * itself and ends it before returning.
 *
 * Returns 0 on success or a negative errno.  The first error seen is the
 * one returned; a failure from ending the transaction is only reported
 * when nothing failed earlier.
 */
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int ret;
	int i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	u64 inline_size;
	int did_inline = 0;
	loff_t isize = i_size_read(inode);

	/* widen the range to whole sectors */
	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	trans = btrfs_join_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	hint_byte = 0;

	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* FIXME...EIEIO, ENOSPC and more */
	/* insert any holes we need to create */
	if (isize < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->sectorsize - 1;

		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		if (hole_size > 0) {
			/*
			 * NOTE(review): the third argument here is an end
			 * offset, while prepare_pages() passes a length to the
			 * same helper -- confirm which one it expects.
			 */
			btrfs_wait_ordered_range(inode, last_pos_in_file,
						 last_pos_in_file + hole_size);
			mutex_lock(&BTRFS_I(inode)->extent_mutex);
			err = btrfs_drop_extents(trans, root, inode,
						 last_pos_in_file,
						 last_pos_in_file + hole_size,
						 last_pos_in_file,
						 &hint_byte);
			if (err) {
				/* don't leave with extent_mutex still held */
				mutex_unlock(&BTRFS_I(inode)->extent_mutex);
				goto failed;
			}

			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size, 0);
			btrfs_drop_extent_cache(inode, last_pos_in_file,
					last_pos_in_file + hole_size -1);
			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
			btrfs_check_file(root, inode);
		}
		if (err)
			goto failed;
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	inline_size = end_pos;
	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    inline_size > root->fs_info->max_inline ||
	    (inline_size & (root->sectorsize -1)) == 0 ||
	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		/* check for reserved extents on each page, we don't want
		 * to reset the delalloc bit on things that already have
		 * extents reserved.
		 */
		btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
		for (i = 0; i < num_pages; i++) {
			struct page *p = pages[i];
			SetPageUptodate(p);
			ClearPageChecked(p);
			set_page_dirty(p);
		}
	} else {
		u64 aligned_end;
		/* step one, delete the existing extents in this range */
		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		mutex_lock(&BTRFS_I(inode)->extent_mutex);
		err = btrfs_drop_extents(trans, root, inode, start_pos,
					 aligned_end, aligned_end, &hint_byte);
		if (err) {
			/* don't leave with extent_mutex still held */
			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
			goto failed;
		}
		/* keep existing data up to the old i_size inside the item */
		if (isize > inline_size)
			inline_size = min_t(u64, isize, aligned_end);
		inline_size -= start_pos;
		err = insert_inline_extent(trans, root, inode, start_pos,
					   inline_size, pages, 0, num_pages);
		btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
		BUG_ON(err);
		mutex_unlock(&BTRFS_I(inode)->extent_mutex);

		/*
		 * an ugly way to do all the prop accounting around
		 * the page bits and mapping tags
		 */
		set_page_writeback(pages[0]);
		end_page_writeback(pages[0]);
		did_inline = 1;
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		if (did_inline)
			BTRFS_I(inode)->disk_i_size = end_pos;
		btrfs_update_inode(trans, root, inode);
	}
failed:
	/* always end the transaction, but keep the first error we hit */
	ret = btrfs_end_transaction(trans, root);
	if (!err)
		err = ret;
out_unlock:
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}

365
/*
 * Remove every cached extent mapping of 'inode' that overlaps the byte
 * range start..end (inclusive).  A mapping that sticks out in front of or
 * behind the range gets the outside part re-inserted as a new, smaller
 * mapping, unless its block_start is at or above EXTENT_MAP_LAST_BYTE.
 *
 * An 'end' of (u64)-1 means "to the end of the file": the length is then
 * unbounded and no tail piece is ever kept.
 *
 * Always returns 0.
 */
int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
	struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *victim;
	struct extent_map *piece = NULL;
	struct extent_map *spare = NULL;
	u64 len = end - start + 1;
	int bounded = 1;
	int ret;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		bounded = 0;
	}

	for (;;) {
		/* allocate both candidates before taking the spinlock */
		if (!piece)
			piece = alloc_extent_map(GFP_NOFS);
		if (!spare)
			spare = alloc_extent_map(GFP_NOFS);

		spin_lock(&tree->lock);
		victim = lookup_extent_mapping(tree, start, len);
		if (!victim) {
			spin_unlock(&tree->lock);
			break;
		}
		clear_bit(EXTENT_FLAG_PINNED, &victim->flags);
		remove_extent_mapping(tree, victim);

		/* keep the part of the mapping that lies before the range */
		if (victim->block_start < EXTENT_MAP_LAST_BYTE &&
		    victim->start < start) {
			piece->start = victim->start;
			piece->len = start - victim->start;
			piece->block_start = victim->block_start;
			piece->bdev = victim->bdev;
			piece->flags = victim->flags;
			ret = add_extent_mapping(tree, piece);
			BUG_ON(ret);
			free_extent_map(piece);
			/* the spare becomes the candidate for the tail */
			piece = spare;
			spare = NULL;
		}

		/* keep the part of the mapping that lies after the range */
		if (victim->block_start < EXTENT_MAP_LAST_BYTE &&
		    bounded && victim->start + victim->len > start + len) {
			u64 skipped = start + len - victim->start;

			piece->start = start + len;
			piece->len = victim->start + victim->len -
				     (start + len);
			piece->bdev = victim->bdev;
			piece->flags = victim->flags;

			piece->block_start = victim->block_start + skipped;

			ret = add_extent_mapping(tree, piece);
			BUG_ON(ret);
			free_extent_map(piece);
			piece = NULL;
		}
		spin_unlock(&tree->lock);

		/* drop the reference taken by the lookup ... */
		free_extent_map(victim);
		/* ... and the one the tree was holding */
		free_extent_map(victim);
	}

	/* release whatever was preallocated but never inserted */
	if (piece)
		free_extent_map(piece);
	if (spare)
		free_extent_map(spare);
	return 0;
}

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
/*
 * Debugging helper meant to walk the file extent items of an inode and
 * complain when their offsets go backwards.  The walk is compiled out, so
 * the function currently does nothing and always returns 0.
 */
int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
{
#if 0
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	u64 last_offset = 0;
	int nritems;
	int slot;
	int found_type;
	int ret;
	int err = 0;
	u64 extent_end = 0;

	path = btrfs_alloc_path();
	ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
				       last_offset, 0);
	while(1) {
		nritems = btrfs_header_nritems(path->nodes[0]);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret)
				goto out;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != inode->i_ino)
			break;
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto out;

		if (found_key.offset < last_offset) {
			WARN_ON(1);
			btrfs_print_leaf(root, leaf);
			printk("inode %lu found offset %Lu expected %Lu\n",
			       inode->i_ino, found_key.offset, last_offset);
			err = 1;
			goto out;
		}
		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(leaf, extent);
		if (found_type == BTRFS_FILE_EXTENT_REG) {
			extent_end = found_key.offset +
			     btrfs_file_extent_num_bytes(leaf, extent);
		} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
			struct btrfs_item *item;
			item = btrfs_item_nr(leaf, slot);
			extent_end = found_key.offset +
			     btrfs_file_extent_inline_len(leaf, item);
			extent_end = (extent_end + root->sectorsize - 1) &
				~((u64)root->sectorsize -1 );
		}
		last_offset = extent_end;
		path->slots[0]++;
	}
	if (0 && last_offset < inode->i_size) {
		WARN_ON(1);
		btrfs_print_leaf(root, leaf);
		printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino,
		       last_offset, inode->i_size);
		err = 1;

	}
out:
	btrfs_free_path(path);
	return err;
#else
	/* checker disabled: report success without looking at anything */
	return 0;
#endif
}

Chris Mason's avatar
Chris Mason committed
512
513
514
515
516
517
518
519
520
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
521
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
Chris Mason's avatar
Chris Mason committed
522
		       struct btrfs_root *root, struct inode *inode,
523
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
Chris Mason's avatar
Chris Mason committed
524
{
525
526
	u64 extent_end = 0;
	u64 search_start = start;
Zheng Yan's avatar
Zheng Yan committed
527
528
529
	u64 leaf_start;
	u64 root_gen;
	u64 root_owner;
530
	struct extent_buffer *leaf;
Chris Mason's avatar
Chris Mason committed
531
532
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
533
534
535
536
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
Chris Mason's avatar
Chris Mason committed
537
538
539
540
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;
Chris Mason's avatar
Chris Mason committed
541
	int recow;
542
	int ret;
Chris Mason's avatar
Chris Mason committed
543

544
545
	btrfs_drop_extent_cache(inode, start, end - 1);

Chris Mason's avatar
Chris Mason committed
546
547
548
549
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while(1) {
Chris Mason's avatar
Chris Mason committed
550
		recow = 0;
Chris Mason's avatar
Chris Mason committed
551
552
553
554
555
556
557
558
559
560
561
562
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
563
next_slot:
Chris Mason's avatar
Chris Mason committed
564
565
566
567
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
Zheng Yan's avatar
Zheng Yan committed
568
569
570
		leaf_start = 0;
		root_gen = 0;
		root_owner = 0;
Chris Mason's avatar
Chris Mason committed
571
		extent = NULL;
572
		leaf = path->nodes[0];
Chris Mason's avatar
Chris Mason committed
573
		slot = path->slots[0];
574
		ret = 0;
575
		btrfs_item_key_to_cpu(leaf, &key, slot);
576
577
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
Chris Mason's avatar
Chris Mason committed
578
579
			goto out;
		}
580
581
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
Chris Mason's avatar
Chris Mason committed
582
583
			goto out;
		}
Chris Mason's avatar
Chris Mason committed
584
585
586
587
		if (recow) {
			search_start = key.offset;
			continue;
		}
588
589
590
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
591
			found_type = btrfs_file_extent_type(leaf, extent);
592
			if (found_type == BTRFS_FILE_EXTENT_REG) {
593
594
595
596
597
598
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

599
				extent_end = key.offset +
600
				     btrfs_file_extent_num_bytes(leaf, extent);
601
602
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
603
604
				struct btrfs_item *item;
				item = btrfs_item_nr(leaf, slot);
605
606
				found_inline = 1;
				extent_end = key.offset +
607
				     btrfs_file_extent_inline_len(leaf, item);
608
609
610
			}
		} else {
			extent_end = search_start;
Chris Mason's avatar
Chris Mason committed
611
612
613
		}

		/* we found nothing we can drop */
614
615
616
617
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
618
			nritems = btrfs_header_nritems(leaf);
619
620
621
622
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
Chris Mason's avatar
Chris Mason committed
623
				recow = 1;
624
625
626
627
			} else {
				path->slots[0]++;
			}
			goto next_slot;
Chris Mason's avatar
Chris Mason committed
628
629
630
		}

		if (found_inline) {
631
			u64 mask = root->sectorsize - 1;
Chris Mason's avatar
Chris Mason committed
632
633
634
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;
Yan's avatar
Yan committed
635
		if (end <= extent_end && start >= key.offset && found_inline) {
636
			*hint_byte = EXTENT_MAP_INLINE;
Zheng Yan's avatar
Zheng Yan committed
637
638
639
640
641
642
643
644
645
			goto out;
		}

		if (found_extent) {
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
			root_gen = btrfs_header_generation(leaf);
			root_owner = btrfs_header_owner(leaf);
			leaf_start = leaf->start;
646
		}
Zheng Yan's avatar
Zheng Yan committed
647

Chris Mason's avatar
Chris Mason committed
648
		if (end < extent_end && end >= key.offset) {
649
			bookend = 1;
650
			if (found_inline && start <= key.offset)
651
				keep = 1;
Chris Mason's avatar
Chris Mason committed
652
653
654
655
656
657
		}
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
658
			WARN_ON(start & (root->sectorsize - 1));
Chris Mason's avatar
Chris Mason committed
659
			if (found_extent) {
660
661
662
663
664
665
666
667
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
Chris Mason's avatar
Chris Mason committed
668
					dec_i_blocks(inode, old_num - new_num);
Chris Mason's avatar
Chris Mason committed
669
				}
670
671
				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
672
				btrfs_mark_buffer_dirty(leaf);
673
674
675
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
676
677
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
678
						   inline_limit - key.offset);
Chris Mason's avatar
Chris Mason committed
679
680
				dec_i_blocks(inode, (extent_end - key.offset) -
					(inline_limit - key.offset));
681
				btrfs_truncate_item(trans, root, path,
682
						    new_size, 1);
Chris Mason's avatar
Chris Mason committed
683
684
685
686
687
			}
		}
		/* delete the entire extent */
		if (!keep) {
			ret = btrfs_del_item(trans, root, path);
688
			/* TODO update progress marker and return */
Chris Mason's avatar
Chris Mason committed
689
690
			BUG_ON(ret);
			extent = NULL;
Zheng Yan's avatar
Zheng Yan committed
691
692
			btrfs_release_path(root, path);
			/* the extent will be freed later */
Chris Mason's avatar
Chris Mason committed
693
		}
694
		if (bookend && found_inline && start <= key.offset) {
695
696
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
697
						   extent_end - end);
Chris Mason's avatar
Chris Mason committed
698
699
			dec_i_blocks(inode, (extent_end - key.offset) -
					(extent_end - end));
Zheng Yan's avatar
Zheng Yan committed
700
701
702
			ret = btrfs_truncate_item(trans, root, path,
						  new_size, 0);
			BUG_ON(ret);
703
		}
Chris Mason's avatar
Chris Mason committed
704
705
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
Zheng Yan's avatar
Zheng Yan committed
706
			u64 disk_bytenr;
Chris Mason's avatar
Chris Mason committed
707
708
709
710
711
712
713
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
Zheng Yan's avatar
Zheng Yan committed
714
			BUG_ON(ret);
715

716
717
718
719
720
721
722
			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			btrfs_set_file_extent_offset(leaf, extent,
723
724
725
726
727
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
728
			btrfs_set_file_extent_type(leaf, extent,
Chris Mason's avatar
Chris Mason committed
729
						   BTRFS_FILE_EXTENT_REG);
730

Chris Mason's avatar
Chris Mason committed
731
			btrfs_mark_buffer_dirty(path->nodes[0]);
Zheng Yan's avatar
Zheng Yan committed
732
733
734
735
736
737
738
739
740
741
742
743
744
745

			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						ins.objectid, ins.offset);
				BUG_ON(ret);
			}
			btrfs_release_path(root, path);
			if (disk_bytenr != 0) {
Chris Mason's avatar
Chris Mason committed
746
				inode->i_blocks +=
747
748
				      btrfs_file_extent_num_bytes(leaf,
								  extent) >> 9;
Chris Mason's avatar
Chris Mason committed
749
			}
Zheng Yan's avatar
Zheng Yan committed
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
		}

		if (found_extent && !keep) {
			u64 disk_bytenr = le64_to_cpu(old.disk_bytenr);

			if (disk_bytenr != 0) {
				dec_i_blocks(inode, le64_to_cpu(old.num_bytes));
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf_start, root_owner,
						root_gen, key.objectid,
						key.offset, 0);
				BUG_ON(ret);
				*hint_byte = disk_bytenr;
			}
		}

		if (search_start >= end) {
Chris Mason's avatar
Chris Mason committed
769
770
771
772
773
774
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
Chris Mason's avatar
Chris Mason committed
775
	btrfs_check_file(root, inode);
Chris Mason's avatar
Chris Mason committed
776
777
778
779
780
781
	return ret;
}

/*
 * this gets pages into the page cache and locks them down
 */
782
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
783
784
785
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
Chris Mason's avatar
Chris Mason committed
786
787
788
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
789
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
790
	int err = 0;
791
	u64 start_pos;
792
	u64 last_pos;
793

794
	start_pos = pos & ~((u64)root->sectorsize - 1);
795
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
Chris Mason's avatar
Chris Mason committed
796
797

	memset(pages, 0, num_pages * sizeof(struct page *));
798
again:
Chris Mason's avatar
Chris Mason committed
799
800
801
802
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
803
			BUG_ON(1);
Chris Mason's avatar
Chris Mason committed
804
		}
Chris Mason's avatar
Chris Mason committed
805
		wait_on_page_writeback(pages[i]);
Chris Mason's avatar
Chris Mason committed
806
	}
807
	if (start_pos < inode->i_size) {
808
		struct btrfs_ordered_extent *ordered;
809
810
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

829
830
831
		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
832
833
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
834
	}
835
	for (i = 0; i < num_pages; i++) {
836
		clear_page_dirty_for_io(pages[i]);
837
838
839
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
Chris Mason's avatar
Chris Mason committed
840
841
842
843
844
845
846
	return 0;
}

static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
847
848
849
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
Chris Mason's avatar
Chris Mason committed
850
	int ret = 0;
851
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
852
	struct btrfs_root *root = BTRFS_I(inode)->root;
853
854
	struct page **pages = NULL;
	int nrptrs;
Chris Mason's avatar
Chris Mason committed
855
856
857
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
858
859
860

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
Chris Mason's avatar
Chris Mason committed
861
862
	pinned[0] = NULL;
	pinned[1] = NULL;
863

Chris Mason's avatar
Chris Mason committed
864
	pos = *ppos;
865
866
	start_pos = pos;

Chris Mason's avatar
Chris Mason committed
867
868
869
870
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
871
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
872
	if (count == 0)
873
		goto out_nolock;
874
875
876
#ifdef REMOVE_SUID_PATH
	err = remove_suid(&file->f_path);
#else
877
878
879
# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
	err = file_remove_suid(file);
# else
880
	err = remove_suid(fdentry(file));
881
# endif
882
#endif
Chris Mason's avatar
Chris Mason committed
883
	if (err)
884
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
885
886
	file_update_time(file);

887
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
Chris Mason's avatar
Chris Mason committed
888
889
890
891
892

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

893
894
895
896
897
898
899
900
	/*
	 * if this is a nodatasum mount, force summing off for the inode
	 * all the time.  That way a later mount with summing on won't
	 * get confused
	 */
	if (btrfs_test_opt(root, NODATASUM))
		btrfs_set_flag(inode, NODATASUM);

Chris Mason's avatar
Chris Mason committed
901
902
903
904
905
906
907
908
	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
Chris Mason's avatar
Chris Mason committed
909
			ret = btrfs_readpage(NULL, pinned[0]);
Chris Mason's avatar
Chris Mason committed
910
911
912
913
914
915
916
917
918
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
Chris Mason's avatar
Chris Mason committed
919
			ret = btrfs_readpage(NULL, pinned[1]);
Chris Mason's avatar
Chris Mason committed
920
921
922
923
924
925
926
927
928
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
929
930
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
931
					 offset);
Chris Mason's avatar
Chris Mason committed
932
933
934
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

935
		WARN_ON(num_pages > nrptrs);
Chris Mason's avatar
Chris Mason committed
936
		memset(pages, 0, sizeof(pages));
937
938
939
940
941

		ret = btrfs_check_free_space(root, write_bytes, 0);
		if (ret)
			goto out;

Chris Mason's avatar
Chris Mason committed
942
943
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
944
				    write_bytes);
945
946
		if (ret)
			goto out;
Chris Mason's avatar
Chris Mason committed
947
948
949

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
950
951
952
953
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}
Chris Mason's avatar
Chris Mason committed
954
955
956
957

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
958
959
		if (ret)
			goto out;
Chris Mason's avatar
Chris Mason committed
960
961
962
963
964
965

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

966
		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
967
968
		if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root, 1);
969
		btrfs_throttle(root);
Chris Mason's avatar
Chris Mason committed
970
971
972
		cond_resched();
	}
out:
973
	mutex_unlock(&inode->i_mutex);
974

975
out_nolock:
976
	kfree(pages);
Chris Mason's avatar
Chris Mason committed
977
978
979
980
981
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;
982
983

	if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
984
985
986
987
988
989
990
991
992
993
		struct btrfs_trans_handle *trans;

		err = btrfs_fdatawrite_range(inode->i_mapping, start_pos,
					     start_pos + num_written -1,
					     WB_SYNC_NONE);
		if (err < 0)
			num_written = err;

		err = btrfs_wait_on_page_writeback_range(inode->i_mapping,
				 start_pos, start_pos + num_written - 1);
994
995
		if (err < 0)
			num_written = err;
996
997
998
999
1000

		trans = btrfs_start_transaction(root, 1);
		ret = btrfs_log_dentry_safe(trans, root, file->f_dentry);
		if (ret == 0) {
			btrfs_sync_log(trans, root);
For faster browsing, not all history is shown. View entire blame