file.c 29.6 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
32
#include <linux/version.h>
Chris Mason's avatar
Chris Mason committed
33
34
35
36
37
38
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
39
40
#include "tree-log.h"
#include "locking.h"
41
#include "compat.h"
Chris Mason's avatar
Chris Mason committed
42
43


44
45
46
47
/*
 * Copy up to @write_bytes bytes from the user buffer @buf into the pages in
 * @prepared_pages, one page at a time.
 *
 * @pos:            file position of the write; only its offset inside the
 *                  first page is used here
 * @num_pages:      number of entries in @prepared_pages
 * @write_bytes:    number of bytes to copy
 * @prepared_pages: destination pages
 * @buf:            user space source buffer
 *
 * Returns 0 on success, or -EFAULT as soon as __copy_from_user() reports a
 * non-zero result for a page.
 */
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
					 int write_bytes,
					 struct page **prepared_pages,
					 const char __user * buf)
{
	long uncopied = 0;
	int page_off = pos & (PAGE_CACHE_SIZE - 1);
	int idx = 0;

	while (idx < num_pages && write_bytes > 0) {
		struct page *dst = prepared_pages[idx];
		size_t chunk = min_t(size_t, PAGE_CACHE_SIZE - page_off,
				     write_bytes);

		fault_in_pages_readable(buf, chunk);

		/* map the page, copy the user data in, then flush the dcache */
		kmap(dst);
		uncopied = __copy_from_user(page_address(dst) + page_off,
					    buf, chunk);
		flush_dcache_page(dst);
		kunmap(dst);

		buf += chunk;
		write_bytes -= chunk;

		if (uncopied)
			return -EFAULT;

		/* only the first page starts at a non-zero offset */
		idx++;
		page_off = 0;
	}
	return 0;
}

75
/*
 * Unlock and drop the reference on each page in @pages.
 *
 * Walks at most @num_pages entries and stops at the first NULL slot.  For
 * every page the Checked bit is cleared, the page is unlocked, marked
 * accessed and released.
 */
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	struct page **cur = pages;
	struct page **stop = pages + num_pages;

	for (; cur < stop && *cur; cur++) {
		ClearPageChecked(*cur);
		unlock_page(*cur);
		mark_page_accessed(*cur);
		page_cache_release(*cur);
	}
}

88
/*
 * Store @size bytes taken from @pages as inline file extent data for @inode
 * at file offset @offset.
 *
 * If an inline extent item already exists at @offset, or is the item just
 * before the search position for this inode, it is extended as needed and
 * written into.  Otherwise a new inline extent item is inserted.
 *
 * @page_offset is the offset into the first page to start copying from;
 * later pages are copied from offset 0.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * returned by the tree operation that failed.
 */
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode,
				u64 offset, size_t size,
				struct page **pages, size_t page_offset,
				int num_pages)
{
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_key key;
	unsigned long ptr;
	u32 found_size;
	u64 found_end;
	u32 datasize;
	int err = 0;
	int ret;
	int i;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	btrfs_set_trans_block_group(trans, inode);

	key.objectid = inode->i_ino;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (ret == 1) {
		struct btrfs_key prev_key;

		/* no exact match: look at the item in the previous slot */
		if (path->slots[0] == 0)
			goto insert;

		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &prev_key, path->slots[0]);

		if (prev_key.objectid != inode->i_ino ||
		    prev_key.type != BTRFS_EXTENT_DATA_KEY)
			goto insert;

		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE)
			goto insert;

		/* reuse the previous inline item: adopt its key */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		ret = 0;
	}
	if (ret != 0)
		goto insert;

	/* an item with this key exists; it has to be an inline extent */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, ei) != BTRFS_FILE_EXTENT_INLINE) {
		/*
		 * NOTE(review): ret is 0 on this path, so the function
		 * returns 0 here although nothing was written.
		 */
		err = ret;
		btrfs_print_leaf(root, leaf);
		printk("found wasn't inline offset %Lu inode %lu\n",
		       offset, inode->i_ino);
		goto fail;
	}
	found_size = btrfs_file_extent_inline_len(leaf,
				  btrfs_item_nr(leaf, path->slots[0]));
	found_end = key.offset + found_size;

	if (found_end < offset + size) {
		/* the item is too small: search again, then grow it */
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path,
					offset + size - found_end, 1);
		BUG_ON(ret != 0);

		ret = btrfs_extend_item(trans, root, path,
					offset + size - found_end);
		if (ret) {
			err = ret;
			goto fail;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		inode->i_blocks += (offset + size - found_end) >> 9;
	}
	if (found_end < offset) {
		/* zero the gap between the old end and the new data */
		ptr = btrfs_file_extent_inline_start(ei) + found_size;
		memset_extent_buffer(leaf, 0, ptr, offset - found_end);
	}
	goto copy_data;

insert:
	btrfs_release_path(root, path);
	datasize = offset + size - key.offset;
	inode->i_blocks += datasize >> 9;
	datasize = btrfs_file_extent_calc_inline_size(datasize);
	ret = btrfs_insert_empty_item(trans, root, path, &key, datasize);
	if (ret) {
		err = ret;
		printk("got bad ret %d\n", ret);
		goto fail;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);

copy_data:
	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;

	/* copy the data page by page into the item */
	for (i = 0; size > 0; i++) {
		struct page *page = pages[i];
		char *kaddr = kmap_atomic(page, KM_USER0);
		ssize_t cur_size = min_t(size_t,
					 PAGE_CACHE_SIZE - page_offset, size);

		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
		kunmap_atomic(kaddr, KM_USER0);
		page_offset = 0;
		ptr += cur_size;
		size -= cur_size;
		/*
		 * NOTE(review): this only reports running past num_pages,
		 * and it runs after pages[i] has already been used.
		 */
		if (i >= num_pages)
			printk("i %d num_pages %d\n", i, num_pages);
	}
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_free_path(path);
	return err;
}

231
/*
 * After user data has been copied into @pages, update the extent state and
 * file metadata for the range [@pos, @pos + @write_bytes).
 *
 * The work is done inside a transaction joined here; the incoming @trans
 * value is ignored and overwritten.  Steps:
 *  - lock the sector-aligned range in the inode's io_tree and mark it
 *    uptodate
 *  - if the write starts past i_size, drop any extents in the gap and
 *    insert a hole extent covering it
 *  - either mark the range delalloc and dirty every page, or store the
 *    data as an inline extent
 *  - grow i_size when the write extends the file
 *
 * Pages are not unlocked or released here.
 *
 * Returns 0 on success or a negative errno.  The first error seen is the
 * one returned; a failure from btrfs_end_transaction() is only reported
 * when nothing failed earlier.
 */
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int ret;
	int i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	u64 inline_size;
	int did_inline = 0;
	loff_t isize = i_size_read(inode);

	/* round the range out to sector boundaries */
	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	trans = btrfs_join_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	hint_byte = 0;

	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* FIXME...EIEIO, ENOSPC and more */
	/* insert any holes we need to create */
	if (isize < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->sectorsize - 1;

		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		if (hole_size > 0) {
			btrfs_wait_ordered_range(inode, last_pos_in_file,
						 last_pos_in_file + hole_size);
			mutex_lock(&BTRFS_I(inode)->extent_mutex);
			err = btrfs_drop_extents(trans, root, inode,
						 last_pos_in_file,
						 last_pos_in_file + hole_size,
						 last_pos_in_file,
						 &hint_byte);
			if (err) {
				/* do not leave extent_mutex held on error */
				mutex_unlock(&BTRFS_I(inode)->extent_mutex);
				goto failed;
			}

			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size, 0);
			btrfs_drop_extent_cache(inode, last_pos_in_file,
					last_pos_in_file + hole_size - 1);
			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
			btrfs_check_file(root, inode);
		}
		if (err)
			goto failed;
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	inline_size = end_pos;
	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    inline_size > root->fs_info->max_inline ||
	    (inline_size & (root->sectorsize - 1)) == 0 ||
	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		/* check for reserved extents on each page, we don't want
		 * to reset the delalloc bit on things that already have
		 * extents reserved.
		 */
		btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
		for (i = 0; i < num_pages; i++) {
			struct page *p = pages[i];

			SetPageUptodate(p);
			ClearPageChecked(p);
			set_page_dirty(p);
		}
	} else {
		u64 aligned_end;

		/* step one, delete the existing extents in this range */
		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		mutex_lock(&BTRFS_I(inode)->extent_mutex);
		err = btrfs_drop_extents(trans, root, inode, start_pos,
					 aligned_end, aligned_end, &hint_byte);
		if (err) {
			/* do not leave extent_mutex held on error */
			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
			goto failed;
		}
		if (isize > inline_size)
			inline_size = min_t(u64, isize, aligned_end);
		inline_size -= start_pos;
		err = insert_inline_extent(trans, root, inode, start_pos,
					   inline_size, pages, 0, num_pages);
		btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
		BUG_ON(err);
		mutex_unlock(&BTRFS_I(inode)->extent_mutex);

		/*
		 * an ugly way to do all the prop accounting around
		 * the page bits and mapping tags
		 */
		set_page_writeback(pages[0]);
		end_page_writeback(pages[0]);
		did_inline = 1;
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		if (did_inline)
			BTRFS_I(inode)->disk_i_size = end_pos;
		btrfs_update_inode(trans, root, inode);
	}
failed:
	/* always end the transaction, but keep the first error seen */
	ret = btrfs_end_transaction(trans, root);
	if (!err)
		err = ret;
out_unlock:
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}

365
/*
 * Remove every cached extent mapping of @inode that intersects the byte
 * range [@start, @end] (inclusive).
 *
 * A mapping that only partly overlaps the range is removed as well, but the
 * part in front of @start and/or the part behind @end is re-inserted as a
 * new, smaller mapping.  This is only done for mappings whose block_start is
 * below EXTENT_MAP_LAST_BYTE.  Passing (u64)-1 as @end means "to the end";
 * in that case no tail piece is ever re-inserted.
 *
 * Always returns 0.
 */
int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *spare = NULL;	/* next map to use for a split */
	struct extent_map *spare2 = NULL;	/* backup for a second split */
	struct extent_map *em;
	u64 len = end - start + 1;
	int testend = 1;
	int ret;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}

	for (;;) {
		/*
		 * Allocate before taking the spinlock.
		 * NOTE(review): the allocation results are not checked
		 * before they are dereferenced below.
		 */
		if (!spare)
			spare = alloc_extent_map(GFP_NOFS);
		if (!spare2)
			spare2 = alloc_extent_map(GFP_NOFS);

		spin_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			spin_unlock(&em_tree->lock);
			break;
		}
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		remove_extent_mapping(em_tree, em);

		/* keep the piece of the mapping in front of the range */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			spare->start = em->start;
			spare->len = start - em->start;
			spare->block_start = em->block_start;
			spare->bdev = em->bdev;
			spare->flags = em->flags;
			ret = add_extent_mapping(em_tree, spare);
			BUG_ON(ret);
			free_extent_map(spare);
			/* promote the backup for a possible tail piece */
			spare = spare2;
			spare2 = NULL;
		}

		/* keep the piece of the mapping behind the range */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 skipped = start + len - em->start;

			spare->start = start + len;
			spare->len = em->start + em->len - (start + len);
			spare->bdev = em->bdev;
			spare->flags = em->flags;
			spare->block_start = em->block_start + skipped;

			ret = add_extent_mapping(em_tree, spare);
			BUG_ON(ret);
			free_extent_map(spare);
			spare = NULL;
		}
		spin_unlock(&em_tree->lock);

		/* drop two references: ours from the lookup, and the tree's */
		free_extent_map(em);
		free_extent_map(em);
	}

	if (spare)
		free_extent_map(spare);
	if (spare2)
		free_extent_map(spare2);
	return 0;
}

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
/*
 * Debugging hook for verifying the file extent items of @inode.
 *
 * The check is currently disabled: the function does nothing and always
 * returns 0.  The old implementation sat after the unconditional return
 * inside an "#if 0" block, so it was never compiled; it has been removed.
 */
int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
{
	return 0;
}

Chris Mason's avatar
Chris Mason committed
512
513
514
515
516
517
518
519
520
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  *hint_byte is filled in with a disk byte number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
521
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
{
	/*
	 * Overview of one pass through the loop below:
	 *  1. look up the file extent item at search_start
	 *  2. if that item cannot be dropped, step to the next slot / leaf
	 *  3. otherwise truncate its front, delete it entirely, or split it
	 *     and insert a "bookend" item for the part that lies past @end
	 *
	 * Returns 0 on success or a negative errno from path allocation or
	 * the extent lookup.  Several other failures hit BUG_ON().
	 */
	struct btrfs_file_extent_item *extent;
	struct btrfs_file_extent_item old;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 search_start = start;
	u64 extent_end = 0;
	int found_extent;
	int found_inline;
	int found_type;
	int bookend;
	int recow;
	int keep;
	int slot;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	for (;;) {
		recow = 0;
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			/* no exact match: step back to the previous item */
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		extent = NULL;
		leaf = path->nodes[0];
		slot = path->slots[0];
		ret = 0;
		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* stop once we are past the range or past this inode's items */
		if ((btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		     key.offset >= end) ||
		    btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino)
			goto out;

		/*
		 * we moved to a new leaf via btrfs_next_leaf(); restart with
		 * a fresh lookup at this key
		 */
		if (recow) {
			search_start = key.offset;
			continue;
		}

		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(leaf, extent);
			if (found_type == BTRFS_FILE_EXTENT_REG) {
				u64 bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);

				if (bytenr)
					*hint_byte = bytenr;

				extent_end = key.offset +
				     btrfs_file_extent_num_bytes(leaf, extent);
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				struct btrfs_item *item =
						btrfs_item_nr(leaf, slot);

				found_inline = 1;
				extent_end = key.offset +
				     btrfs_file_extent_inline_len(leaf, item);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if (!(found_extent || found_inline) ||
		    search_start >= extent_end) {
			u32 nritems = btrfs_header_nritems(leaf);

			if (slot >= nritems - 1) {
				if (btrfs_next_leaf(root, path))
					goto out;
				recow = 1;
			} else {
				path->slots[0]++;
			}
			goto next_slot;
		}

		/* where the next lookup starts once this item is handled */
		if (found_inline) {
			u64 mask = root->sectorsize - 1;

			search_start = (extent_end + mask) & ~mask;
		} else {
			search_start = extent_end;
		}

		/* an inline extent that covers the whole range is left alone */
		if (end <= extent_end && start >= key.offset && found_inline) {
			*hint_byte = EXTENT_MAP_INLINE;
			continue;
		}

		/* the item reaches past @end: its tail must be kept */
		if (end < extent_end && end >= key.offset) {
			if (found_extent) {
				u64 disk_bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);
				u64 disk_num_bytes =
				    btrfs_file_extent_disk_num_bytes(leaf,
								      extent);

				/* save a copy of the item for the bookend */
				read_extent_buffer(leaf, &old,
						   (unsigned long)extent,
						   sizeof(old));
				if (disk_bytenr != 0) {
					ret = btrfs_inc_extent_ref(trans, root,
						 disk_bytenr, disk_num_bytes,
						 root->root_key.objectid,
						 trans->transid,
						 key.objectid, end);
					BUG_ON(ret);
				}
			}
			bookend = 1;
			if (found_inline && start <= key.offset)
				keep = 1;
		}

		/* truncate existing extent */
		if (start > key.offset) {
			keep = 1;
			WARN_ON(start & (root->sectorsize - 1));
			if (found_extent) {
				u64 new_num = start - key.offset;
				u64 old_num =
				    btrfs_file_extent_num_bytes(leaf, extent);
				u64 bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);

				*hint_byte = bytenr;
				if (bytenr)
					dec_i_blocks(inode, old_num - new_num);

				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
				btrfs_mark_buffer_dirty(leaf);
			} else if (key.offset < inline_limit &&
				   end > extent_end &&
				   inline_limit < extent_end) {
				u32 new_size =
				    btrfs_file_extent_calc_inline_size(
						   inline_limit - key.offset);

				dec_i_blocks(inode, (extent_end - key.offset) -
					(inline_limit - key.offset));
				btrfs_truncate_item(trans, root, path,
						    new_size, 1);
			}
		}

		/* delete the entire extent */
		if (!keep) {
			u64 disk_bytenr = 0;
			u64 disk_num_bytes = 0;
			u64 extent_num_bytes = 0;
			u64 root_gen = btrfs_header_generation(leaf);
			u64 root_owner = btrfs_header_owner(leaf);

			if (found_extent) {
				disk_bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);
				disk_num_bytes =
				    btrfs_file_extent_disk_num_bytes(leaf,
								      extent);
				extent_num_bytes =
				    btrfs_file_extent_num_bytes(leaf, extent);
				*hint_byte = disk_bytenr;
			}
			ret = btrfs_del_item(trans, root, path);
			/* TODO update progress marker and return */
			BUG_ON(ret);
			btrfs_release_path(root, path);
			extent = NULL;
			if (found_extent && disk_bytenr != 0) {
				dec_i_blocks(inode, extent_num_bytes);
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						disk_num_bytes,
						root_owner,
						root_gen, inode->i_ino,
						key.offset, 0);
			}

			BUG_ON(ret);
			if (!bookend) {
				if (search_start >= end) {
					ret = 0;
					goto out;
				}
				continue;
			}
		}

		/* inline extent reaching past @end: chop off its front */
		if (bookend && found_inline && start <= key.offset) {
			u32 new_size = btrfs_file_extent_calc_inline_size(
						   extent_end - end);

			dec_i_blocks(inode, (extent_end - key.offset) -
					(extent_end - end));
			btrfs_truncate_item(trans, root, path, new_size, 0);
		}

		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;

			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));

			leaf = path->nodes[0];
			if (ret) {
				btrfs_print_leaf(root, leaf);
				printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep);
			}
			BUG_ON(ret);

			/* start from the saved copy, then fix up the fields */
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			btrfs_set_file_extent_offset(leaf, extent,
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
			btrfs_set_file_extent_type(leaf, extent,
						   BTRFS_FILE_EXTENT_REG);

			btrfs_mark_buffer_dirty(path->nodes[0]);
			if (le64_to_cpu(old.disk_bytenr) != 0) {
				inode->i_blocks +=
				      btrfs_file_extent_num_bytes(leaf,
								  extent) >> 9;
			}
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	btrfs_check_file(root, inode);
	return ret;
}

/*
 * this gets pages into the page cache and locks them down
 */
791
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
{
	/*
	 * Fills @pages with @num_pages page cache pages starting at the page
	 * that contains @pos, waiting for writeback on each one.  If the
	 * range starts inside i_size and overlaps an ordered extent, all
	 * pages are dropped, the ordered range is waited on, and the whole
	 * thing is retried.  Otherwise the DIRTY and DELALLOC bits are
	 * cleared on the range in the inode's io_tree.
	 *
	 * Returns 0 on success.  If a page cannot be obtained, the pages
	 * grabbed so far are unlocked and released, their slots are reset to
	 * NULL, and -ENOMEM is returned.
	 *
	 * @first_index, @last_index and @write_bytes are not used here.
	 */
	size_t i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = fdentry(file)->d_inode;
	u64 start_pos;
	u64 last_pos;

	start_pos = pos & ~((u64)root->sectorsize - 1);
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;

	memset(pages, 0, num_pages * sizeof(struct page *));
again:
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i])
			goto fail_nomem;
		wait_on_page_writeback(pages[i]);
	}
	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;

		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			/* overlapping ordered extent: back out, wait, retry */
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
	}
	for (i = 0; i < num_pages; i++) {
		clear_page_dirty_for_io(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
	return 0;

fail_nomem:
	/* pages[i] is NULL; undo the i pages grabbed before it */
	while (i > 0) {
		i--;
		unlock_page(pages[i]);
		page_cache_release(pages[i]);
		pages[i] = NULL;
	}
	return -ENOMEM;
}

static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
856
857
858
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
Chris Mason's avatar
Chris Mason committed
859
	int ret = 0;
860
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
861
	struct btrfs_root *root = BTRFS_I(inode)->root;
862
863
	struct page **pages = NULL;
	int nrptrs;
Chris Mason's avatar
Chris Mason committed
864
865
866
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
867
868
869

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
Chris Mason's avatar
Chris Mason committed
870
871
	pinned[0] = NULL;
	pinned[1] = NULL;
872

Chris Mason's avatar
Chris Mason committed
873
	pos = *ppos;
874
875
	start_pos = pos;

Chris Mason's avatar
Chris Mason committed
876
877
878
879
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
880
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
881
	if (count == 0)
882
		goto out_nolock;
883
884
885
#ifdef REMOVE_SUID_PATH
	err = remove_suid(&file->f_path);
#else
886
887
888
# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
	err = file_remove_suid(file);
# else
889
	err = remove_suid(fdentry(file));
890
# endif
891
#endif
Chris Mason's avatar
Chris Mason committed
892
	if (err)
893
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
894
895
	file_update_time(file);

896
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
Chris Mason's avatar
Chris Mason committed
897
898
899
900
901

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

902
903
904
905
906
907
908
909
	/*
	 * if this is a nodatasum mount, force summing off for the inode
	 * all the time.  That way a later mount with summing on won't
	 * get confused
	 */
	if (btrfs_test_opt(root, NODATASUM))
		btrfs_set_flag(inode, NODATASUM);

Chris Mason's avatar
Chris Mason committed
910
911
912
913
914
915
916
917
	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
Chris Mason's avatar
Chris Mason committed
918
			ret = btrfs_readpage(NULL, pinned[0]);
Chris Mason's avatar
Chris Mason committed
919
920
921
922
923
924
925
926
927
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
Chris Mason's avatar
Chris Mason committed
928
			ret = btrfs_readpage(NULL, pinned[1]);
Chris Mason's avatar
Chris Mason committed
929
930
931
932
933
934
935
936
937
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
938
939
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
940
					 offset);
Chris Mason's avatar
Chris Mason committed
941
942
943
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

944
		WARN_ON(num_pages > nrptrs);
Chris Mason's avatar
Chris Mason committed
945
		memset(pages, 0, sizeof(pages));
946
947
948
949
950

		ret = btrfs_check_free_space(root, write_bytes, 0);
		if (ret)
			goto out;

Chris Mason's avatar
Chris Mason committed
951
952
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
953
				    write_bytes);
954
955
		if (ret)
			goto out;
Chris Mason's avatar
Chris Mason committed
956
957
958

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
959
960
961
962
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}
Chris Mason's avatar
Chris Mason committed
963
964
965
966

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
967
968
		if (ret)
			goto out;
Chris Mason's avatar
Chris Mason committed
969
970
971
972
973
974

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

975
		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
976
977
		if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root, 1);
978
		btrfs_throttle(root);
Chris Mason's avatar
Chris Mason committed
979
980
981
		cond_resched();
	}
out:
982
	mutex_unlock(&inode->i_mutex);
983

984
out_nolock:
985
	kfree(pages);
Chris Mason's avatar
Chris Mason committed
986
987
988
989
990
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;
991
992

	if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
993
994
995
996
997
998
999
1000
		struct btrfs_trans_handle *trans;

		err = btrfs_fdatawrite_range(inode->i_mapping, start_pos,
					     start_pos + num_written -1,
					     WB_SYNC_NONE);
		if (err < 0)
			num_written = err;

For faster browsing, not all history is shown. View entire blame