file.c 31.2 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
32
#include <linux/version.h>
Chris Mason's avatar
Chris Mason committed
33
34
35
36
37
38
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
39
40
#include "tree-log.h"
#include "locking.h"
41
#include "compat.h"
Chris Mason's avatar
Chris Mason committed
42
43


Chris Mason's avatar
Chris Mason committed
44
45
46
/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
47
48
49
50
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
					 int write_bytes,
					 struct page **prepared_pages,
					 const char __user * buf)
Chris Mason's avatar
Chris Mason committed
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

Chris Mason's avatar
Chris Mason committed
78
79
80
/*
 * unlocks pages after btrfs_file_write is done with them
 */
81
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
Chris Mason's avatar
Chris Mason committed
82
83
84
85
86
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
Chris Mason's avatar
Chris Mason committed
87
88
89
90
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here
		 */
Chris Mason's avatar
Chris Mason committed
91
		ClearPageChecked(pages[i]);
Chris Mason's avatar
Chris Mason committed
92
93
94
95
96
97
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

Chris Mason's avatar
Chris Mason committed
98
99
100
101
/* this does all the hard work for inserting an inline extent into
 * the btree.  Any existing inline extent is extended as required to make room,
 * otherwise things are inserted as required into the btree
 *
 * @trans:       running transaction handle
 * @root:        fs root the inode lives in
 * @inode:       inode receiving the inline data
 * @offset:      byte offset in the file where the data starts
 * @size:        number of bytes of data to store inline
 * @pages:       source pages containing the data
 * @page_offset: offset into pages[0] where the data begins
 * @num_pages:   number of entries in @pages
 *
 * returns 0 on success or a negative errno; BUGs on some internal errors.
 */
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode,
				u64 offset, size_t size,
				struct page **pages, size_t page_offset,
				int num_pages)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct page *page;
	u32 datasize;
	int err = 0;
	int ret;
	int i;
	ssize_t cur_size;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	btrfs_set_trans_block_group(trans, inode);

	key.objectid = inode->i_ino;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

	/* look for an existing extent item covering @offset */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}
	if (ret == 1) {
		/* no exact match; check if the previous item is an inline
		 * extent for this inode that we can extend
		 */
		struct btrfs_key found_key;

		if (path->slots[0] == 0)
			goto insert;

		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid != inode->i_ino)
			goto insert;

		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto insert;
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			goto insert;
		}
		/* fall through to the extend path with the previous key */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		ret = 0;
	}
	if (ret == 0) {
		/* exact or previous inline extent found: grow it so it can
		 * hold the new data
		 */
		u32 found_size;
		u64 found_end;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			err = ret;
			btrfs_print_leaf(root, leaf);
			printk("found wasn't inline offset %Lu inode %lu\n",
			       offset, inode->i_ino);
			goto fail;
		}
		found_size = btrfs_file_extent_inline_len(leaf,
					  btrfs_item_nr(leaf, path->slots[0]));
		found_end = key.offset + found_size;

		if (found_end < offset + size) {
			/* re-search asking for enough free leaf space to
			 * extend the item, then grow it in place
			 */
			btrfs_release_path(root, path);
			ret = btrfs_search_slot(trans, root, &key, path,
						offset + size - found_end, 1);
			BUG_ON(ret != 0);

			ret = btrfs_extend_item(trans, root, path,
						offset + size - found_end);
			if (ret) {
				err = ret;
				goto fail;
			}
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			/* account the added bytes in 512-byte sectors */
			inode->i_blocks += (offset + size - found_end) >> 9;
		}
		if (found_end < offset) {
			/* zero-fill the gap between the old end of data and
			 * the new write offset
			 */
			ptr = btrfs_file_extent_inline_start(ei) + found_size;
			memset_extent_buffer(leaf, 0, ptr, offset - found_end);
		}
	} else {
insert:
		/* no suitable existing item: insert a fresh inline extent */
		btrfs_release_path(root, path);
		datasize = offset + size - key.offset;
		inode->i_blocks += datasize >> 9;
		datasize = btrfs_file_extent_calc_inline_size(datasize);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret) {
			err = ret;
			printk("got bad ret %d\n", ret);
			goto fail;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, ei, trans->transid);
		btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	}
	/* destination of the copy inside the (possibly extended) item */
	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;

	cur_size = size;
	i = 0;
	/* copy the page data into the leaf, one page at a time */
	while (size > 0) {
		page = pages[i];
		kaddr = kmap_atomic(page, KM_USER0);
		cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
		kunmap_atomic(kaddr, KM_USER0);
		/* only the first page can have a non-zero starting offset */
		page_offset = 0;
		ptr += cur_size;
		size -= cur_size;
		/* NOTE(review): this only logs when i runs past num_pages;
		 * pages[i] was already dereferenced above
		 */
		if (i >= num_pages) {
			printk("i %d num_pages %d\n", i, num_pages);
		}
		i++;
	}
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_free_path(path);
	return err;
}

Chris Mason's avatar
Chris Mason committed
245
246
247
248
249
250
251
252
/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 *
 * @trans:       transaction handle (replaced by a joined transaction inside)
 * @root:        fs root for the file
 * @file:        file being written
 * @pages:       pages already filled with the user data
 * @num_pages:   number of entries in @pages
 * @pos:         file position of the write
 * @write_bytes: number of bytes written
 *
 * returns 0 or a negative errno from the transaction/extent calls.
 */
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	u64 inline_size;
	int did_inline = 0;
	loff_t isize = i_size_read(inode);

	/* round the written range out to sector boundaries */
	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	trans = btrfs_join_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	hint_byte = 0;

	/* NOTE(review): 4095 hard-codes a 4k sector mask instead of using
	 * root->sectorsize - 1; this is a debug sanity printk only
	 */
	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* FIXME...EIEIO, ENOSPC and more */
	/* insert any holes we need to create */
	if (isize < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->sectorsize - 1;
		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		if (hole_size > 0) {
			/* drop anything in the hole range, then insert a
			 * single zero-length (hole) file extent covering it
			 */
			btrfs_wait_ordered_range(inode, last_pos_in_file,
						 last_pos_in_file + hole_size);
			mutex_lock(&BTRFS_I(inode)->extent_mutex);
			err = btrfs_drop_extents(trans, root, inode,
						 last_pos_in_file,
						 last_pos_in_file + hole_size,
						 last_pos_in_file,
						 &hint_byte);
			if (err)
				goto failed;

			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size, 0);
			btrfs_drop_extent_cache(inode, last_pos_in_file,
					last_pos_in_file + hole_size - 1, 0);
			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
			btrfs_check_file(root, inode);
		}
		if (err)
			goto failed;
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	inline_size = end_pos;
	/* NOTE(review): the BTRFS_MAX_INLINE_DATA_SIZE test appears twice
	 * in this condition (against isize and against inline_size)
	 */
	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    inline_size > root->fs_info->max_inline ||
	    (inline_size & (root->sectorsize -1)) == 0 ||
	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		/* check for reserved extents on each page, we don't want
		 * to reset the delalloc bit on things that already have
		 * extents reserved.
		 */
		btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
		for (i = 0; i < num_pages; i++) {
			struct page *p = pages[i];
			SetPageUptodate(p);
			ClearPageChecked(p);
			set_page_dirty(p);
		}
	} else {
		u64 aligned_end;
		/* step one, delete the existing extents in this range */
		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		mutex_lock(&BTRFS_I(inode)->extent_mutex);
		err = btrfs_drop_extents(trans, root, inode, start_pos,
					 aligned_end, aligned_end, &hint_byte);
		if (err)
			goto failed;
		/* step two, write the data directly into the btree leaf */
		if (isize > inline_size)
			inline_size = min_t(u64, isize, aligned_end);
		inline_size -= start_pos;
		err = insert_inline_extent(trans, root, inode, start_pos,
					   inline_size, pages, 0, num_pages);
		btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0);
		BUG_ON(err);
		mutex_unlock(&BTRFS_I(inode)->extent_mutex);

		/*
		 * an ugly way to do all the prop accounting around
		 * the page bits and mapping tags
		 */
		set_page_writeback(pages[0]);
		end_page_writeback(pages[0]);
		did_inline = 1;
	}
	if (end_pos > isize) {
		/* the write extended the file; update i_size and, for inline
		 * data, the on-disk i_size as well
		 */
		i_size_write(inode, end_pos);
		if (did_inline)
			BTRFS_I(inode)->disk_i_size = end_pos;
		btrfs_update_inode(trans, root, inode);
	}
failed:
	err = btrfs_end_transaction(trans, root);
out_unlock:
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}

Chris Mason's avatar
Chris Mason committed
387
388
389
390
/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 *
 * @inode:       inode whose in-memory extent map cache is trimmed
 * @start:       first byte of the range to drop
 * @end:         last byte of the range (inclusive); (u64)-1 means "to EOF"
 * @skip_pinned: if non-zero, leave pinned extent maps in place
 *
 * always returns 0.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 len = end - start + 1;
	int ret;
	int testend = 1;
	unsigned long flags;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		/* open-ended drop: don't test against the range end */
		len = (u64)-1;
		testend = 0;
	}
	while(1) {
		/* pre-allocate up to two maps outside the spinlock in case
		 * the found extent must be split at the front and the back
		 */
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

		spin_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			spin_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			/* pinned extent stays; shrink the search range so we
			 * continue past it, or stop if it covers the rest
			 */
			spin_unlock(&em_tree->lock);
			if (em->start <= start &&
			    (!testend || em->start + em->len >= start + len)) {
				free_extent_map(em);
				break;
			}
			if (start < em->start) {
				len = em->start - start;
			} else {
				len = start + len - (em->start + em->len);
				start = em->start + em->len;
			}
			free_extent_map(em);
			continue;
		}
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		remove_extent_mapping(em_tree, em);

		/* keep the portion of the mapping before @start */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
			split->block_start = em->block_start;
			split->bdev = em->bdev;
			split->flags = flags;
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		/* keep the portion of the mapping after @end */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;

			split->block_start = em->block_start + diff;

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
		spin_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
	/* release any unused pre-allocated split maps */
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
	return 0;
}

483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
{
	return 0;
#if 0
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	u64 last_offset = 0;
	int nritems;
	int slot;
	int found_type;
	int ret;
	int err = 0;
	u64 extent_end = 0;

	path = btrfs_alloc_path();
	ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
				       last_offset, 0);
	while(1) {
		nritems = btrfs_header_nritems(path->nodes[0]);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret)
				goto out;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != inode->i_ino)
			break;
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto out;

Chris Mason's avatar
Chris Mason committed
518
		if (found_key.offset < last_offset) {
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
			WARN_ON(1);
			btrfs_print_leaf(root, leaf);
			printk("inode %lu found offset %Lu expected %Lu\n",
			       inode->i_ino, found_key.offset, last_offset);
			err = 1;
			goto out;
		}
		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(leaf, extent);
		if (found_type == BTRFS_FILE_EXTENT_REG) {
			extent_end = found_key.offset +
			     btrfs_file_extent_num_bytes(leaf, extent);
		} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
			struct btrfs_item *item;
			item = btrfs_item_nr(leaf, slot);
			extent_end = found_key.offset +
			     btrfs_file_extent_inline_len(leaf, item);
			extent_end = (extent_end + root->sectorsize - 1) &
				~((u64)root->sectorsize -1 );
		}
		last_offset = extent_end;
		path->slots[0]++;
	}
Chris Mason's avatar
Chris Mason committed
543
	if (0 && last_offset < inode->i_size) {
544
545
546
547
548
549
550
551
552
553
554
555
556
		WARN_ON(1);
		btrfs_print_leaf(root, leaf);
		printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino,
		       last_offset, inode->i_size);
		err = 1;

	}
out:
	btrfs_free_path(path);
	return err;
#endif
}

Chris Mason's avatar
Chris Mason committed
557
558
559
560
561
562
563
564
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 *
 * inline_limit is used to tell this code which offsets in the file to keep
 * if they contain inline extents.
 *
 * @trans:        running transaction handle
 * @root:         fs root the inode belongs to
 * @inode:        file whose extents are dropped
 * @start:        first byte of the range (inclusive)
 * @end:          end of the range (exclusive)
 * @inline_limit: offset below which inline data is preserved
 * @hint_byte:    out: allocator hint (disk bytenr of an affected extent)
 *
 * returns 0 on success or a negative errno; BUGs on internal tree errors.
 */
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
{
	u64 extent_end = 0;
	u64 search_start = start;
	u64 leaf_start;
	u64 root_gen;
	u64 root_owner;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;
	int recow;
	int ret;

	/* the in-memory extent cache must not outlive the on-disk items */
	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while(1) {
		recow = 0;
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			/* no exact match: step back to the item that may
			 * still overlap search_start
			 */
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		leaf_start = 0;
		root_gen = 0;
		root_owner = 0;
		extent = NULL;
		leaf = path->nodes[0];
		slot = path->slots[0];
		ret = 0;
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
			goto out;
		}
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
			goto out;
		}
		if (recow) {
			/* we crossed into a new leaf; re-search so the leaf
			 * gets COWed before we modify it
			 */
			search_start = key.offset;
			continue;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(leaf, extent);
			if (found_type == BTRFS_FILE_EXTENT_REG) {
				/* extent_end is reused briefly to hold the
				 * disk bytenr for the allocator hint
				 */
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

				extent_end = key.offset +
				     btrfs_file_extent_num_bytes(leaf, extent);
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				struct btrfs_item *item;
				item = btrfs_item_nr(leaf, slot);
				found_inline = 1;
				extent_end = key.offset +
				     btrfs_file_extent_inline_len(leaf, item);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
			nritems = btrfs_header_nritems(leaf);
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
				recow = 1;
			} else {
				path->slots[0]++;
			}
			goto next_slot;
		}

		if (found_inline) {
			/* inline extents end on sector boundaries as far as
			 * the search is concerned
			 */
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;
		/* a fully-covered inline extent is left alone */
		if (end <= extent_end && start >= key.offset && found_inline) {
			*hint_byte = EXTENT_MAP_INLINE;
			goto out;
		}

		if (found_extent) {
			/* snapshot the item and its leaf location before the
			 * path is released; needed for the bookend/free steps
			 */
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
			root_gen = btrfs_header_generation(leaf);
			root_owner = btrfs_header_owner(leaf);
			leaf_start = leaf->start;
		}

		if (end < extent_end && end >= key.offset) {
			/* the tail of this extent survives the drop */
			bookend = 1;
			if (found_inline && start <= key.offset)
				keep = 1;
		}
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
			WARN_ON(start & (root->sectorsize - 1));
			if (found_extent) {
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
					dec_i_blocks(inode, old_num - new_num);
				}
				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
				btrfs_mark_buffer_dirty(leaf);
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
				/* shrink an inline extent down to
				 * inline_limit
				 */
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
						   inline_limit - key.offset);
				dec_i_blocks(inode, (extent_end - key.offset) -
					(inline_limit - key.offset));
				btrfs_truncate_item(trans, root, path,
						    new_size, 1);
			}
		}
		/* delete the entire extent */
		if (!keep) {
			ret = btrfs_del_item(trans, root, path);
			/* TODO update progress marker and return */
			BUG_ON(ret);
			extent = NULL;
			btrfs_release_path(root, path);
			/* the extent will be freed later */
		}
		if (bookend && found_inline && start <= key.offset) {
			/* keep only the tail of the inline data, truncating
			 * from the front
			 */
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
						   extent_end - end);
			dec_i_blocks(inode, (extent_end - key.offset) -
					(extent_end - end));
			ret = btrfs_truncate_item(trans, root, path,
						  new_size, 0);
			BUG_ON(ret);
		}
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			u64 disk_bytenr;
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
			BUG_ON(ret);

			/* copy the saved item, then adjust its offset and
			 * length so it describes only the surviving tail
			 */
			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			btrfs_set_file_extent_offset(leaf, extent,
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
			btrfs_set_file_extent_type(leaf, extent,
						   BTRFS_FILE_EXTENT_REG);

			btrfs_mark_buffer_dirty(path->nodes[0]);

			/* the bookend shares the original disk extent, so
			 * take another reference on it
			 */
			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						ins.objectid, ins.offset);
				BUG_ON(ret);
			}
			btrfs_release_path(root, path);
			if (disk_bytenr != 0) {
				inode->i_blocks +=
				      btrfs_file_extent_num_bytes(leaf,
								  extent) >> 9;
			}
		}

		/* drop our reference on the disk extent of a deleted item */
		if (found_extent && !keep) {
			u64 disk_bytenr = le64_to_cpu(old.disk_bytenr);

			if (disk_bytenr != 0) {
				dec_i_blocks(inode, le64_to_cpu(old.num_bytes));
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf_start, root_owner,
						root_gen, key.objectid,
						key.offset, 0);
				BUG_ON(ret);
				*hint_byte = disk_bytenr;
			}
		}

		if (search_start >= end) {
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	btrfs_check_file(root, inode);
	return ret;
}

/*
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
 *
 * @root:        fs root for sector size
 * @file:        file being written
 * @pages:       out: array filled with locked pages
 * @num_pages:   number of pages to grab
 * @pos:         file position the write starts at
 * @first_index: first page index (currently unused in this body)
 * @last_index:  last page index (currently unused in this body)
 * @write_bytes: size of the write (currently unused in this body)
 *
 * always returns 0; page allocation failure hits BUG_ON.
 */
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = fdentry(file)->d_inode;
	int err = 0;
	u64 start_pos;
	u64 last_pos;

	start_pos = pos & ~((u64)root->sectorsize - 1);
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;

	memset(pages, 0, num_pages * sizeof(struct page *));
again:
	/* grab and lock every page in the range */
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
			BUG_ON(1);
		}
		wait_on_page_writeback(pages[i]);
	}
	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
		/* if an ordered extent overlaps the range, drop everything,
		 * wait for it to finish and start over
		 */
		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		/* the range is about to be rewritten; clear stale state */
		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
	}
	for (i = 0; i < num_pages; i++) {
		clear_page_dirty_for_io(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
	return 0;
}

static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
897
898
899
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
Chris Mason's avatar
Chris Mason committed
900
	int ret = 0;
901
	struct inode *inode = fdentry(file)->d_inode;
Chris Mason's avatar
Chris Mason committed
902
	struct btrfs_root *root = BTRFS_I(inode)->root;
903
904
	struct page **pages = NULL;
	int nrptrs;
Chris Mason's avatar
Chris Mason committed
905
906
907
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
908
909
910
911
	int will_write;

	will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
		      (file->f_flags & O_DIRECT));
912
913
914

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
Chris Mason's avatar
Chris Mason committed
915
916
	pinned[0] = NULL;
	pinned[1] = NULL;
917

Chris Mason's avatar
Chris Mason committed
918
	pos = *ppos;
919
920
	start_pos = pos;

Chris Mason's avatar
Chris Mason committed
921
922
923
924
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
925
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
926
	if (count == 0)
927
		goto out_nolock;
928

929
	err = file_remove_suid(file);
Chris Mason's avatar
Chris Mason committed
930
	if (err)
931
		goto out_nolock;
Chris Mason's avatar
Chris Mason committed
932
933
	file_update_time(file);

934
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
Chris Mason's avatar
Chris Mason committed
935
936
937
938
939

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

940
941
942
943
944
945
946
947
	/*
	 * if this is a nodatasum mount, force summing off for the inode
	 * all the time.  That way a later mount with summing on won't
	 * get confused
	 */
	if (btrfs_test_opt(root, NODATASUM))
		btrfs_set_flag(inode, NODATASUM);

Chris Mason's avatar
Chris Mason committed
948
949
950
951
952
953
954
955
	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
Chris Mason's avatar
Chris Mason committed
956
			ret = btrfs_readpage(NULL, pinned[0]);
Chris Mason's avatar
Chris Mason committed
957
958
959
960
961
962
963
964
965
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
Chris Mason's avatar
Chris Mason committed
966
			ret = btrfs_readpage(NULL, pinned[1]);
Chris Mason's avatar
Chris Mason committed
967
968
969
970
971
972
973
974
975
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
976
977
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
978
					 offset);
Chris Mason's avatar
Chris Mason committed
979
980
981
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

982
		WARN_ON(num_pages > nrptrs);
Chris Mason's avatar
Chris Mason committed
983
		memset(pages, 0, sizeof(pages));
984
985
986
987
988

		ret = btrfs_check_free_space(root, write_bytes, 0);
		if (ret)
			goto out;

Chris Mason's avatar
Chris Mason committed
989
990
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
991
				    write_bytes);
992
993
		if (ret)
			goto out;
Chris Mason's avatar
Chris Mason committed
994
995
996

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
997
998
999
1000
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}
For faster browsing, not all history is shown. View entire blame