/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/version.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "compat.h"


/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
					 int write_bytes,
					 struct page **prepared_pages,
					 const char __user * buf)
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

/*
 * unlocks pages after btrfs_file_write is done with them
 */
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty.
		 * Clear it here.
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);

	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	trans = btrfs_join_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	hint_byte = 0;

	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* check for reserved extents on each page, we don't want
	 * to reset the delalloc bit on things that already have
	 * extents reserved.
	 */
	btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		btrfs_update_inode(trans, root, inode);
	}
	err = btrfs_end_transaction(trans, root);
out_unlock:
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}

/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 len = end - start + 1;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

		spin_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			spin_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			spin_unlock(&em_tree->lock);
			if (em->start <= start &&
			    (!testend || em->start + em->len >= start + len)) {
				free_extent_map(em);
				break;
			}
			if (start < em->start) {
				len = em->start - start;
			} else {
				len = start + len - (em->start + em->len);
				start = em->start + em->len;
			}
			free_extent_map(em);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		remove_extent_mapping(em_tree, em);

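		/*
		 * the existing extent starts before the range being dropped;
		 * keep the front piece by inserting a new mapping for it
		 */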
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
			split->orig_start = em->orig_start;
			split->block_start = em->block_start;

			if (compressed)
				split->block_len = em->block_len;
			else
				split->block_len = split->len;

			split->bdev = em->bdev;
			split->flags = flags;
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
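		/*
		 * the existing extent extends past the range being dropped;
		 * keep the tail piece by inserting a new mapping for it
		 */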
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;

			if (compressed) {
				split->block_len = em->block_len;
				split->block_start = em->block_start;
				split->orig_start = em->orig_start;
			} else {
				split->block_len = split->len;
				split->block_start = em->block_start + diff;
				split->orig_start = split->start;
			}

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
		spin_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
	return 0;
}

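/*
 * sanity checker that walks the file extent items of an inode and verifies
 * they are in order and contiguous.  The body is compiled out below and the
 * function currently just returns 0.
 */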
int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
{
	return 0;
#if 0
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	u64 last_offset = 0;
	int nritems;
	int slot;
	int found_type;
	int ret;
	int err = 0;
	u64 extent_end = 0;

	path = btrfs_alloc_path();
	ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
				       last_offset, 0);
	while (1) {
		nritems = btrfs_header_nritems(path->nodes[0]);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret)
				goto out;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != inode->i_ino)
			break;
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto out;

		if (found_key.offset < last_offset) {
			WARN_ON(1);
			btrfs_print_leaf(root, leaf);
			printk("inode %lu found offset %Lu expected %Lu\n",
			       inode->i_ino, found_key.offset, last_offset);
			err = 1;
			goto out;
		}
		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(leaf, extent);
		if (found_type == BTRFS_FILE_EXTENT_REG) {
			extent_end = found_key.offset +
			     btrfs_file_extent_num_bytes(leaf, extent);
		} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
			struct btrfs_item *item;
			item = btrfs_item_nr(leaf, slot);
			extent_end = found_key.offset +
			     btrfs_file_extent_inline_len(leaf, extent);
			extent_end = (extent_end + root->sectorsize - 1) &
				~((u64)root->sectorsize - 1);
		}
		last_offset = extent_end;
		path->slots[0]++;
	}
	if (0 && last_offset < inode->i_size) {
		WARN_ON(1);
		btrfs_print_leaf(root, leaf);
		printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino,
		       last_offset, inode->i_size);
		err = 1;

	}
out:
	btrfs_free_path(path);
	return err;
#endif
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_byte is filled in with a byte number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 *
 * inline_limit is used to tell this code which offsets in the file to keep
 * if they contain inline extents.
 */
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
{
	u64 extent_end = 0;
	u64 locked_end = end;
	u64 search_start = start;
	u64 leaf_start;
	u64 ram_bytes = 0;
	u64 orig_parent = 0;
	u64 disk_bytenr = 0;
	u8 compression;
	u8 encryption;
	u16 other_encoding = 0;
	u64 root_gen;
	u64 root_owner;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
	int bookend;
	int found_type = 0;
	int found_extent;
	int found_inline;
	int recow;
	int ret;

	inline_limit = 0;
	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while (1) {
		recow = 0;
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		leaf_start = 0;
		root_gen = 0;
		root_owner = 0;
		compression = 0;
		encryption = 0;
		extent = NULL;
		leaf = path->nodes[0];
		slot = path->slots[0];
		ret = 0;
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
			goto out;
		}
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
			goto out;
		}
		if (recow) {
			search_start = max(key.offset, start);
			continue;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(leaf, extent);
			compression = btrfs_file_extent_compression(leaf,
								    extent);
			encryption = btrfs_file_extent_encryption(leaf,
								  extent);
			other_encoding = btrfs_file_extent_other_encoding(leaf,
								  extent);
			if (found_type == BTRFS_FILE_EXTENT_REG ||
			    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

				extent_end = key.offset +
				     btrfs_file_extent_num_bytes(leaf, extent);
				ram_bytes = btrfs_file_extent_ram_bytes(leaf,
								extent);
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				found_inline = 1;
				extent_end = key.offset +
				     btrfs_file_extent_inline_len(leaf, extent);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
			nritems = btrfs_header_nritems(leaf);
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
				recow = 1;
			} else {
				path->slots[0]++;
			}
			goto next_slot;
		}

		if (end <= extent_end && start >= key.offset && found_inline)
			*hint_byte = EXTENT_MAP_INLINE;

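		/*
		 * remember the extent item and the leaf it lives in so the
		 * underlying disk extent can be referenced or freed after
		 * the path is released
		 */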
		if (found_extent) {
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
			root_gen = btrfs_header_generation(leaf);
			root_owner = btrfs_header_owner(leaf);
			leaf_start = leaf->start;
		}

		if (end < extent_end && end >= key.offset) {
			bookend = 1;
			if (found_inline && start <= key.offset)
				keep = 1;
		}

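		/*
		 * the extent we are dropping extends past the range that is
		 * currently locked; extend the lock and take an extra ref on
		 * the disk extent before creating the bookend extent
		 */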
		if (bookend && found_extent) {
			if (locked_end < extent_end) {
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
				if (!ret) {
					btrfs_release_path(root, path);
					lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
					locked_end = extent_end;
					continue;
				}
				locked_end = extent_end;
			}
			orig_parent = path->nodes[0]->start;
			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
					   disk_bytenr,
					   le64_to_cpu(old.disk_num_bytes),
					   orig_parent, root->root_key.objectid,
					   trans->transid, inode->i_ino);
				BUG_ON(ret);
			}
		}

		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
			WARN_ON(start & (root->sectorsize - 1));
			if (found_extent) {
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
					inode_sub_bytes(inode, old_num -
							new_num);
				}
				if (!compression && !encryption) {
					btrfs_set_file_extent_ram_bytes(leaf,
							extent, new_num);
				}
				btrfs_set_file_extent_num_bytes(leaf,
							extent, new_num);
				btrfs_mark_buffer_dirty(leaf);
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
						   inline_limit - key.offset);
				inode_sub_bytes(inode, extent_end -
						inline_limit);
				btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
				if (!compression && !encryption) {
					btrfs_truncate_item(trans, root, path,
							    new_size, 1);
				}
			}
		}
		/* delete the entire extent */
		if (!keep) {
			if (found_inline)
				inode_sub_bytes(inode, extent_end -
						key.offset);
			ret = btrfs_del_item(trans, root, path);
			/* TODO update progress marker and return */
			BUG_ON(ret);
			extent = NULL;
			btrfs_release_path(root, path);
			/* the extent will be freed later */
		}
		if (bookend && found_inline && start <= key.offset) {
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
						   extent_end - end);
			inode_sub_bytes(inode, end - key.offset);
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
			if (!compression && !encryption)
				ret = btrfs_truncate_item(trans, root, path,
							  new_size, 0);
			BUG_ON(ret);
		}
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);

			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
			BUG_ON(ret);

			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			btrfs_set_file_extent_compression(leaf, extent,
							  compression);
			btrfs_set_file_extent_encryption(leaf, extent,
							 encryption);
			btrfs_set_file_extent_other_encoding(leaf, extent,
							     other_encoding);
			btrfs_set_file_extent_offset(leaf, extent,
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);

			/*
			 * set the ram bytes to the size of the full extent
			 * before splitting.  This is a worst case flag,
			 * but it's the best we can do because we don't know
			 * how splitting affects compression
			 */
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							ram_bytes);
			btrfs_set_file_extent_type(leaf, extent, found_type);

			btrfs_mark_buffer_dirty(path->nodes[0]);

			if (disk_bytenr != 0) {
				ret = btrfs_update_extent_ref(trans, root,
						disk_bytenr, orig_parent,
					        leaf->start,
						root->root_key.objectid,
						trans->transid, ins.objectid);

				BUG_ON(ret);
			}
			btrfs_release_path(root, path);
			if (disk_bytenr != 0) {
				inode_add_bytes(inode, extent_end - end);
			}
		}

		if (found_extent && !keep) {
			u64 disk_bytenr = le64_to_cpu(old.disk_bytenr);

			if (disk_bytenr != 0) {
				inode_sub_bytes(inode,
						le64_to_cpu(old.num_bytes));
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf_start, root_owner,
						root_gen, key.objectid, 0);
				BUG_ON(ret);
				*hint_byte = disk_bytenr;
			}
		}

		if (search_start >= end) {
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	if (locked_end > end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
			      GFP_NOFS);
	}
	btrfs_check_file(root, inode);
	return ret;
}

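/*
 * helper to check whether the file extent item in 'slot' belongs to
 * 'objectid', points at 'bytenr' and lines up with the range described by
 * *start and *end.  On success the extent's range is returned through
 * *start and *end.
 */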
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct inode *inode, u64 start, u64 end)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 extent_offset;
	u64 other_start;
	u64 other_end;
	u64 split = start;
	u64 locked_end = end;
	int extent_type;
	int split_end = 1;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	BUG_ON(!path);
again:
	key.objectid = inode->i_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	if (split == start)
		key.offset = split;
	else
		key.offset = split - 1;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	BUG_ON(key.objectid != inode->i_ino ||
	       key.type != BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(leaf, fi);
	BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	BUG_ON(key.offset > start || extent_end < end);

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	extent_offset = btrfs_file_extent_offset(leaf, fi);

	if (key.offset == start)
		split = end;

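	/*
	 * the whole pre-allocated extent is being marked as written; flip
	 * its type in place and try to merge it with neighbouring extents
	 * that point at the same disk bytenr
	 */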
	if (key.offset == start && extent_end == end) {
		int del_nr = 0;
		int del_slot = 0;
		u64 leaf_owner = btrfs_header_owner(leaf);
		u64 leaf_gen = btrfs_header_generation(leaf);
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			extent_end = other_end;
			del_slot = path->slots[0] + 1;
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						leaf->start, leaf_owner,
						leaf_gen, inode->i_ino, 0);
			BUG_ON(ret);
		}
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			key.offset = other_start;
			del_slot = path->slots[0];
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						leaf->start, leaf_owner,
						leaf_gen, inode->i_ino, 0);
			BUG_ON(ret);
		}
		split_end = 0;
		if (del_nr == 0) {
			btrfs_set_file_extent_type(leaf, fi,
						   BTRFS_FILE_EXTENT_REG);
			goto done;
		}

		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		BUG_ON(ret);
		goto done;
	} else if (split == start) {
		if (locked_end < extent_end) {
			ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
			if (!ret) {
				btrfs_release_path(root, path);
				lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
				locked_end = extent_end;
				goto again;
			}
			locked_end = extent_end;
		}
		btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
		extent_offset += split - key.offset;
	} else {
		BUG_ON(key.offset != start);
		btrfs_set_file_extent_offset(leaf, fi, extent_offset +
					     split - key.offset);
		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
		key.offset = split;
		btrfs_set_item_key_safe(trans, root, path, &key);
		extent_end = split;
	}

	if (extent_end == end) {
		split_end = 0;
		extent_type = BTRFS_FILE_EXTENT_REG;
	}
	if (extent_end == end && split == start) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]++;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			key.offset = split;
			btrfs_set_item_key_safe(trans, root, path, &key);
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - split);
			goto done;
		}
	}
	if (extent_end == end && split == end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]--;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
							other_start);
			goto done;
		}
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	key.offset = start;
	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
	BUG_ON(ret);

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
	btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_compression(leaf, fi, 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);

	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
				   leaf->start, root->root_key.objectid,
				   trans->transid, inode->i_ino);
	BUG_ON(ret);
done:
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);
	if (split_end && split == start) {
		split = end;
		goto again;
	}
	if (locked_end > end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
			      GFP_NOFS);
	}
	btrfs_free_path(path);
	return 0;
}

/*
 * this gets pages into the page cache and locks them down; it also
 * properly waits for data=ordered extents to finish before allowing
 * the pages to be modified.
 */
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = fdentry(file)->d_inode;
	int err = 0;
	u64 start_pos;
	u64 last_pos;

	start_pos = pos & ~((u64)root->sectorsize - 1);
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;

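	/*
	 * writing past EOF: fill the gap between the old i_size and the
	 * start of this write before the pages are locked
	 */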
	if (start_pos > inode->i_size) {
		err = btrfs_cont_expand(inode, start_pos);
		if (err)
			return err;
	}

	memset(pages, 0, num_pages * sizeof(struct page *));
again:
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
			BUG_ON(1);
		}
		wait_on_page_writeback(pages[i]);
	}
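	/*
	 * if the write lands inside the existing file, wait for any ordered
	 * extents that overlap the range and clear stale dirty/delalloc bits
	 * before the pages are dirtied again
	 */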
	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
	}
	for (i = 0; i < num_pages; i++) {
		clear_page_dirty_for_io(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}