/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
				   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 bytenr, u64 num_bytes, u64 parent,
				u64 root_objectid, u64 owner_objectid,
				u64 owner_offset, int refs_to_drop,
				struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);

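/*
 * helper to check whether the async caching of this block group's free
 * space has finished
 */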
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED;
}

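/*
 * helper to check if the given block group has all of the requested
 * allocation bits set in its flags
 */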
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

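/*
 * walk the pinned_extents tree and drop the EXTENT_LOCKED bits that were
 * set to keep the super block mirrors out of the free space cache
 */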
void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info)
{
	u64 start, end, last = 0;
	int ret;

	while (1) {
		ret = find_first_extent_bit(&info->pinned_extents, last,
					    &start, &end, EXTENT_LOCKED);
		if (ret)
			break;

		unlock_extent(&info->pinned_extents, start, end, GFP_NOFS);
		last = end+1;
	}
}

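/*
 * lock the byte ranges covered by the super block mirrors in the
 * pinned_extents tree so they are never handed out as free space
 */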
static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			try_lock_extent(&fs_info->pinned_extents,
					logical[nr],
					logical[nr] + stripe_len - 1, GFP_NOFS);
		}
		kfree(logical);
	}

	return 0;
}

/*
 * this is only called by cache_block_group.  since we could have freed
 * extents, we need to check the pinned_extents for any extents that can't
 * be used yet, because their free space will be released as soon as the
 * transaction commits.
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY|EXTENT_LOCKED|
					    EXTENT_DELALLOC);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

DEFINE_MUTEX(discard_mutex);

/*
 * if async kthreads are running when we cross transactions, we mark any pinned
 * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those
 * extents when they are done.  Also we run this from btrfs_finish_extent_commit
 * in case there were some pinned extents that were missed because we had
 * already cached that block group.
 */
static void btrfs_discard_pinned_extents(struct btrfs_fs_info *fs_info,
					 struct btrfs_block_group_cache *cache)
{
	u64 start, end, last;
	int ret;

	if (!cache)
		last = 0;
	else
		last = cache->key.objectid;

	mutex_lock(&discard_mutex);
	while (1) {
		ret = find_first_extent_bit(&fs_info->pinned_extents, last,
					    &start, &end, EXTENT_DELALLOC);
		if (ret)
			break;

		if (cache && start >= cache->key.objectid + cache->key.offset)
			break;


		if (!cache) {
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache);

			start = max(start, cache->key.objectid);
			end = min(end, cache->key.objectid + cache->key.offset - 1);

			if (block_group_cache_done(cache))
				btrfs_add_free_space(cache, start,
						     end - start + 1);
			cache = NULL;
		} else {
			start = max(start, cache->key.objectid);
			end = min(end, cache->key.objectid + cache->key.offset - 1);
			btrfs_add_free_space(cache, start, end - start + 1);
		}

		clear_extent_bits(&fs_info->pinned_extents, start, end,
				  EXTENT_DELALLOC, GFP_NOFS);
		last = end + 1;

		if (need_resched()) {
			mutex_unlock(&discard_mutex);
			cond_resched();
			mutex_lock(&discard_mutex);
		}
	}
	mutex_unlock(&discard_mutex);
}

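/*
 * async worker that walks the extent tree via the commit root and fills
 * in the free space cache for a single block group, waking up anybody
 * waiting on the block group's caching_q as space is found
 */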
static int caching_kthread(void *data)
{
	struct btrfs_block_group_cache *block_group = data;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 last = 0;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 total_found = 0;

	BUG_ON(!fs_info);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	atomic_inc(&fs_info->async_caching_threads);
	atomic_inc(&block_group->space_info->caching_threads);
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
again:
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->extent_root->commit_root_sem);

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 2;

	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		smp_mb();
		if (block_group->fs_info->closing)
			break;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(fs_info->extent_root, path);
			if (ret < 0)
				goto err;
			else if (ret)
				break;

			if (need_resched()) {
				btrfs_release_path(fs_info->extent_root, path);
				up_read(&fs_info->extent_root->commit_root_sem);
				cond_resched();
				goto again;
			}

			continue;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			last = key.objectid + key.offset;
		}

		if (total_found > (1024 * 1024 * 2)) {
			total_found = 0;
			wake_up(&block_group->caching_q);
		}
next:
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);

	spin_lock(&block_group->lock);
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_root->commit_root_sem);
	atomic_dec(&fs_info->async_caching_threads);
	atomic_dec(&block_group->space_info->caching_threads);
	wake_up(&block_group->caching_q);

	if (!ret)
		btrfs_discard_pinned_extents(fs_info, block_group);

	return 0;
}

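/*
 * kick off a kthread to cache free space for this block group, unless
 * caching has already been started or has finished
 */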
static int cache_block_group(struct btrfs_block_group_cache *cache)
{
	struct task_struct *tsk;
	int ret = 0;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		return ret;
	}
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
			  cache->key.objectid);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		printk(KERN_ERR "error running thread %d\n", ret);
		BUG();
	}

	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

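/*
 * walk the list of space infos and return the one matching the given
 * allocation flags
 */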
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

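/* helper to scale a byte count down to factor tenths of its value */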
static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

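/*
 * scan the block groups starting near search_hint and return the start of
 * a metadata block group that still has room; the search wraps around and
 * is relaxed before giving up
 */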
u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. The implicit back refs is optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
 * b-tree searching. The full back refs is for pointers in tree blocks not
 * referenced by their owner trees. The location of tree block is recorded
 * in the back refs. Actually the full back refs is generic, and can be
 * used in all cases the implicit back refs is used. The major shortcoming
 * of the full back refs is its overhead. Every time a tree block gets
 * COWed, we have to update back refs entry for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in old transaction, the
 * only way to drop a reference to it is COW it. So we can detect the
 * event that tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COW'd through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs is used for pointers
 * in the block. Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs is used for
 * pointers in the block. Add full back refs for every pointer in the
 * block, increase lower level extents' reference counts. The original
 * implicit back refs are entailed to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block, increase lower level extents' reference count.
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent,
 * The key type is used to differentiate between types of back refs.
 * There are different meanings of the key offset for different types
 * of back refs.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
 *
 * When a file extent is allocated, the implicit back refs is used.
 * the fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of key. The key offset for the implicit back refs is
 * objectid of block's owner tree. The key offset for the full back refs
 * is the first byte of parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block is required. This information is stored in
 * the tree block info structure.
 */

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

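/*
 * compute the key offset for an extent data ref by hashing the root
 * objectid, inode objectid and file offset with crc32c
 */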
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

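/*
 * look up the data back ref for an extent.  shared refs are keyed by the
 * parent block, non-shared refs by the root/inode/offset hash, so hash
 * collisions are resolved by walking forward through the leaf
 */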
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

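/*
 * insert a new data back ref or bump the count of an existing one.  for
 * hash keyed refs, key collisions are handled by bumping the key offset
 * until a free slot or a matching ref is found
 */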
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}

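/*
 * drop refs_to_drop references from the data back ref at the current path
 * position, deleting the item once its count reaches zero
 */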
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

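/*
 * return the reference count stored in a data back ref, either an inline
 * ref inside the extent item or a standalone ref item
 */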
static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(root, path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(root, path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

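/*
 * helper to find the key immediately after the current path position,
 * returns 1 if the path already points at the very last key
 */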
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

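/*
 * look up a back ref for an extent, trying the inline back refs in the
 * extent item first and falling back to the keyed back ref items
 */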
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(root, path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}