#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

#define LEAK_DEBUG 0
#if LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
#endif

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

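/*
 * context passed between the extent writepage helpers while a range of
 * pages is being written out: the bio being built up, the io tree
 * involved and the callback used to map file offsets to extents.
 */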
struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use a WRITE_SYNC */
	unsigned int sync_io:1;
};

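/*
 * create the slab caches used for extent_state and extent_buffer
 * structures.  Called once when the extent io code is initialized.
 */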
int __init extent_io_init(void)
{
	extent_state_cache = kmem_cache_create("extent_state",
			sizeof(struct extent_state), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = kmem_cache_create("extent_buffers",
			sizeof(struct extent_buffer), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

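/*
 * destroy the slab caches.  With LEAK_DEBUG enabled, any extent_state or
 * extent_buffer structs still sitting on the leak lists are reported and
 * freed first.
 */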
void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,
		       state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);

	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
		       "refs %d\n", (unsigned long long)eb->start,
		       eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

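/*
 * initialize an empty extent_io_tree and tie it to the given
 * address_space.  Both rb trees start out empty.
 */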
void extent_io_tree_init(struct extent_io_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->state.rb_node = NULL;
	tree->buffer.rb_node = NULL;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}

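/*
 * allocate an extent_state from the slab cache and give the caller the
 * initial reference.  Returns NULL if the allocation fails.
 */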
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
#if LEAK_DEBUG
	unsigned long flags;
#endif

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
#if LEAK_DEBUG
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
#endif
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}

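/*
 * drop a reference on an extent_state, returning it to the slab cache
 * once the last reference is gone.  The state must no longer be linked
 * into a tree at that point.
 */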
static void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
#if LEAK_DEBUG
		unsigned long flags;
#endif
		WARN_ON(state->tree);
#if LEAK_DEBUG
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
#endif
		kmem_cache_free(extent_state_cache, state);
	}
}

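/*
 * link 'node' into an rb tree keyed by 'offset'.  If an entry already
 * covers this offset, nothing is inserted and the existing node is
 * returned instead; NULL means the insert succeeded.
 */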
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

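/*
 * walk the state tree looking for the entry that contains 'offset'.  If
 * there is no exact hit, *prev_ret is set to the first entry that ends on
 * or after 'offset' and *next_ret to the last entry that starts on or
 * before it.
 */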
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				     struct rb_node **prev_ret,
				     struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

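/*
 * return the state tree node that contains 'offset', or the next node in
 * the tree when nothing covers that offset.
 */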
static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret)
		return prev;
	return ret;
}

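/*
 * insert an extent_buffer into the per-tree buffer rb tree, keyed by its
 * start offset.  Returns any extent_buffer already cached at that offset.
 */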
static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
					  u64 offset, struct rb_node *node)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_buffer *eb;

	while (*p) {
		parent = *p;
		eb = rb_entry(parent, struct extent_buffer, rb_node);

		if (offset < eb->start)
			p = &(*p)->rb_left;
		else if (offset > eb->start)
			p = &(*p)->rb_right;
		else
			return eb;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

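/*
 * find the cached extent_buffer that starts at 'offset', or NULL if there
 * is none.
 */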
static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
					   u64 offset)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node *n = root->rb_node;
	struct extent_buffer *eb;

	while (n) {
		eb = rb_entry(n, struct extent_buffer, rb_node);
		if (offset < eb->start)
			n = n->rb_left;
		else if (offset > eb->start)
			n = n->rb_right;
		else
			return eb;
	}
	return NULL;
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

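/*
 * let the tree's owner know, via the optional set_bit_hook, that bits are
 * about to be set on this extent state.
 */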
static void set_state_cb(struct extent_io_tree *tree,
			 struct extent_state *state,
			 unsigned long bits)
{
	if (tree->ops && tree->ops->set_bit_hook) {
		tree->ops->set_bit_hook(tree->mapping->host, state->start,
					state->end, state->state, bits);
	}
}

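/*
 * counterpart of set_state_cb for the optional clear_bit_hook.
 */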
static void clear_state_cb(struct extent_io_tree *tree,
			   struct extent_state *state,
			   unsigned long bits)
{
	if (tree->ops && tree->ops->clear_bit_hook) {
		tree->ops->clear_bit_hook(tree->mapping->host, state->start,
					  state->end, state->state, bits);
	}
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk(KERN_ERR "btrfs end < start %llu %llu\n",
		       (unsigned long long)end,
		       (unsigned long long)start);
		WARN_ON(1);
	}
	if (bits & EXTENT_DIRTY)
		tree->dirty_bytes += end - start + 1;
	set_state_cb(tree, state, bits);
	state->state |= bits;
	state->start = start;
	state->end = end;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk(KERN_ERR "btrfs found node %llu %llu on insert of "
		       "%llu %llu\n", (unsigned long long)found->start,
		       (unsigned long long)found->end,
		       (unsigned long long)start, (unsigned long long)end);
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_io_tree *tree,
			    struct extent_state *state, int bits, int wake,
			    int delete)
{
	int ret = state->state & bits;

	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}
	clear_state_cb(tree, state, bits);
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->tree) {
			clear_state_cb(tree, state, state->state);
			rb_erase(&state->rb_node, &tree->state);
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits,
					wake, delete);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	set |= clear_state_bit(tree, state, bits, wake, delete);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

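/*
 * drop the tree lock and sleep until someone wakes this extent state
 * (normally when the bits being waited on are cleared), then retake the
 * lock before returning.
 */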
static int wait_on_state(struct extent_io_tree *tree,
			 struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			spin_unlock(&tree->lock);
			cond_resched();
			spin_lock(&tree->lock);
		}
	}
out:
	spin_unlock(&tree->lock);
	return 0;
}

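/*
 * set bits on a single extent state, updating the dirty byte accounting
 * and calling the set_bit hook first.
 */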
static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int bits)
{
	if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	set_state_cb(tree, state, bits);
	state->state |= bits;
}

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			  int bits, int exclusive, u64 *failed_start,
			  gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		struct rb_node *next_node;
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		set_state_bits(tree, state, bits);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;

		start = last_end + 1;
		if (start < end && prealloc && !need_resched()) {
			next_node = rb_next(node);
			if (next_node) {
				state = rb_entry(next_node, struct extent_state,
						 rb_node);
				if (state->start == start)
					goto hit_next;
			}
		}
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set_state_bits(tree, prealloc, bits);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}

int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
}

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      mask);
}

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
}

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
			      0, NULL, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}

int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
}

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}

static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}

static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}

static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
				  u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}

int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}

/*
 * either insert or lock the state struct between start and end; use mask
 * to tell us if waiting is desired.
 */
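/*
 * A typical calling pattern (illustrative only, not lifted from a real
 * caller) locks a byte range before doing IO on it and unlocks it
 * afterwards:
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	... read or write the bytes in [start, end] ...
 *	unlock_extent(tree, start, end, GFP_NOFS);
 */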
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}

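/*
 * non-blocking version of lock_extent: returns 1 if the whole range was
 * locked, 0 if any part of it was already locked elsewhere (anything we
 * did manage to lock is released again before returning).
 */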
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    gfp_t mask)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
			     &failed_start, mask);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, mask);
		return 0;
	}
	return 1;
}

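/*
 * clear EXTENT_LOCKED on the range and wake up anyone waiting on it.
 */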
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}

/*
 * helper function to set both pages and extents in the tree writeback
 */
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;