transaction.c 46.4 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
#include <linux/fs.h>
20
#include <linux/slab.h>
Chris Mason's avatar
Chris Mason committed
21
#include <linux/sched.h>
22
#include <linux/writeback.h>
23
#include <linux/pagemap.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
Chris Mason's avatar
Chris Mason committed
26
27
28
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
29
#include "locking.h"
30
#include "tree-log.h"
31
#include "inode-map.h"
32
#include "volumes.h"
Chris Mason's avatar
Chris Mason committed
33

34
35
#define BTRFS_ROOT_TRANS_TAG 0

36
void put_transaction(struct btrfs_transaction *transaction)
Chris Mason's avatar
Chris Mason committed
37
{
38
39
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
Josef Bacik's avatar
Josef Bacik committed
40
		BUG_ON(!list_empty(&transaction->list));
41
		WARN_ON(transaction->delayed_refs.root.rb_node);
Chris Mason's avatar
Chris Mason committed
42
43
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
Chris Mason's avatar
Chris Mason committed
44
	}
Chris Mason's avatar
Chris Mason committed
45
46
}

Josef Bacik's avatar
Josef Bacik committed
47
48
49
50
51
52
/*
 * Release the extent buffer currently held as root->commit_root and make
 * commit_root point at whatever btrfs_root_node() returns for @root.
 */
static noinline void switch_commit_root(struct btrfs_root *root)
{
	struct extent_buffer *stale = root->commit_root;

	free_extent_buffer(stale);
	root->commit_root = btrfs_root_node(root);
}

Chris Mason's avatar
Chris Mason committed
53
54
55
/*
 * either allocate a new transaction or hop into the existing one
 */
56
static noinline int join_transaction(struct btrfs_root *root, int type)
Chris Mason's avatar
Chris Mason committed
57
58
{
	struct btrfs_transaction *cur_trans;
59
	struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik's avatar
Josef Bacik committed
60

61
	spin_lock(&fs_info->trans_lock);
62
loop:
63
	/* The file system has been taken offline. No new transactions. */
64
65
	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		spin_unlock(&fs_info->trans_lock);
66
67
68
		return -EROFS;
	}

69
	if (fs_info->trans_no_join) {
70
71
72
73
74
75
76
		/* 
		 * If we are JOIN_NOLOCK we're already committing a current
		 * transaction, we just need a handle to deal with something
		 * when committing the transaction, such as inode cache and
		 * space cache. It is a special case.
		 */
		if (type != TRANS_JOIN_NOLOCK) {
77
			spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
78
79
80
81
			return -EBUSY;
		}
	}

82
	cur_trans = fs_info->running_transaction;
Josef Bacik's avatar
Josef Bacik committed
83
	if (cur_trans) {
84
		if (cur_trans->aborted) {
85
			spin_unlock(&fs_info->trans_lock);
86
			return cur_trans->aborted;
87
		}
Josef Bacik's avatar
Josef Bacik committed
88
		atomic_inc(&cur_trans->use_count);
89
		atomic_inc(&cur_trans->num_writers);
90
		cur_trans->num_joined++;
91
		spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
92
		return 0;
Chris Mason's avatar
Chris Mason committed
93
	}
94
	spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
95

96
97
98
99
100
101
102
	/*
	 * If we are ATTACH, we just want to catch the current transaction,
	 * and commit it. If there is no transaction, just return ENOENT.
	 */
	if (type == TRANS_ATTACH)
		return -ENOENT;

Josef Bacik's avatar
Josef Bacik committed
103
104
105
	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;
106

107
108
	spin_lock(&fs_info->trans_lock);
	if (fs_info->running_transaction) {
109
110
111
112
		/*
		 * someone started a transaction after we unlocked.  Make sure
		 * to redo the trans_no_join checks above
		 */
Josef Bacik's avatar
Josef Bacik committed
113
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
114
		cur_trans = fs_info->running_transaction;
115
		goto loop;
116
117
	} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		spin_unlock(&fs_info->trans_lock);
118
119
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		return -EROFS;
Chris Mason's avatar
Chris Mason committed
120
	}
121

Josef Bacik's avatar
Josef Bacik committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
	atomic_set(&cur_trans->num_writers, 1);
	cur_trans->num_joined = 0;
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
	cur_trans->in_commit = 0;
	cur_trans->blocked = 0;
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
	cur_trans->commit_done = 0;
	cur_trans->start_time = get_seconds();

	cur_trans->delayed_refs.root = RB_ROOT;
	cur_trans->delayed_refs.num_entries = 0;
	cur_trans->delayed_refs.num_heads_ready = 0;
	cur_trans->delayed_refs.num_heads = 0;
	cur_trans->delayed_refs.flushing = 0;
	cur_trans->delayed_refs.run_delayed_start = 0;
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

	/*
	 * although the tree mod log is per file system and not per transaction,
	 * the log must never go across transaction boundaries.
	 */
	smp_mb();
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when "
			"creating a fresh transaction\n");
		WARN_ON(1);
	}
	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) {
		printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
			"creating a fresh transaction\n");
		WARN_ON(1);
	}
	atomic_set(&fs_info->tree_mod_seq, 0);

Josef Bacik's avatar
Josef Bacik committed
160
161
162
163
	spin_lock_init(&cur_trans->commit_lock);
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
164
	list_add_tail(&cur_trans->list, &fs_info->trans_list);
Josef Bacik's avatar
Josef Bacik committed
165
	extent_io_tree_init(&cur_trans->dirty_pages,
166
167
168
169
			     fs_info->btree_inode->i_mapping);
	fs_info->generation++;
	cur_trans->transid = fs_info->generation;
	fs_info->running_transaction = cur_trans;
170
	cur_trans->aborted = 0;
171
	spin_unlock(&fs_info->trans_lock);
172

Chris Mason's avatar
Chris Mason committed
173
174
175
	return 0;
}

Chris Mason's avatar
Chris Mason committed
176
/*
177
178
179
180
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
Chris Mason's avatar
Chris Mason committed
181
 */
Chris Mason's avatar
Chris Mason committed
182
static int record_root_in_trans(struct btrfs_trans_handle *trans,
Josef Bacik's avatar
Josef Bacik committed
183
			       struct btrfs_root *root)
184
{
185
	if (root->ref_cows && root->last_trans < trans->transid) {
186
		WARN_ON(root == root->fs_info->extent_root);
187
188
		WARN_ON(root->commit_root != root->node);

Chris Mason's avatar
Chris Mason committed
189
190
191
192
193
194
195
196
197
198
199
200
		/*
		 * see below for in_trans_setup usage rules
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
		root->in_trans_setup = 1;

		/* make sure readers find in_trans_setup before
		 * they find our root->last_trans update
		 */
		smp_wmb();

Josef Bacik's avatar
Josef Bacik committed
201
202
203
204
205
		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
206
207
208
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
			   (unsigned long)root->root_key.objectid,
			   BTRFS_ROOT_TRANS_TAG);
Josef Bacik's avatar
Josef Bacik committed
209
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
Chris Mason's avatar
Chris Mason committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
		root->last_trans = trans->transid;

		/* this is pretty tricky.  We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c.  The solution used here is to flag ourselves
		 * with root->in_trans_setup.  When this is 1, we're still
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take the
		 * lock.  smp_wmb() makes sure that all the writes above are
		 * done before we pop in the zero below
		 */
231
		btrfs_init_reloc_root(trans, root);
Chris Mason's avatar
Chris Mason committed
232
233
		smp_wmb();
		root->in_trans_setup = 0;
234
235
236
	}
	return 0;
}
237

Chris Mason's avatar
Chris Mason committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260

/*
 * Lock-avoiding front end for record_root_in_trans().  Roots without
 * ref_cows are ignored.  The reloc mutex is only taken when @root does not
 * yet carry this transaction's id or is still flagged in_trans_setup.
 * Always returns 0.
 */
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (root->ref_cows) {
		/*
		 * see record_root_in_trans for comments about
		 * in_trans_setup usage and barriers
		 */
		smp_rmb();
		if (root->last_trans != trans->transid ||
		    root->in_trans_setup) {
			mutex_lock(&root->fs_info->reloc_mutex);
			record_root_in_trans(trans, root);
			mutex_unlock(&root->fs_info->reloc_mutex);
		}
	}

	return 0;
}

Chris Mason's avatar
Chris Mason committed
261
262
263
264
/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
Chris Mason's avatar
Chris Mason committed
265
static void wait_current_trans(struct btrfs_root *root)
Chris Mason's avatar
Chris Mason committed
266
{
267
	struct btrfs_transaction *cur_trans;
Chris Mason's avatar
Chris Mason committed
268

Josef Bacik's avatar
Josef Bacik committed
269
	spin_lock(&root->fs_info->trans_lock);
270
	cur_trans = root->fs_info->running_transaction;
Chris Mason's avatar
Chris Mason committed
271
	if (cur_trans && cur_trans->blocked) {
272
		atomic_inc(&cur_trans->use_count);
Josef Bacik's avatar
Josef Bacik committed
273
		spin_unlock(&root->fs_info->trans_lock);
Li Zefan's avatar
Li Zefan committed
274
275
276

		wait_event(root->fs_info->transaction_wait,
			   !cur_trans->blocked);
277
		put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
278
279
	} else {
		spin_unlock(&root->fs_info->trans_lock);
280
	}
Chris Mason's avatar
Chris Mason committed
281
282
}

283
284
/*
 * Decide whether a caller starting a transaction of @type should wait for
 * a blocked transaction.  Returns 1 to wait, 0 not to.
 *
 * Never wait while log_root_recovering is set.  TRANS_USERSPACE always
 * waits; TRANS_START waits only when open_ioctl_trans is zero.
 */
static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}

298
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
299
300
						    u64 num_items, int type,
						    int noflush)
Chris Mason's avatar
Chris Mason committed
301
{
302
303
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
304
	u64 num_bytes = 0;
Chris Mason's avatar
Chris Mason committed
305
	int ret;
306
	u64 qgroup_reserved = 0;
307
308
309

	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
		return ERR_PTR(-EROFS);
310
311
312
313
314
315
316
317
318

	if (current->journal_info) {
		WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
		h = current->journal_info;
		h->use_count++;
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}
319
320
321
322
323
324

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
325
326
327
328
329
330
331
332
		if (root->fs_info->quota_enabled &&
		    is_fstree(root->root_key.objectid)) {
			qgroup_reserved = num_items * root->leafsize;
			ret = btrfs_qgroup_reserve(root, qgroup_reserved);
			if (ret)
				return ERR_PTR(ret);
		}

333
		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
334
335
336
337
338
339
340
341
		if (noflush)
			ret = btrfs_block_rsv_add_noflush(root,
						&root->fs_info->trans_block_rsv,
						num_bytes);
		else
			ret = btrfs_block_rsv_add(root,
						&root->fs_info->trans_block_rsv,
						num_bytes);
342
343
344
		if (ret)
			return ERR_PTR(ret);
	}
345
346
347
348
again:
	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	if (!h)
		return ERR_PTR(-ENOMEM);
Chris Mason's avatar
Chris Mason committed
349

350
351
352
353
354
355
	/*
	 * If we are JOIN_NOLOCK we're already committing a transaction and
	 * waiting on this guy, so we don't need to do the sb_start_intwrite
	 * because we're already holding a ref.  We need this because we could
	 * have raced in and did an fsync() on a file which can kick a commit
	 * and then we deadlock with somebody doing a freeze.
356
357
358
	 *
	 * If we are ATTACH, it means we just want to catch the current
	 * transaction and commit it, so we needn't do sb_start_intwrite(). 
359
	 */
360
	if (type < TRANS_JOIN_NOLOCK)
361
		sb_start_intwrite(root->fs_info->sb);
362

363
	if (may_wait_transaction(root, type))
Chris Mason's avatar
Chris Mason committed
364
		wait_current_trans(root);
365

Josef Bacik's avatar
Josef Bacik committed
366
	do {
367
		ret = join_transaction(root, type);
Josef Bacik's avatar
Josef Bacik committed
368
369
370
371
		if (ret == -EBUSY)
			wait_current_trans(root);
	} while (ret == -EBUSY);

Tsutomu Itoh's avatar
Tsutomu Itoh committed
372
	if (ret < 0) {
373
374
375
376
377
		/* We must get the transaction if we are JOIN_NOLOCK. */
		BUG_ON(type == TRANS_JOIN_NOLOCK);

		if (type < TRANS_JOIN_NOLOCK)
			sb_end_intwrite(root->fs_info->sb);
378
		kmem_cache_free(btrfs_trans_handle_cachep, h);
Tsutomu Itoh's avatar
Tsutomu Itoh committed
379
380
		return ERR_PTR(ret);
	}
381

382
383
384
385
	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
Chris Mason's avatar
Chris Mason committed
386
	h->blocks_used = 0;
387
	h->bytes_reserved = 0;
388
	h->root = root;
389
	h->delayed_ref_updates = 0;
390
	h->use_count = 1;
391
	h->adding_csums = 0;
392
	h->block_rsv = NULL;
393
	h->orig_rsv = NULL;
394
	h->aborted = 0;
395
	h->qgroup_reserved = qgroup_reserved;
396
	h->delayed_ref_elem.seq = 0;
397
	h->type = type;
398
	INIT_LIST_HEAD(&h->qgroup_ref_list);
399
	INIT_LIST_HEAD(&h->new_bgs);
400

401
402
403
404
405
406
	smp_mb();
	if (cur_trans->blocked && may_wait_transaction(root, type)) {
		btrfs_commit_transaction(h, root);
		goto again;
	}

407
	if (num_bytes) {
Josef Bacik's avatar
Josef Bacik committed
408
		trace_btrfs_space_reservation(root->fs_info, "transaction",
409
					      h->transid, num_bytes, 1);
410
411
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
412
	}
Josef Bacik's avatar
Josef Bacik committed
413

414
got_it:
Josef Bacik's avatar
Josef Bacik committed
415
	btrfs_record_root_in_trans(h, root);
416
417
418

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
Chris Mason's avatar
Chris Mason committed
419
420
421
	return h;
}

422
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
423
						   int num_items)
424
{
425
	return start_transaction(root, num_items, TRANS_START, 0);
426
}
427
428
429
430
431
432
433

/*
 * Same as btrfs_start_transaction(), but asks start_transaction() for the
 * noflush reservation variant.  Returns the handle or an ERR_PTR().
 */
struct btrfs_trans_handle *btrfs_start_transaction_noflush(
					struct btrfs_root *root, int num_items)
{
	const int noflush = 1;

	return start_transaction(root, num_items, TRANS_START, noflush);
}

434
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
435
{
436
	return start_transaction(root, 0, TRANS_JOIN, 0);
437
438
}

439
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
440
{
441
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
442
443
}

444
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
445
{
446
	return start_transaction(root, 0, TRANS_USERSPACE, 0);
447
448
}

449
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
450
{
451
	return start_transaction(root, 0, TRANS_ATTACH, 0);
452
453
}

Chris Mason's avatar
Chris Mason committed
454
/* wait for a transaction commit to be fully complete */
455
static noinline void wait_for_commit(struct btrfs_root *root,
456
457
				    struct btrfs_transaction *commit)
{
Li Zefan's avatar
Li Zefan committed
458
	wait_event(commit->commit_wait, commit->commit_done);
459
460
}

461
462
463
464
465
466
467
468
/*
 * Wait for a transaction commit to complete.
 *
 * With a non-zero @transid, wait for that transaction.  With @transid == 0,
 * wait for the newest transaction on trans_list that is in commit and not
 * yet done.
 *
 * Returns 0 when the wait finished or there was nothing to wait for, and
 * -EINVAL when @transid is newer than last_trans_committed but is not found
 * on trans_list.
 */
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret;

	ret = 0;
	if (transid) {
		/* already committed: nothing to wait for */
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
			if (t->transid > transid)
				break;
		}
		spin_unlock(&root->fs_info->trans_lock);
		ret = -EINVAL;
		if (!cur_trans)
			goto out;  /* bad transid */
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->in_commit) {
				if (t->commit_done)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	wait_for_commit(root, cur_trans);

	/* drop the reference taken while walking the list */
	put_transaction(cur_trans);
	ret = 0;
out:
	return ret;
}

Chris Mason's avatar
Chris Mason committed
512
513
void btrfs_throttle(struct btrfs_root *root)
{
Josef Bacik's avatar
Josef Bacik committed
514
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
515
		wait_current_trans(root);
Chris Mason's avatar
Chris Mason committed
516
517
}

518
519
520
521
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	int ret;
522
523

	ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
524
525
526
527
528
529
530
531
	return ret ? 1 : 0;
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;
532
	int err;
533

Josef Bacik's avatar
Josef Bacik committed
534
	smp_mb();
535
536
537
538
539
	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
540
541
542
543
544
	if (updates) {
		err = btrfs_run_delayed_refs(trans, root, updates);
		if (err) /* Error code will also eval true */
			return err;
	}
545
546
547
548

	return should_end_transaction(trans, root);
}

549
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
550
			  struct btrfs_root *root, int throttle)
Chris Mason's avatar
Chris Mason committed
551
{
552
	struct btrfs_transaction *cur_trans = trans->transaction;
553
	struct btrfs_fs_info *info = root->fs_info;
554
	int count = 0;
555
	int lock = (trans->type != TRANS_JOIN_NOLOCK);
556
	int err = 0;
557

558
559
560
561
562
	if (--trans->use_count) {
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

563
564
565
566
567
	/*
	 * do the qgroup accounting as early as possible
	 */
	err = btrfs_delayed_refs_qgroup_accounting(trans, info);

568
	btrfs_trans_release_metadata(trans, root);
569
	trans->block_rsv = NULL;
570
571
572
573
574
	/*
	 * the same root has to be passed to start_transaction and
	 * end_transaction. Subvolume quota depends on this.
	 */
	WARN_ON(trans->root != root);
575
576
577
578
579
580

	if (trans->qgroup_reserved) {
		btrfs_qgroup_free(root, trans->qgroup_reserved);
		trans->qgroup_reserved = 0;
	}

581
582
583
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

584
	while (count < 2) {
585
586
587
588
589
590
591
592
593
594
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
595
	}
596
597
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
598

599
600
601
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

Josef Bacik's avatar
Josef Bacik committed
602
603
	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
	    should_end_transaction(trans, root)) {
604
		trans->transaction->blocked = 1;
Josef Bacik's avatar
Josef Bacik committed
605
606
		smp_wmb();
	}
607

608
	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
609
610
611
612
613
614
615
		if (throttle) {
			/*
			 * We may race with somebody else here so end up having
			 * to call end_transaction on ourselves again, so inc
			 * our use_count.
			 */
			trans->use_count++;
616
			return btrfs_commit_transaction(trans, root);
617
		} else {
618
			wake_up_process(info->transaction_kthread);
619
		}
620
621
	}

622
	if (trans->type < TRANS_JOIN_NOLOCK)
623
		sb_end_intwrite(root->fs_info->sb);
624

625
	WARN_ON(cur_trans != info->running_transaction);
626
627
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);
628

629
	smp_mb();
Chris Mason's avatar
Chris Mason committed
630
631
632
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
633
634
635

	if (current->journal_info == trans)
		current->journal_info = NULL;
636

Yan, Zheng's avatar
Yan, Zheng committed
637
638
639
	if (throttle)
		btrfs_run_delayed_iputs(root);

640
641
	if (trans->aborted ||
	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
642
		err = -EIO;
643
	}
644
	assert_qgroups_uptodate(trans);
645

646
647
648
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
	return err;
Chris Mason's avatar
Chris Mason committed
649
650
}

651
652
653
/*
 * End the handle @trans without throttling.
 * Returns whatever __btrfs_end_transaction() returns.
 */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

/*
 * End the handle @trans with throttling enabled.
 * Returns whatever __btrfs_end_transaction() returns.
 */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

/*
 * End the handle @trans with throttling enabled; currently the same call as
 * btrfs_end_transaction_throttle().
 */
int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

Chris Mason's avatar
Chris Mason committed
679
680
681
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
682
 * those extents are sent to disk but does not wait on them
Chris Mason's avatar
Chris Mason committed
683
 */
684
int btrfs_write_marked_extents(struct btrfs_root *root,
685
			       struct extent_io_tree *dirty_pages, int mark)
Chris Mason's avatar
Chris Mason committed
686
{
687
	int err = 0;
688
	int werr = 0;
Josef Bacik's avatar
Josef Bacik committed
689
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
690
	struct extent_state *cached_state = NULL;
691
	u64 start = 0;
692
	u64 end;
693

Josef Bacik's avatar
Josef Bacik committed
694
	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
695
696
697
698
				      mark, &cached_state)) {
		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
				   mark, &cached_state, GFP_NOFS);
		cached_state = NULL;
Josef Bacik's avatar
Josef Bacik committed
699
700
701
702
703
		err = filemap_fdatawrite_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
704
	}
705
706
707
708
709
710
711
712
713
714
715
716
	if (err)
		werr = err;
	return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
717
			      struct extent_io_tree *dirty_pages, int mark)
718
719
720
{
	int err = 0;
	int werr = 0;
Josef Bacik's avatar
Josef Bacik committed
721
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
722
	struct extent_state *cached_state = NULL;
723
724
	u64 start = 0;
	u64 end;
725

Josef Bacik's avatar
Josef Bacik committed
726
	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
727
728
729
				      EXTENT_NEED_WAIT, &cached_state)) {
		clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
				 0, 0, &cached_state, GFP_NOFS);
Josef Bacik's avatar
Josef Bacik committed
730
731
732
733
734
		err = filemap_fdatawait_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
735
	}
736
737
738
	if (err)
		werr = err;
	return werr;
Chris Mason's avatar
Chris Mason committed
739
740
}

741
742
743
744
745
746
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
/*
 * Write out and then wait on all ranges of @dirty_pages marked with @mark.
 * The wait pass always runs, even if the write pass failed.
 *
 * Returns the write error if there was one, else the wait error, else 0.
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
				struct extent_io_tree *dirty_pages, int mark)
{
	int ret;
	int ret2;

	ret = btrfs_write_marked_extents(root, dirty_pages, mark);
	ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

	/* a write error takes precedence over a wait error */
	if (ret)
		return ret;
	return ret2;
}

762
763
764
765
766
767
768
769
770
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
771
772
					   &trans->transaction->dirty_pages,
					   EXTENT_DIRTY);
773
774
}

Chris Mason's avatar
Chris Mason committed
775
776
777
778
779
780
781
782
783
784
/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
785
786
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
Chris Mason's avatar
Chris Mason committed
787
788
{
	int ret;
789
	u64 old_root_bytenr;
790
	u64 old_root_used;
791
	struct btrfs_root *tree_root = root->fs_info->tree_root;
Chris Mason's avatar
Chris Mason committed
792

793
	old_root_used = btrfs_root_used(&root->root_item);
794
	btrfs_write_dirty_block_groups(trans, root);
795

796
	while (1) {
797
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
798
799
		if (old_root_bytenr == root->node->start &&
		    old_root_used == btrfs_root_used(&root->root_item))
Chris Mason's avatar
Chris Mason committed
800
			break;
801

802
		btrfs_set_root_node(&root->root_item, root->node);
Chris Mason's avatar
Chris Mason committed
803
		ret = btrfs_update_root(trans, tree_root,
804
805
					&root->root_key,
					&root->root_item);
806
807
		if (ret)
			return ret;
808

809
		old_root_used = btrfs_root_used(&root->root_item);
810
		ret = btrfs_write_dirty_block_groups(trans, root);
811
812
		if (ret)
			return ret;
813
	}
814
815
816
817

	if (root != root->fs_info->extent_root)
		switch_commit_root(root);

818
819
820
	return 0;
}

Chris Mason's avatar
Chris Mason committed
821
822
/*
 * update all the cowonly tree roots on disk
823
824
825
826
 *
 * The error handling in this function may not be obvious. Any of the
 * failures will cause the file system to go offline. We still need
 * to clean up the delayed refs.
Chris Mason's avatar
Chris Mason committed
827
 */
828
829
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
830
831
832
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
833
	struct extent_buffer *eb;
834
	int ret;
835

836
	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
837
838
	if (ret)
		return ret;
839

840
	eb = btrfs_lock_root_node(fs_info->tree_root);
841
842
	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
			      0, &eb);
843
844
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
845

846
847
848
	if (ret)
		return ret;

849
	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
850
851
	if (ret)
		return ret;
852

853
854
855
	ret = btrfs_run_dev_stats(trans, root->fs_info);
	BUG_ON(ret);

856
857
858
859
860
861
862
	ret = btrfs_run_qgroups(trans, root->fs_info);
	BUG_ON(ret);

	/* run_qgroups might have added some more refs */
	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

863
	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
864
865
866
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);
867

868
869
870
		ret = update_cowonly_root(trans, root);
		if (ret)
			return ret;
Chris Mason's avatar
Chris Mason committed
871
	}
872
873
874
875
876

	down_write(&fs_info->extent_commit_sem);
	switch_commit_root(fs_info->extent_root);
	up_write(&fs_info->extent_commit_sem);

Chris Mason's avatar
Chris Mason committed
877
878
879
	return 0;
}

Chris Mason's avatar
Chris Mason committed
880
881
882
883
884
/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
885
int btrfs_add_dead_root(struct btrfs_root *root)
886
{
Josef Bacik's avatar
Josef Bacik committed
887
	spin_lock(&root->fs_info->trans_lock);
888
	list_add(&root->root_list, &root->fs_info->dead_roots);
Josef Bacik's avatar
Josef Bacik committed
889
	spin_unlock(&root->fs_info->trans_lock);
890
891
892
	return 0;
}

Chris Mason's avatar
Chris Mason committed
893
/*
 * update the root items of all fs roots tagged with BTRFS_ROOT_TRANS_TAG
 * in fs_roots_radix
 */
896
897
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
898
899
{
	struct btrfs_root *gang[8];
900
	struct btrfs_fs_info *fs_info = root->fs_info;
901
902
	int i;
	int ret;
903
904
	int err = 0;

Josef Bacik's avatar
Josef Bacik committed
905
	spin_lock(&fs_info->fs_roots_radix_lock);
906
	while (1) {
907
908
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
909
910
911
912
913
914
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
915
916
917
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);
Josef Bacik's avatar
Josef Bacik committed
918
			spin_unlock(&fs_info->fs_roots_radix_lock);
Yan Zheng's avatar
Yan Zheng committed
919

920
			btrfs_free_log(trans, root);
921
			btrfs_update_reloc_root(trans, root);
922
			btrfs_orphan_commit_root(trans, root);
923

924
925
			btrfs_save_ino_cache(root, trans);

926
927
928
929
			/* see comments in should_cow_block() */
			root->force_cow = 0;
			smp_wmb();

930
			if (root->commit_root != root->node) {
931
				mutex_lock(&root->fs_commit_mutex);
Josef Bacik's avatar
Josef Bacik committed
932
				switch_commit_root(root);
933
934
935
				btrfs_unpin_free_ino(root);
				mutex_unlock(&root->fs_commit_mutex);

936
937
938
				btrfs_set_root_node(&root->root_item,
						    root->node);
			}
939
940

			err = btrfs_update_root(trans, fs_info->tree_root,
941
942
						&root->root_key,
						&root->root_item);
Josef Bacik's avatar
Josef Bacik committed
943
			spin_lock(&fs_info->fs_roots_radix_lock);
944
945
			if (err)
				break;
946
947
		}
	}
Josef Bacik's avatar
Josef Bacik committed
948
	spin_unlock(&fs_info->fs_roots_radix_lock);
949
	return err;
950
951
}

Chris Mason's avatar
Chris Mason committed
952
953
954
955
/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
956
957
958
959
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_trans_handle *trans;
960
	int ret;
961
	unsigned long nr;
962

963
	if (xchg(&root->defrag_running, 1))
964
		return 0;
965

966
	while (1) {
967
968
969
970
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);