transaction.c 62.9 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
#include <linux/fs.h>
20
#include <linux/slab.h>
Chris Mason's avatar
Chris Mason committed
21
#include <linux/sched.h>
22
#include <linux/writeback.h>
23
#include <linux/pagemap.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
Chris Mason's avatar
Chris Mason committed
26
27
28
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
29
#include "locking.h"
30
#include "tree-log.h"
31
#include "inode-map.h"
32
#include "volumes.h"
33
#include "dev-replace.h"
Josef Bacik's avatar
Josef Bacik committed
34
#include "qgroup.h"
Chris Mason's avatar
Chris Mason committed
35

36
37
#define BTRFS_ROOT_TRANS_TAG 0

38
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
	[TRANS_STATE_RUNNING]		= 0U,
	[TRANS_STATE_BLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START),
	[TRANS_STATE_COMMIT_START]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN),
	[TRANS_STATE_UNBLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
	[TRANS_STATE_COMPLETED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
};

61
void btrfs_put_transaction(struct btrfs_transaction *transaction)
Chris Mason's avatar
Chris Mason committed
62
{
63
64
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
Josef Bacik's avatar
Josef Bacik committed
65
		BUG_ON(!list_empty(&transaction->list));
Liu Bo's avatar
Liu Bo committed
66
		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67
68
69
		if (transaction->delayed_refs.pending_csums)
			printk(KERN_ERR "pending csums is %llu\n",
			       transaction->delayed_refs.pending_csums);
70
71
72
73
74
75
76
77
		while (!list_empty(&transaction->pending_chunks)) {
			struct extent_map *em;

			em = list_first_entry(&transaction->pending_chunks,
					      struct extent_map, list);
			list_del_init(&em->list);
			free_extent_map(em);
		}
Chris Mason's avatar
Chris Mason committed
78
		kmem_cache_free(btrfs_transaction_cachep, transaction);
Chris Mason's avatar
Chris Mason committed
79
	}
Chris Mason's avatar
Chris Mason committed
80
81
}

82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
 * Remove and free every extent_state in @tree.
 *
 * The whole walk runs under tree->lock; cond_resched_lock() is called after
 * each entry is freed.
 */
static void clear_btree_io_tree(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *first = rb_first(&tree->state);
		struct extent_state *es;

		es = rb_entry(first, struct extent_state, rb_node);
		rb_erase(&es->rb_node, &tree->state);
		RB_CLEAR_NODE(&es->rb_node);

		/*
		 * Nobody is ever expected to sleep waiting for a flag change
		 * on an extent state that belongs to a btree io tree.
		 */
		ASSERT(!waitqueue_active(&es->wq));
		free_extent_state(es);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

105
106
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
					 struct btrfs_fs_info *fs_info)
Josef Bacik's avatar
Josef Bacik committed
107
{
108
109
110
111
112
113
114
115
116
117
	struct btrfs_root *root, *tmp;

	down_write(&fs_info->commit_root_sem);
	list_for_each_entry_safe(root, tmp, &trans->switch_commits,
				 dirty_list) {
		list_del_init(&root->dirty_list);
		free_extent_buffer(root->commit_root);
		root->commit_root = btrfs_root_node(root);
		if (is_fstree(root->objectid))
			btrfs_unpin_free_ino(root);
118
		clear_btree_io_tree(&root->dirty_log_pages);
119
	}
120
121
122
123
124
125
126
127
128
129
130
131

	/* We can free old roots now. */
	spin_lock(&trans->dropped_roots_lock);
	while (!list_empty(&trans->dropped_roots)) {
		root = list_first_entry(&trans->dropped_roots,
					struct btrfs_root, root_list);
		list_del_init(&root->root_list);
		spin_unlock(&trans->dropped_roots_lock);
		btrfs_drop_and_free_fs_root(fs_info, root);
		spin_lock(&trans->dropped_roots_lock);
	}
	spin_unlock(&trans->dropped_roots_lock);
132
	up_write(&fs_info->commit_root_sem);
Josef Bacik's avatar
Josef Bacik committed
133
134
}

135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/* Count one more external writer on @trans if @type is an extwriter type. */
static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (!(type & TRANS_EXTWRITERS))
		return;

	atomic_inc(&trans->num_extwriters);
}

/* Count one fewer external writer on @trans if @type is an extwriter type. */
static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (!(type & TRANS_EXTWRITERS))
		return;

	atomic_dec(&trans->num_extwriters);
}

/*
 * Initialise the external writer count of @trans: 1 if the creating handle's
 * @type is an extwriter type, 0 otherwise.
 */
static inline void extwriter_counter_init(struct btrfs_transaction *trans,
					  unsigned int type)
{
	int initial = 0;

	if (type & TRANS_EXTWRITERS)
		initial = 1;

	atomic_set(&trans->num_extwriters, initial);
}

/* Return the current external writer count of @trans. */
static inline int extwriter_counter_read(struct btrfs_transaction *trans)
{
	int writers = atomic_read(&trans->num_extwriters);

	return writers;
}

Chris Mason's avatar
Chris Mason committed
160
161
162
/*
 * either allocate a new transaction or hop into the existing one
 */
163
static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
Chris Mason's avatar
Chris Mason committed
164
165
{
	struct btrfs_transaction *cur_trans;
166
	struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik's avatar
Josef Bacik committed
167

168
	spin_lock(&fs_info->trans_lock);
169
loop:
170
	/* The file system has been taken offline. No new transactions. */
171
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
172
		spin_unlock(&fs_info->trans_lock);
173
174
175
		return -EROFS;
	}

176
	cur_trans = fs_info->running_transaction;
Josef Bacik's avatar
Josef Bacik committed
177
	if (cur_trans) {
178
		if (cur_trans->aborted) {
179
			spin_unlock(&fs_info->trans_lock);
180
			return cur_trans->aborted;
181
		}
182
		if (btrfs_blocked_trans_types[cur_trans->state] & type) {
183
184
185
			spin_unlock(&fs_info->trans_lock);
			return -EBUSY;
		}
Josef Bacik's avatar
Josef Bacik committed
186
		atomic_inc(&cur_trans->use_count);
187
		atomic_inc(&cur_trans->num_writers);
188
		extwriter_counter_inc(cur_trans, type);
189
		spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
190
		return 0;
Chris Mason's avatar
Chris Mason committed
191
	}
192
	spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
193

194
195
196
197
198
199
200
	/*
	 * If we are ATTACH, we just want to catch the current transaction,
	 * and commit it. If there is no transaction, just return ENOENT.
	 */
	if (type == TRANS_ATTACH)
		return -ENOENT;

201
202
203
204
205
206
	/*
	 * JOIN_NOLOCK only happens during the transaction commit, so
	 * it is impossible that ->running_transaction is NULL
	 */
	BUG_ON(type == TRANS_JOIN_NOLOCK);

Josef Bacik's avatar
Josef Bacik committed
207
208
209
	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;
210

211
212
	spin_lock(&fs_info->trans_lock);
	if (fs_info->running_transaction) {
213
214
		/*
		 * someone started a transaction after we unlocked.  Make sure
215
		 * to redo the checks above
216
		 */
Josef Bacik's avatar
Josef Bacik committed
217
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
218
		goto loop;
219
	} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
220
		spin_unlock(&fs_info->trans_lock);
221
222
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		return -EROFS;
Chris Mason's avatar
Chris Mason committed
223
	}
224

Josef Bacik's avatar
Josef Bacik committed
225
	atomic_set(&cur_trans->num_writers, 1);
226
	extwriter_counter_init(cur_trans, type);
Josef Bacik's avatar
Josef Bacik committed
227
228
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
229
	cur_trans->state = TRANS_STATE_RUNNING;
Josef Bacik's avatar
Josef Bacik committed
230
231
232
233
234
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
Zhao Lei's avatar
Zhao Lei committed
235
	cur_trans->have_free_bgs = 0;
Josef Bacik's avatar
Josef Bacik committed
236
	cur_trans->start_time = get_seconds();
237
	cur_trans->dirty_bg_run = 0;
Josef Bacik's avatar
Josef Bacik committed
238

Liu Bo's avatar
Liu Bo committed
239
	cur_trans->delayed_refs.href_root = RB_ROOT;
240
	cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
241
	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
Josef Bacik's avatar
Josef Bacik committed
242
	cur_trans->delayed_refs.num_heads_ready = 0;
243
	cur_trans->delayed_refs.pending_csums = 0;
Josef Bacik's avatar
Josef Bacik committed
244
245
246
	cur_trans->delayed_refs.num_heads = 0;
	cur_trans->delayed_refs.flushing = 0;
	cur_trans->delayed_refs.run_delayed_start = 0;
247
	cur_trans->delayed_refs.qgroup_to_skip = 0;
248
249
250
251
252
253

	/*
	 * although the tree mod log is per file system and not per transaction,
	 * the log must never go across transaction boundaries.
	 */
	smp_mb();
Julia Lawall's avatar
Julia Lawall committed
254
	if (!list_empty(&fs_info->tree_mod_seq_list))
255
		WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
256
			"creating a fresh transaction\n");
Julia Lawall's avatar
Julia Lawall committed
257
	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
258
		WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
259
			"creating a fresh transaction\n");
260
	atomic64_set(&fs_info->tree_mod_seq, 0);
261

Josef Bacik's avatar
Josef Bacik committed
262
263
264
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
265
	INIT_LIST_HEAD(&cur_trans->pending_chunks);
266
	INIT_LIST_HEAD(&cur_trans->switch_commits);
267
	INIT_LIST_HEAD(&cur_trans->pending_ordered);
268
	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
269
	INIT_LIST_HEAD(&cur_trans->io_bgs);
270
	INIT_LIST_HEAD(&cur_trans->dropped_roots);
271
	mutex_init(&cur_trans->cache_write_mutex);
272
	cur_trans->num_dirty_bgs = 0;
273
	spin_lock_init(&cur_trans->dirty_bgs_lock);
274
275
	INIT_LIST_HEAD(&cur_trans->deleted_bgs);
	spin_lock_init(&cur_trans->deleted_bgs_lock);
276
	spin_lock_init(&cur_trans->dropped_roots_lock);
277
	list_add_tail(&cur_trans->list, &fs_info->trans_list);
Josef Bacik's avatar
Josef Bacik committed
278
	extent_io_tree_init(&cur_trans->dirty_pages,
279
280
281
282
			     fs_info->btree_inode->i_mapping);
	fs_info->generation++;
	cur_trans->transid = fs_info->generation;
	fs_info->running_transaction = cur_trans;
283
	cur_trans->aborted = 0;
284
	spin_unlock(&fs_info->trans_lock);
285

Chris Mason's avatar
Chris Mason committed
286
287
288
	return 0;
}

Chris Mason's avatar
Chris Mason committed
289
/*
290
291
292
293
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
Chris Mason's avatar
Chris Mason committed
294
 */
Chris Mason's avatar
Chris Mason committed
295
static int record_root_in_trans(struct btrfs_trans_handle *trans,
Josef Bacik's avatar
Josef Bacik committed
296
			       struct btrfs_root *root)
297
{
298
299
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
	    root->last_trans < trans->transid) {
300
		WARN_ON(root == root->fs_info->extent_root);
301
302
		WARN_ON(root->commit_root != root->node);

Chris Mason's avatar
Chris Mason committed
303
		/*
304
		 * see below for IN_TRANS_SETUP usage rules
Chris Mason's avatar
Chris Mason committed
305
306
307
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
308
		set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
Chris Mason's avatar
Chris Mason committed
309

310
		/* make sure readers find IN_TRANS_SETUP before
Chris Mason's avatar
Chris Mason committed
311
312
313
314
		 * they find our root->last_trans update
		 */
		smp_wmb();

Josef Bacik's avatar
Josef Bacik committed
315
316
317
318
319
		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
320
321
322
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
			   (unsigned long)root->root_key.objectid,
			   BTRFS_ROOT_TRANS_TAG);
Josef Bacik's avatar
Josef Bacik committed
323
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
Chris Mason's avatar
Chris Mason committed
324
325
326
327
328
329
330
331
332
333
334
335
336
		root->last_trans = trans->transid;

		/* this is pretty tricky.  We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c.  The solution used here is to flag ourselves
337
		 * with root IN_TRANS_SETUP.  When this is 1, we're still
Chris Mason's avatar
Chris Mason committed
338
339
340
341
342
343
344
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take the
		 * lock.  smp_wmb() makes sure that all the writes above are
		 * done before we pop in the zero below
		 */
345
		btrfs_init_reloc_root(trans, root);
346
		smp_mb__before_atomic();
347
		clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
348
349
350
	}
	return 0;
}
351

Chris Mason's avatar
Chris Mason committed
352

353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	/* Add ourselves to the transaction dropped list */
	spin_lock(&cur_trans->dropped_roots_lock);
	list_add_tail(&root->root_list, &cur_trans->dropped_roots);
	spin_unlock(&cur_trans->dropped_roots_lock);

	/* Make sure we don't try to update the root at commit time */
	spin_lock(&root->fs_info->fs_roots_radix_lock);
	radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
			     (unsigned long)root->root_key.objectid,
			     BTRFS_ROOT_TRANS_TAG);
	spin_unlock(&root->fs_info->fs_roots_radix_lock);
}

Chris Mason's avatar
Chris Mason committed
371
372
373
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
374
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Chris Mason's avatar
Chris Mason committed
375
376
377
		return 0;

	/*
378
	 * see record_root_in_trans for comments about IN_TRANS_SETUP usage
Chris Mason's avatar
Chris Mason committed
379
380
381
382
	 * and barriers
	 */
	smp_rmb();
	if (root->last_trans == trans->transid &&
383
	    !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
Chris Mason's avatar
Chris Mason committed
384
385
386
387
388
389
390
391
392
		return 0;

	mutex_lock(&root->fs_info->reloc_mutex);
	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->reloc_mutex);

	return 0;
}

393
394
395
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
	return (trans->state >= TRANS_STATE_BLOCKED &&
396
397
		trans->state < TRANS_STATE_UNBLOCKED &&
		!trans->aborted);
398
399
}

Chris Mason's avatar
Chris Mason committed
400
401
402
403
/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
Chris Mason's avatar
Chris Mason committed
404
static void wait_current_trans(struct btrfs_root *root)
Chris Mason's avatar
Chris Mason committed
405
{
406
	struct btrfs_transaction *cur_trans;
Chris Mason's avatar
Chris Mason committed
407

Josef Bacik's avatar
Josef Bacik committed
408
	spin_lock(&root->fs_info->trans_lock);
409
	cur_trans = root->fs_info->running_transaction;
410
	if (cur_trans && is_transaction_blocked(cur_trans)) {
411
		atomic_inc(&cur_trans->use_count);
Josef Bacik's avatar
Josef Bacik committed
412
		spin_unlock(&root->fs_info->trans_lock);
Li Zefan's avatar
Li Zefan committed
413
414

		wait_event(root->fs_info->transaction_wait,
415
416
			   cur_trans->state >= TRANS_STATE_UNBLOCKED ||
			   cur_trans->aborted);
417
		btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
418
419
	} else {
		spin_unlock(&root->fs_info->trans_lock);
420
	}
Chris Mason's avatar
Chris Mason committed
421
422
}

423
424
/*
 * Decide whether a handle of @type may wait on the current transaction.
 *
 * Returns 0 while log_root_recovering is set.  Otherwise returns 1 for
 * TRANS_USERSPACE, 1 for TRANS_START when open_ioctl_trans is zero, and 0 in
 * every other case.
 */
static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type != TRANS_START)
		return 0;

	return !atomic_read(&root->fs_info->open_ioctl_trans);
}

438
439
440
static inline bool need_reserve_reloc_root(struct btrfs_root *root)
{
	if (!root->fs_info->reloc_ctl ||
441
	    !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
442
443
444
445
446
447
448
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    root->reloc_root)
		return false;

	return true;
}

449
static struct btrfs_trans_handle *
450
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
451
		  enum btrfs_reserve_flush_enum flush)
Chris Mason's avatar
Chris Mason committed
452
{
453
454
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
455
	u64 num_bytes = 0;
456
	u64 qgroup_reserved = 0;
457
458
	bool reloc_reserved = false;
	int ret;
459

460
	/* Send isn't supposed to start transactions. */
461
	ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB);
462

463
	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
464
		return ERR_PTR(-EROFS);
465

466
	if (current->journal_info) {
467
		WARN_ON(type & TRANS_EXTWRITERS);
468
469
		h = current->journal_info;
		h->use_count++;
470
		WARN_ON(h->use_count > 2);
471
472
473
474
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}
475
476
477
478
479
480

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
481
482
		if (root->fs_info->quota_enabled &&
		    is_fstree(root->root_key.objectid)) {
483
			qgroup_reserved = num_items * root->nodesize;
484
485
486
487
488
			ret = btrfs_qgroup_reserve(root, qgroup_reserved);
			if (ret)
				return ERR_PTR(ret);
		}

489
		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
490
491
492
		/*
		 * Do the reservation for the relocation root creation
		 */
493
		if (need_reserve_reloc_root(root)) {
494
495
496
497
			num_bytes += root->nodesize;
			reloc_reserved = true;
		}

498
499
500
		ret = btrfs_block_rsv_add(root,
					  &root->fs_info->trans_block_rsv,
					  num_bytes, flush);
501
		if (ret)
502
			goto reserve_fail;
503
	}
504
505
again:
	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
506
507
508
509
	if (!h) {
		ret = -ENOMEM;
		goto alloc_fail;
	}
Chris Mason's avatar
Chris Mason committed
510

511
512
513
514
515
516
	/*
	 * If we are JOIN_NOLOCK we're already committing a transaction and
	 * waiting on this guy, so we don't need to do the sb_start_intwrite
	 * because we're already holding a ref.  We need this because we could
	 * have raced in and did an fsync() on a file which can kick a commit
	 * and then we deadlock with somebody doing a freeze.
517
518
519
	 *
	 * If we are ATTACH, it means we just want to catch the current
	 * transaction and commit it, so we needn't do sb_start_intwrite(). 
520
	 */
521
	if (type & __TRANS_FREEZABLE)
522
		sb_start_intwrite(root->fs_info->sb);
523

524
	if (may_wait_transaction(root, type))
Chris Mason's avatar
Chris Mason committed
525
		wait_current_trans(root);
526

Josef Bacik's avatar
Josef Bacik committed
527
	do {
528
		ret = join_transaction(root, type);
529
		if (ret == -EBUSY) {
Josef Bacik's avatar
Josef Bacik committed
530
			wait_current_trans(root);
531
532
533
			if (unlikely(type == TRANS_ATTACH))
				ret = -ENOENT;
		}
Josef Bacik's avatar
Josef Bacik committed
534
535
	} while (ret == -EBUSY);

Tsutomu Itoh's avatar
Tsutomu Itoh committed
536
	if (ret < 0) {
537
538
		/* We must get the transaction if we are JOIN_NOLOCK. */
		BUG_ON(type == TRANS_JOIN_NOLOCK);
539
		goto join_fail;
Tsutomu Itoh's avatar
Tsutomu Itoh committed
540
	}
541

542
543
544
545
	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
Chris Mason's avatar
Chris Mason committed
546
	h->blocks_used = 0;
547
	h->bytes_reserved = 0;
548
	h->chunk_bytes_reserved = 0;
549
	h->root = root;
550
	h->delayed_ref_updates = 0;
551
	h->use_count = 1;
552
	h->adding_csums = 0;
553
	h->block_rsv = NULL;
554
	h->orig_rsv = NULL;
555
	h->aborted = 0;
556
	h->qgroup_reserved = 0;
557
	h->delayed_ref_elem.seq = 0;
558
	h->type = type;
559
	h->allocating_chunk = false;
560
	h->reloc_reserved = false;
561
	h->sync = false;
562
	INIT_LIST_HEAD(&h->qgroup_ref_list);
563
	INIT_LIST_HEAD(&h->new_bgs);
564
	INIT_LIST_HEAD(&h->ordered);
565

566
	smp_mb();
567
568
	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
	    may_wait_transaction(root, type)) {
569
		current->journal_info = h;
570
571
572
573
		btrfs_commit_transaction(h, root);
		goto again;
	}

574
	if (num_bytes) {
Josef Bacik's avatar
Josef Bacik committed
575
		trace_btrfs_space_reservation(root->fs_info, "transaction",
576
					      h->transid, num_bytes, 1);
577
578
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
579
		h->reloc_reserved = reloc_reserved;
580
	}
581
	h->qgroup_reserved = qgroup_reserved;
Josef Bacik's avatar
Josef Bacik committed
582

583
got_it:
Josef Bacik's avatar
Josef Bacik committed
584
	btrfs_record_root_in_trans(h, root);
585
586
587

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
Chris Mason's avatar
Chris Mason committed
588
	return h;
589
590

join_fail:
591
	if (type & __TRANS_FREEZABLE)
592
593
594
595
596
597
598
599
600
601
		sb_end_intwrite(root->fs_info->sb);
	kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
	if (num_bytes)
		btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
					num_bytes);
reserve_fail:
	if (qgroup_reserved)
		btrfs_qgroup_free(root, qgroup_reserved);
	return ERR_PTR(ret);
Chris Mason's avatar
Chris Mason committed
602
603
}

604
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
605
						   int num_items)
606
{
607
608
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_ALL);
609
}
610

611
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
612
613
					struct btrfs_root *root, int num_items)
{
614
615
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_LIMIT);
616
617
}

618
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
619
{
620
	return start_transaction(root, 0, TRANS_JOIN, 0);
621
622
}

623
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
624
{
625
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
626
627
}

628
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
629
{
630
	return start_transaction(root, 0, TRANS_USERSPACE, 0);
631
632
}

Miao Xie's avatar
Miao Xie committed
633
634
635
636
637
638
639
640
641
642
643
644
645
/*
 * btrfs_attach_transaction() - catch the running transaction
 *
 * It is used when we want to commit the current the transaction, but
 * don't want to start a new one.
 *
 * Note: If this function return -ENOENT, it just means there is no
 * running transaction. But it is possible that the inactive transaction
 * is still in the memory, not fully on disk. If you hope there is no
 * inactive transaction in the fs when -ENOENT is returned, you should
 * invoke
 *     btrfs_attach_transaction_barrier()
 */
646
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
647
{
648
	return start_transaction(root, 0, TRANS_ATTACH, 0);
649
650
}

Miao Xie's avatar
Miao Xie committed
651
/*
 * btrfs_attach_transaction_barrier() - catch the running transaction
 *
 * Like btrfs_attach_transaction(), except that when there is no running
 * transaction (-ENOENT) it waits for all the inactive transactions until
 * they have fully completed.  The value from start_transaction() is returned
 * unchanged either way.
 */
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
	struct btrfs_trans_handle *handle;

	handle = start_transaction(root, 0, TRANS_ATTACH, 0);
	if (IS_ERR(handle)) {
		if (PTR_ERR(handle) == -ENOENT)
			btrfs_wait_for_commit(root, 0);
	}

	return handle;
}

Chris Mason's avatar
Chris Mason committed
670
/* wait for a transaction commit to be fully complete */
671
static noinline void wait_for_commit(struct btrfs_root *root,
672
673
				    struct btrfs_transaction *commit)
{
674
	wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
675
676
}

677
678
679
/*
 * Wait for a transaction commit to complete.
 *
 * @transid != 0: wait for that specific transaction.  Returns 0 at once if
 * it is not newer than last_trans_committed, and -EINVAL if it is not on the
 * transaction list and is still newer than last_trans_committed.
 *
 * @transid == 0: wait for the newest transaction that has reached
 * TRANS_STATE_COMMIT_START but is not yet TRANS_STATE_COMPLETED, if any.
 *
 * Returns 0 or -EINVAL.
 */
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_transaction *found = NULL;
	struct btrfs_transaction *t;

	if (transid) {
		if (transid <= fs_info->last_trans_committed)
			return 0;

		/* Look for the requested transaction. */
		spin_lock(&fs_info->trans_lock);
		list_for_each_entry(t, &fs_info->trans_list, list) {
			if (t->transid == transid) {
				found = t;
				atomic_inc(&found->use_count);
				break;
			}
			/* Walked past where it would be: not on the list. */
			if (t->transid > transid)
				break;
		}
		spin_unlock(&fs_info->trans_lock);

		/*
		 * Either the requested transaction does not exist, or we
		 * raced with btrfs_commit_transaction.
		 */
		if (!found) {
			if (transid > fs_info->last_trans_committed)
				return -EINVAL;
			return 0;
		}
	} else {
		/* Newest transaction that is committing but not completed. */
		spin_lock(&fs_info->trans_lock);
		list_for_each_entry_reverse(t, &fs_info->trans_list, list) {
			if (t->state < TRANS_STATE_COMMIT_START)
				continue;
			if (t->state != TRANS_STATE_COMPLETED) {
				found = t;
				atomic_inc(&found->use_count);
			}
			break;
		}
		spin_unlock(&fs_info->trans_lock);

		/* Nothing is committing. */
		if (!found)
			return 0;
	}

	wait_for_commit(root, found);
	btrfs_put_transaction(found);
	return 0;
}

Chris Mason's avatar
Chris Mason committed
735
736
void btrfs_throttle(struct btrfs_root *root)
{
Josef Bacik's avatar
Josef Bacik committed
737
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
738
		wait_current_trans(root);
Chris Mason's avatar
Chris Mason committed
739
740
}

741
742
743
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
744
	if (root->fs_info->global_block_rsv.space_info->full &&
745
	    btrfs_check_space_for_delayed_refs(trans, root))
746
		return 1;
747

748
	return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
749
750
751
752
753
754
755
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;
756
	int err;
757

Josef Bacik's avatar
Josef Bacik committed
758
	smp_mb();
759
760
	if (cur_trans->state >= TRANS_STATE_BLOCKED ||
	    cur_trans->delayed_refs.flushing)
761
762
763
764
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
765
	if (updates) {
766
		err = btrfs_run_delayed_refs(trans, root, updates * 2);
767
768
769
		if (err) /* Error code will also eval true */
			return err;
	}
770
771
772
773

	return should_end_transaction(trans, root);
}

774
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
775
			  struct btrfs_root *root, int throttle)
Chris Mason's avatar
Chris Mason committed
776
{
777
	struct btrfs_transaction *cur_trans = trans->transaction;
778
	struct btrfs_fs_info *info = root->fs_info;
779
	unsigned long cur = trans->delayed_ref_updates;
780
	int lock = (trans->type != TRANS_JOIN_NOLOCK);
781
	int err = 0;
Chris Mason's avatar
Chris Mason committed
782
	int must_run_delayed_refs = 0;
783

784
785
	if (trans->use_count > 1) {
		trans->use_count--;
786
787
788
789
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

790
	btrfs_trans_release_metadata(trans, root);
791
	trans->block_rsv = NULL;
792

793
794
795
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

796
797
	if (!list_empty(&trans->ordered)) {
		spin_lock(&info->trans_lock);
798
		list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
799
800
801
		spin_unlock(&info->trans_lock);
	}

802
	trans->delayed_ref_updates = 0;
Chris Mason's avatar
Chris Mason committed
803
804
805
	if (!trans->sync) {
		must_run_delayed_refs =
			btrfs_should_throttle_delayed_refs(trans, root);
806
		cur = max_t(unsigned long, cur, 32);
Chris Mason's avatar
Chris Mason committed
807
808
809
810
811
812
813
814

		/*
		 * don't make the caller wait if they are from a NOLOCK
		 * or ATTACH transaction, it will deadlock with commit
		 */
		if (must_run_delayed_refs == 1 &&
		    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
			must_run_delayed_refs = 2;
815
	}
816

Josef Bacik's avatar
Josef Bacik committed
817
818
819
820
821
822
823
824
825
	if (trans->qgroup_reserved) {
		/*
		 * the same root has to be passed here between start_transaction
		 * and end_transaction. Subvolume quota depends on this.
		 */
		btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
		trans->qgroup_reserved = 0;
	}

826
827
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
828

829
830
831
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

832
833
	btrfs_trans_release_chunk_metadata(trans);

Josef Bacik's avatar
Josef Bacik committed
834
	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
835
836
837
838
839
840
	    should_end_transaction(trans, root) &&
	    ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
		spin_lock(&info->trans_lock);
		if (cur_trans->state == TRANS_STATE_RUNNING)
			cur_trans->state = TRANS_STATE_BLOCKED;
		spin_unlock(&info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
841
	}
842

843
	if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
844
		if (throttle)
845
			return btrfs_commit_transaction(trans, root);
846
		else
847
848
849
			wake_up_process(info->transaction_kthread);
	}

850
	if (trans->type & __TRANS_FREEZABLE)
851
		sb_end_intwrite(root->fs_info->sb);
852

853
	WARN_ON(cur_trans != info->running_transaction);
854
855
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);
856
	extwriter_counter_dec(cur_trans, trans->type);
857

858
	smp_mb();
Chris Mason's avatar
Chris Mason committed
859
860
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
861
	btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
862
863
864

	if (current->journal_info == trans)
		current->journal_info = NULL;
865

Yan, Zheng's avatar
Yan, Zheng committed
866
867
868
	if (throttle)
		btrfs_run_delayed_iputs(root);

869
	if (trans->aborted ||
870
871
	    test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
		wake_up_process(info->transaction_kthread);
872
		err = -EIO;
873
	}
874
	assert_qgroups_uptodate(trans);
875

876
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
Chris Mason's avatar
Chris Mason committed
877
878
879
880
	if (must_run_delayed_refs) {
		btrfs_async_run_delayed_refs(root, cur,
					     must_run_delayed_refs == 1);
	}
881
	return err;
Chris Mason's avatar
Chris Mason committed
882
883
}

884
885
886
/*
 * End the handle @trans without throttling.  Returns what
 * __btrfs_end_transaction() returns.
 */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	int ret = __btrfs_end_transaction(trans, root, 0);

	return ret;
}

/*
 * End the handle @trans with throttling enabled.  Returns what
 * __btrfs_end_transaction() returns.
 */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret = __btrfs_end_transaction(trans, root, 1);

	return ret;
}

Chris Mason's avatar
Chris Mason committed
896
897
898
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
899
 * those extents are sent to disk but does not wait on them
Chris Mason's avatar
Chris Mason committed
900
 */
901
int btrfs_write_marked_extents(struct btrfs_root *root,
902
			       struct extent_io_tree *dirty_pages, int mark)
Chris Mason's avatar
Chris Mason committed
903
{
904
	int err = 0;
905
	int werr = 0;
Josef Bacik's avatar
Josef Bacik committed