transaction.c 62.2 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
#include <linux/fs.h>
20
#include <linux/slab.h>
Chris Mason's avatar
Chris Mason committed
21
#include <linux/sched.h>
22
#include <linux/writeback.h>
23
#include <linux/pagemap.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
Chris Mason's avatar
Chris Mason committed
26
27
28
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
29
#include "locking.h"
30
#include "tree-log.h"
31
#include "inode-map.h"
32
#include "volumes.h"
33
#include "dev-replace.h"
Josef Bacik's avatar
Josef Bacik committed
34
#include "qgroup.h"
Chris Mason's avatar
Chris Mason committed
35

36
37
#define BTRFS_ROOT_TRANS_TAG 0

38
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
	[TRANS_STATE_RUNNING]		= 0U,
	[TRANS_STATE_BLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START),
	[TRANS_STATE_COMMIT_START]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN),
	[TRANS_STATE_UNBLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
	[TRANS_STATE_COMPLETED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
};

61
void btrfs_put_transaction(struct btrfs_transaction *transaction)
Chris Mason's avatar
Chris Mason committed
62
{
63
64
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
Josef Bacik's avatar
Josef Bacik committed
65
		BUG_ON(!list_empty(&transaction->list));
Liu Bo's avatar
Liu Bo committed
66
		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67
68
69
		if (transaction->delayed_refs.pending_csums)
			printk(KERN_ERR "pending csums is %llu\n",
			       transaction->delayed_refs.pending_csums);
70
71
72
73
74
75
76
77
		while (!list_empty(&transaction->pending_chunks)) {
			struct extent_map *em;

			em = list_first_entry(&transaction->pending_chunks,
					      struct extent_map, list);
			list_del_init(&em->list);
			free_extent_map(em);
		}
Chris Mason's avatar
Chris Mason committed
78
		kmem_cache_free(btrfs_transaction_cachep, transaction);
Chris Mason's avatar
Chris Mason committed
79
	}
Chris Mason's avatar
Chris Mason committed
80
81
}

82
83
84
static void clear_btree_io_tree(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
85
86
87
88
89
90
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once clear_btree_io_tree is
	 * called.
	 */
	smp_mb();
91
92
93
94
95
96
97
98
99
100
101
102
103
104
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
105
106

		cond_resched_lock(&tree->lock);
107
108
109
110
	}
	spin_unlock(&tree->lock);
}

111
112
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
					 struct btrfs_fs_info *fs_info)
Josef Bacik's avatar
Josef Bacik committed
113
{
114
115
116
117
118
119
120
121
122
123
	struct btrfs_root *root, *tmp;

	down_write(&fs_info->commit_root_sem);
	list_for_each_entry_safe(root, tmp, &trans->switch_commits,
				 dirty_list) {
		list_del_init(&root->dirty_list);
		free_extent_buffer(root->commit_root);
		root->commit_root = btrfs_root_node(root);
		if (is_fstree(root->objectid))
			btrfs_unpin_free_ino(root);
124
		clear_btree_io_tree(&root->dirty_log_pages);
125
	}
126
127
128
129
130
131
132
133
134
135
136
137

	/* We can free old roots now. */
	spin_lock(&trans->dropped_roots_lock);
	while (!list_empty(&trans->dropped_roots)) {
		root = list_first_entry(&trans->dropped_roots,
					struct btrfs_root, root_list);
		list_del_init(&root->root_list);
		spin_unlock(&trans->dropped_roots_lock);
		btrfs_drop_and_free_fs_root(fs_info, root);
		spin_lock(&trans->dropped_roots_lock);
	}
	spin_unlock(&trans->dropped_roots_lock);
138
	up_write(&fs_info->commit_root_sem);
Josef Bacik's avatar
Josef Bacik committed
139
140
}

141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/*
 * Bump trans->num_extwriters, but only when @type has one of the
 * TRANS_EXTWRITERS bits set.
 */
static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (type & TRANS_EXTWRITERS)
		atomic_inc(&trans->num_extwriters);
}

/*
 * Decrement trans->num_extwriters, but only when @type has one of the
 * TRANS_EXTWRITERS bits set.
 */
static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (type & TRANS_EXTWRITERS)
		atomic_dec(&trans->num_extwriters);
}

/*
 * Initialise trans->num_extwriters: 1 when @type has one of the
 * TRANS_EXTWRITERS bits set, 0 otherwise.
 */
static inline void extwriter_counter_init(struct btrfs_transaction *trans,
					  unsigned int type)
{
	atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
}

/* Return the current value of trans->num_extwriters. */
static inline int extwriter_counter_read(struct btrfs_transaction *trans)
{
	return atomic_read(&trans->num_extwriters);
}

Chris Mason's avatar
Chris Mason committed
166
167
168
/*
 * either allocate a new transaction or hop into the existing one
 */
169
static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
Chris Mason's avatar
Chris Mason committed
170
171
{
	struct btrfs_transaction *cur_trans;
172
	struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik's avatar
Josef Bacik committed
173

174
	spin_lock(&fs_info->trans_lock);
175
loop:
176
	/* The file system has been taken offline. No new transactions. */
177
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
178
		spin_unlock(&fs_info->trans_lock);
179
180
181
		return -EROFS;
	}

182
	cur_trans = fs_info->running_transaction;
Josef Bacik's avatar
Josef Bacik committed
183
	if (cur_trans) {
184
		if (cur_trans->aborted) {
185
			spin_unlock(&fs_info->trans_lock);
186
			return cur_trans->aborted;
187
		}
188
		if (btrfs_blocked_trans_types[cur_trans->state] & type) {
189
190
191
			spin_unlock(&fs_info->trans_lock);
			return -EBUSY;
		}
Josef Bacik's avatar
Josef Bacik committed
192
		atomic_inc(&cur_trans->use_count);
193
		atomic_inc(&cur_trans->num_writers);
194
		extwriter_counter_inc(cur_trans, type);
195
		spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
196
		return 0;
Chris Mason's avatar
Chris Mason committed
197
	}
198
	spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
199

200
201
202
203
204
205
206
	/*
	 * If we are ATTACH, we just want to catch the current transaction,
	 * and commit it. If there is no transaction, just return ENOENT.
	 */
	if (type == TRANS_ATTACH)
		return -ENOENT;

207
208
209
210
211
212
	/*
	 * JOIN_NOLOCK only happens during the transaction commit, so
	 * it is impossible that ->running_transaction is NULL
	 */
	BUG_ON(type == TRANS_JOIN_NOLOCK);

Josef Bacik's avatar
Josef Bacik committed
213
214
215
	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;
216

217
218
	spin_lock(&fs_info->trans_lock);
	if (fs_info->running_transaction) {
219
220
		/*
		 * someone started a transaction after we unlocked.  Make sure
221
		 * to redo the checks above
222
		 */
Josef Bacik's avatar
Josef Bacik committed
223
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
224
		goto loop;
225
	} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
226
		spin_unlock(&fs_info->trans_lock);
227
228
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		return -EROFS;
Chris Mason's avatar
Chris Mason committed
229
	}
230

Josef Bacik's avatar
Josef Bacik committed
231
	atomic_set(&cur_trans->num_writers, 1);
232
	extwriter_counter_init(cur_trans, type);
Josef Bacik's avatar
Josef Bacik committed
233
234
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
235
	init_waitqueue_head(&cur_trans->pending_wait);
236
	cur_trans->state = TRANS_STATE_RUNNING;
Josef Bacik's avatar
Josef Bacik committed
237
238
239
240
241
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
242
	atomic_set(&cur_trans->pending_ordered, 0);
243
	cur_trans->flags = 0;
Josef Bacik's avatar
Josef Bacik committed
244
245
	cur_trans->start_time = get_seconds();

246
247
	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));

Liu Bo's avatar
Liu Bo committed
248
	cur_trans->delayed_refs.href_root = RB_ROOT;
249
	cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
250
	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
251
252
253
254
255
256

	/*
	 * although the tree mod log is per file system and not per transaction,
	 * the log must never go across transaction boundaries.
	 */
	smp_mb();
Julia Lawall's avatar
Julia Lawall committed
257
	if (!list_empty(&fs_info->tree_mod_seq_list))
258
		WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
259
			"creating a fresh transaction\n");
Julia Lawall's avatar
Julia Lawall committed
260
	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
261
		WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
262
			"creating a fresh transaction\n");
263
	atomic64_set(&fs_info->tree_mod_seq, 0);
264

Josef Bacik's avatar
Josef Bacik committed
265
266
267
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
268
	INIT_LIST_HEAD(&cur_trans->pending_chunks);
269
	INIT_LIST_HEAD(&cur_trans->switch_commits);
270
	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
271
	INIT_LIST_HEAD(&cur_trans->io_bgs);
272
	INIT_LIST_HEAD(&cur_trans->dropped_roots);
273
	mutex_init(&cur_trans->cache_write_mutex);
274
	cur_trans->num_dirty_bgs = 0;
275
	spin_lock_init(&cur_trans->dirty_bgs_lock);
276
	INIT_LIST_HEAD(&cur_trans->deleted_bgs);
277
	spin_lock_init(&cur_trans->dropped_roots_lock);
278
	list_add_tail(&cur_trans->list, &fs_info->trans_list);
Josef Bacik's avatar
Josef Bacik committed
279
	extent_io_tree_init(&cur_trans->dirty_pages,
280
281
282
283
			     fs_info->btree_inode->i_mapping);
	fs_info->generation++;
	cur_trans->transid = fs_info->generation;
	fs_info->running_transaction = cur_trans;
284
	cur_trans->aborted = 0;
285
	spin_unlock(&fs_info->trans_lock);
286

Chris Mason's avatar
Chris Mason committed
287
288
289
	return 0;
}

Chris Mason's avatar
Chris Mason committed
290
/*
291
292
293
294
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
Chris Mason's avatar
Chris Mason committed
295
 */
Chris Mason's avatar
Chris Mason committed
296
static int record_root_in_trans(struct btrfs_trans_handle *trans,
Josef Bacik's avatar
Josef Bacik committed
297
			       struct btrfs_root *root)
298
{
299
300
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
	    root->last_trans < trans->transid) {
301
		WARN_ON(root == root->fs_info->extent_root);
302
303
		WARN_ON(root->commit_root != root->node);

Chris Mason's avatar
Chris Mason committed
304
		/*
305
		 * see below for IN_TRANS_SETUP usage rules
Chris Mason's avatar
Chris Mason committed
306
307
308
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
309
		set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
Chris Mason's avatar
Chris Mason committed
310

311
		/* make sure readers find IN_TRANS_SETUP before
Chris Mason's avatar
Chris Mason committed
312
313
314
315
		 * they find our root->last_trans update
		 */
		smp_wmb();

Josef Bacik's avatar
Josef Bacik committed
316
317
318
319
320
		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
321
322
323
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
			   (unsigned long)root->root_key.objectid,
			   BTRFS_ROOT_TRANS_TAG);
Josef Bacik's avatar
Josef Bacik committed
324
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
Chris Mason's avatar
Chris Mason committed
325
326
327
328
329
330
331
332
333
334
335
336
337
		root->last_trans = trans->transid;

		/* this is pretty tricky.  We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c.  The solution used here is to flag ourselves
338
		 * with root IN_TRANS_SETUP.  When this is 1, we're still
Chris Mason's avatar
Chris Mason committed
339
340
341
342
343
344
345
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take the
		 * lock.  smp_wmb() makes sure that all the writes above are
		 * done before we pop in the zero below
		 */
346
		btrfs_init_reloc_root(trans, root);
347
		smp_mb__before_atomic();
348
		clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
349
350
351
	}
	return 0;
}
352

Chris Mason's avatar
Chris Mason committed
353

354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/*
 * Queue @root on the dropped_roots list of the transaction behind @trans and
 * clear its BTRFS_ROOT_TRANS_TAG in the fs_roots radix tree.
 */
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	/* Add ourselves to the transaction dropped list */
	spin_lock(&cur_trans->dropped_roots_lock);
	list_add_tail(&root->root_list, &cur_trans->dropped_roots);
	spin_unlock(&cur_trans->dropped_roots_lock);

	/* Make sure we don't try to update the root at commit time */
	spin_lock(&root->fs_info->fs_roots_radix_lock);
	radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
			     (unsigned long)root->root_key.objectid,
			     BTRFS_ROOT_TRANS_TAG);
	spin_unlock(&root->fs_info->fs_roots_radix_lock);
}

Chris Mason's avatar
Chris Mason committed
372
373
374
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
375
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Chris Mason's avatar
Chris Mason committed
376
377
378
		return 0;

	/*
379
	 * see record_root_in_trans for comments about IN_TRANS_SETUP usage
Chris Mason's avatar
Chris Mason committed
380
381
382
383
	 * and barriers
	 */
	smp_rmb();
	if (root->last_trans == trans->transid &&
384
	    !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
Chris Mason's avatar
Chris Mason committed
385
386
387
388
389
390
391
392
393
		return 0;

	mutex_lock(&root->fs_info->reloc_mutex);
	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->reloc_mutex);

	return 0;
}

394
395
396
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
	return (trans->state >= TRANS_STATE_BLOCKED &&
397
398
		trans->state < TRANS_STATE_UNBLOCKED &&
		!trans->aborted);
399
400
}

Chris Mason's avatar
Chris Mason committed
401
402
403
404
/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
Chris Mason's avatar
Chris Mason committed
405
static void wait_current_trans(struct btrfs_root *root)
Chris Mason's avatar
Chris Mason committed
406
{
407
	struct btrfs_transaction *cur_trans;
Chris Mason's avatar
Chris Mason committed
408

Josef Bacik's avatar
Josef Bacik committed
409
	spin_lock(&root->fs_info->trans_lock);
410
	cur_trans = root->fs_info->running_transaction;
411
	if (cur_trans && is_transaction_blocked(cur_trans)) {
412
		atomic_inc(&cur_trans->use_count);
Josef Bacik's avatar
Josef Bacik committed
413
		spin_unlock(&root->fs_info->trans_lock);
Li Zefan's avatar
Li Zefan committed
414
415

		wait_event(root->fs_info->transaction_wait,
416
417
			   cur_trans->state >= TRANS_STATE_UNBLOCKED ||
			   cur_trans->aborted);
418
		btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
419
420
	} else {
		spin_unlock(&root->fs_info->trans_lock);
421
	}
Chris Mason's avatar
Chris Mason committed
422
423
}

424
425
/*
 * Decide whether a caller of the given @type may wait for the running
 * transaction.
 *
 * Returns 0 while fs_info->log_root_recovering is set.  Otherwise returns 1
 * for TRANS_USERSPACE, and for TRANS_START when open_ioctl_trans is zero;
 * returns 0 for everything else.
 */
static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}

439
440
441
static inline bool need_reserve_reloc_root(struct btrfs_root *root)
{
	if (!root->fs_info->reloc_ctl ||
442
	    !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
443
444
445
446
447
448
449
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    root->reloc_root)
		return false;

	return true;
}

450
static struct btrfs_trans_handle *
451
452
start_transaction(struct btrfs_root *root, unsigned int num_items,
		  unsigned int type, enum btrfs_reserve_flush_enum flush)
Chris Mason's avatar
Chris Mason committed
453
{
454
455
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
456
	u64 num_bytes = 0;
457
	u64 qgroup_reserved = 0;
458
459
	bool reloc_reserved = false;
	int ret;
460

461
	/* Send isn't supposed to start transactions. */
462
	ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB);
463

464
	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
465
		return ERR_PTR(-EROFS);
466

467
	if (current->journal_info) {
468
		WARN_ON(type & TRANS_EXTWRITERS);
469
470
		h = current->journal_info;
		h->use_count++;
471
		WARN_ON(h->use_count > 2);
472
473
474
475
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}
476
477
478
479
480
481

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
482
483
484
485
		qgroup_reserved = num_items * root->nodesize;
		ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
		if (ret)
			return ERR_PTR(ret);
486

487
		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
488
489
490
		/*
		 * Do the reservation for the relocation root creation
		 */
491
		if (need_reserve_reloc_root(root)) {
492
493
494
495
			num_bytes += root->nodesize;
			reloc_reserved = true;
		}

496
497
498
		ret = btrfs_block_rsv_add(root,
					  &root->fs_info->trans_block_rsv,
					  num_bytes, flush);
499
		if (ret)
500
			goto reserve_fail;
501
	}
502
again:
503
	h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
504
505
506
507
	if (!h) {
		ret = -ENOMEM;
		goto alloc_fail;
	}
Chris Mason's avatar
Chris Mason committed
508

509
510
511
512
513
514
	/*
	 * If we are JOIN_NOLOCK we're already committing a transaction and
	 * waiting on this guy, so we don't need to do the sb_start_intwrite
	 * because we're already holding a ref.  We need this because we could
	 * have raced in and did an fsync() on a file which can kick a commit
	 * and then we deadlock with somebody doing a freeze.
515
516
517
	 *
	 * If we are ATTACH, it means we just want to catch the current
	 * transaction and commit it, so we needn't do sb_start_intwrite(). 
518
	 */
519
	if (type & __TRANS_FREEZABLE)
520
		sb_start_intwrite(root->fs_info->sb);
521

522
	if (may_wait_transaction(root, type))
Chris Mason's avatar
Chris Mason committed
523
		wait_current_trans(root);
524

Josef Bacik's avatar
Josef Bacik committed
525
	do {
526
		ret = join_transaction(root, type);
527
		if (ret == -EBUSY) {
Josef Bacik's avatar
Josef Bacik committed
528
			wait_current_trans(root);
529
530
531
			if (unlikely(type == TRANS_ATTACH))
				ret = -ENOENT;
		}
Josef Bacik's avatar
Josef Bacik committed
532
533
	} while (ret == -EBUSY);

Tsutomu Itoh's avatar
Tsutomu Itoh committed
534
	if (ret < 0) {
535
536
		/* We must get the transaction if we are JOIN_NOLOCK. */
		BUG_ON(type == TRANS_JOIN_NOLOCK);
537
		goto join_fail;
Tsutomu Itoh's avatar
Tsutomu Itoh committed
538
	}
539

540
541
542
543
	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
544
	h->root = root;
545
	h->use_count = 1;
546

547
	h->type = type;
548
	h->can_flush_pending_bgs = true;
549
	INIT_LIST_HEAD(&h->qgroup_ref_list);
550
	INIT_LIST_HEAD(&h->new_bgs);
551

552
	smp_mb();
553
554
	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
	    may_wait_transaction(root, type)) {
555
		current->journal_info = h;
556
557
558
559
		btrfs_commit_transaction(h, root);
		goto again;
	}

560
	if (num_bytes) {
Josef Bacik's avatar
Josef Bacik committed
561
		trace_btrfs_space_reservation(root->fs_info, "transaction",
562
					      h->transid, num_bytes, 1);
563
564
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
565
		h->reloc_reserved = reloc_reserved;
566
	}
Josef Bacik's avatar
Josef Bacik committed
567

568
got_it:
Josef Bacik's avatar
Josef Bacik committed
569
	btrfs_record_root_in_trans(h, root);
570
571
572

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
Chris Mason's avatar
Chris Mason committed
573
	return h;
574
575

join_fail:
576
	if (type & __TRANS_FREEZABLE)
577
578
579
580
581
582
583
		sb_end_intwrite(root->fs_info->sb);
	kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
	if (num_bytes)
		btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
					num_bytes);
reserve_fail:
584
	btrfs_qgroup_free_meta(root, qgroup_reserved);
585
	return ERR_PTR(ret);
Chris Mason's avatar
Chris Mason committed
586
587
}

588
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
589
						   unsigned int num_items)
590
{
591
592
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_ALL);
593
}
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
/*
 * Like btrfs_start_transaction(), but when the normal reservation fails with
 * -ENOSPC, start the transaction without a reservation and then try to obtain
 * the space through btrfs_cond_migrate_bytes() into the trans block rsv,
 * passing @min_factor through to that helper.
 *
 * Returns the handle or an ERR_PTR().  If the migration fails the transaction
 * is ended again and the migration error is returned.
 */
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
					struct btrfs_root *root,
					unsigned int num_items,
					int min_factor)
{
	struct btrfs_trans_handle *trans;
	u64 num_bytes;
	int ret;

	/* Anything other than -ENOSPC (success included) is returned as is. */
	trans = btrfs_start_transaction(root, num_items);
	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
		return trans;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return trans;

	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
	ret = btrfs_cond_migrate_bytes(root->fs_info,
				       &root->fs_info->trans_block_rsv,
				       num_bytes,
				       min_factor);
	if (ret) {
		btrfs_end_transaction(trans, root);
		return ERR_PTR(ret);
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	trans->bytes_reserved = num_bytes;

	return trans;
}
626

627
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
628
629
					struct btrfs_root *root,
					unsigned int num_items)
630
{
631
632
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_LIMIT);
633
634
}

635
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
636
{
637
	return start_transaction(root, 0, TRANS_JOIN, 0);
638
639
}

640
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
641
{
642
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
643
644
}

645
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
646
{
647
	return start_transaction(root, 0, TRANS_USERSPACE, 0);
648
649
}

Miao Xie's avatar
Miao Xie committed
650
651
652
653
654
655
656
657
658
659
660
661
662
/*
 * btrfs_attach_transaction() - catch the running transaction
 *
 * It is used when we want to commit the current the transaction, but
 * don't want to start a new one.
 *
 * Note: If this function return -ENOENT, it just means there is no
 * running transaction. But it is possible that the inactive transaction
 * is still in the memory, not fully on disk. If you hope there is no
 * inactive transaction in the fs when -ENOENT is returned, you should
 * invoke
 *     btrfs_attach_transaction_barrier()
 */
663
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
664
{
665
	return start_transaction(root, 0, TRANS_ATTACH, 0);
666
667
}

Miao Xie's avatar
Miao Xie committed
668
/*
 * btrfs_attach_transaction_barrier() - catch the running transaction
 *
 * It is similar to btrfs_attach_transaction(); the difference is that when
 * there is no running transaction (-ENOENT) this one waits, through
 * btrfs_wait_for_commit(), for the inactive transactions until they fully
 * complete.  The -ENOENT error pointer is still returned to the caller.
 */
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = start_transaction(root, 0, TRANS_ATTACH, 0);
	if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
		btrfs_wait_for_commit(root, 0);

	return trans;
}

Chris Mason's avatar
Chris Mason committed
687
/* wait for a transaction commit to be fully complete */
688
static noinline void wait_for_commit(struct btrfs_root *root,
689
690
				    struct btrfs_transaction *commit)
{
691
	wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
692
693
}

694
695
696
/*
 * Wait for a transaction to be completely committed.
 *
 * @transid != 0: wait for that specific transaction.  Returns 0 right away
 *	when it is not newer than last_trans_committed, and -EINVAL when it is
 *	not on the transaction list and is still newer than
 *	last_trans_committed.
 * @transid == 0: wait for the newest transaction that has reached
 *	TRANS_STATE_COMMIT_START but is not yet TRANS_STATE_COMPLETED; returns
 *	0 right away if there is none.
 *
 * Returns 0 once the wait is over.
 */
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret = 0;

	if (transid) {
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
			/* Stop at the first transaction newer than @transid. */
			if (t->transid > transid)
				break;
		}
		spin_unlock(&root->fs_info->trans_lock);

		/*
		 * The specified transaction doesn't exist, or we
		 * raced with btrfs_commit_transaction
		 */
		if (!cur_trans) {
			if (transid > root->fs_info->last_trans_committed)
				ret = -EINVAL;
			goto out;
		}
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->state >= TRANS_STATE_COMMIT_START) {
				if (t->state == TRANS_STATE_COMPLETED)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	/* The reference taken under trans_lock is dropped after the wait. */
	wait_for_commit(root, cur_trans);
	btrfs_put_transaction(cur_trans);
out:
	return ret;
}

Chris Mason's avatar
Chris Mason committed
752
753
void btrfs_throttle(struct btrfs_root *root)
{
Josef Bacik's avatar
Josef Bacik committed
754
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
755
		wait_current_trans(root);
Chris Mason's avatar
Chris Mason committed
756
757
}

758
759
760
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
761
	if (root->fs_info->global_block_rsv.space_info->full &&
762
	    btrfs_check_space_for_delayed_refs(trans, root))
763
		return 1;
764

765
	return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
766
767
768
769
770
771
772
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;
773
	int err;
774

Josef Bacik's avatar
Josef Bacik committed
775
	smp_mb();
776
777
	if (cur_trans->state >= TRANS_STATE_BLOCKED ||
	    cur_trans->delayed_refs.flushing)
778
779
780
781
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
782
	if (updates) {
783
		err = btrfs_run_delayed_refs(trans, root, updates * 2);
784
785
786
		if (err) /* Error code will also eval true */
			return err;
	}
787
788
789
790

	return should_end_transaction(trans, root);
}

791
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
792
			  struct btrfs_root *root, int throttle)
Chris Mason's avatar
Chris Mason committed
793
{
794
	struct btrfs_transaction *cur_trans = trans->transaction;
795
	struct btrfs_fs_info *info = root->fs_info;
796
	unsigned long cur = trans->delayed_ref_updates;
797
	int lock = (trans->type != TRANS_JOIN_NOLOCK);
798
	int err = 0;
Chris Mason's avatar
Chris Mason committed
799
	int must_run_delayed_refs = 0;
800

801
802
	if (trans->use_count > 1) {
		trans->use_count--;
803
804
805
806
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

807
	btrfs_trans_release_metadata(trans, root);
808
	trans->block_rsv = NULL;
809

810
811
812
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

813
	trans->delayed_ref_updates = 0;
Chris Mason's avatar
Chris Mason committed
814
815
816
	if (!trans->sync) {
		must_run_delayed_refs =
			btrfs_should_throttle_delayed_refs(trans, root);
817
		cur = max_t(unsigned long, cur, 32);
Chris Mason's avatar
Chris Mason committed
818
819
820
821
822
823
824
825

		/*
		 * don't make the caller wait if they are from a NOLOCK
		 * or ATTACH transaction, it will deadlock with commit
		 */
		if (must_run_delayed_refs == 1 &&
		    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
			must_run_delayed_refs = 2;
826
	}
827

828
829
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
830

831
832
833
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

834
835
	btrfs_trans_release_chunk_metadata(trans);

Josef Bacik's avatar
Josef Bacik committed
836
	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
837
838
839
840
841
842
	    should_end_transaction(trans, root) &&
	    ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
		spin_lock(&info->trans_lock);
		if (cur_trans->state == TRANS_STATE_RUNNING)
			cur_trans->state = TRANS_STATE_BLOCKED;
		spin_unlock(&info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
843
	}
844

845
	if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
846
		if (throttle)
847
			return btrfs_commit_transaction(trans, root);
848
		else
849
850
851
			wake_up_process(info->transaction_kthread);
	}

852
	if (trans->type & __TRANS_FREEZABLE)
853
		sb_end_intwrite(root->fs_info->sb);
854

855
	WARN_ON(cur_trans != info->running_transaction);
856
857
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);
858
	extwriter_counter_dec(cur_trans, trans->type);
859

860
861
862
	/*
	 * Make sure counter is updated before we wake up waiters.
	 */
863
	smp_mb();
Chris Mason's avatar
Chris Mason committed
864
865
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
866
	btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
867
868
869

	if (current->journal_info == trans)
		current->journal_info = NULL;
870

Yan, Zheng's avatar
Yan, Zheng committed
871
872
873
	if (throttle)
		btrfs_run_delayed_iputs(root);

874
	if (trans->aborted ||
875
876
	    test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
		wake_up_process(info->transaction_kthread);
877
		err = -EIO;
878
	}
879
	assert_qgroups_uptodate(trans);
880

881
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
Chris Mason's avatar
Chris Mason committed
882
883
884
885
	if (must_run_delayed_refs) {
		btrfs_async_run_delayed_refs(root, cur,
					     must_run_delayed_refs == 1);
	}
886
	return err;
Chris Mason's avatar
Chris Mason committed
887
888
}

889
890
891
/*
 * End the handle @trans without throttling (throttle argument 0).
 * Returns the result of __btrfs_end_transaction().
 */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

/*
 * End the handle @trans with throttling enabled (throttle argument 1).
 * Returns the result of __btrfs_end_transaction().
 */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

Chris Mason's avatar
Chris Mason committed
901
902
903
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
904
 * those extents are sent to disk but does not wait on them
Chris Mason's avatar
Chris Mason committed
905
 */
906
int btrfs_write_marked_extents(struct btrfs_root *root,
907
			       struct extent_io_tree *dirty_pages, int mark)
Chris Mason's avatar
Chris Mason committed
908
{
909
	int err = 0;
910
	int werr = 0;
Josef Bacik's avatar
Josef Bacik committed
911
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
912
	struct extent_state *cached_state = NULL;
913
	u64 start = 0;
914
	u64 end;
915

Josef Bacik's avatar
Josef Bacik committed
916
	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
917
				      mark, &cached_state)) {
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
		bool wait_writeback = false;

		err = convert_extent_bit(dirty_pages, start, end,
					 EXTENT_NEED_WAIT,
					 mark, &cached_state, GFP_NOFS);
		/*
		 * convert_extent_bit can return -ENOMEM, which is most of the
		 * time a temporary error. So when it happens, ignore the error
		 * and wait for writeback of this range to finish - because we
		 * failed to set the bit EXTENT_NEED_WAIT for the range, a call
		 * to btrfs_wait_marked_extents() would not know that writeback
		 * for this range started and therefore wouldn't wait for it to
		 * finish - we don't want to commit a superblock that points to
		 * btree nodes/leafs for which writeback hasn't finished yet
		 * (and without errors).
		 * We cleanup any entries left in the io tree when committing
		 * the transaction (through clear_btree_io_tree()).
		 */
		if (err == -ENOMEM) {
			err = 0;
			wait_writeback = true;
		}
		if (!err)
			err = filemap_fdatawrite_range(mapping, start, end);
Josef Bacik's avatar
Josef Bacik committed
942