transaction.c 65.7 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
#include <linux/fs.h>
20
#include <linux/slab.h>
Chris Mason's avatar
Chris Mason committed
21
#include <linux/sched.h>
22
#include <linux/writeback.h>
23
#include <linux/pagemap.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
Chris Mason's avatar
Chris Mason committed
26
27
28
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
29
#include "locking.h"
30
#include "tree-log.h"
31
#include "inode-map.h"
32
#include "volumes.h"
33
#include "dev-replace.h"
Josef Bacik's avatar
Josef Bacik committed
34
#include "qgroup.h"
Chris Mason's avatar
Chris Mason committed
35

36
37
#define BTRFS_ROOT_TRANS_TAG 0

38
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
	[TRANS_STATE_RUNNING]		= 0U,
	[TRANS_STATE_BLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START),
	[TRANS_STATE_COMMIT_START]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN),
	[TRANS_STATE_UNBLOCKED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
	[TRANS_STATE_COMPLETED]		= (__TRANS_USERSPACE |
					   __TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
};

61
void btrfs_put_transaction(struct btrfs_transaction *transaction)
Chris Mason's avatar
Chris Mason committed
62
{
63
64
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
Josef Bacik's avatar
Josef Bacik committed
65
		BUG_ON(!list_empty(&transaction->list));
Liu Bo's avatar
Liu Bo committed
66
		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67
68
69
		if (transaction->delayed_refs.pending_csums)
			printk(KERN_ERR "pending csums is %llu\n",
			       transaction->delayed_refs.pending_csums);
70
71
72
73
74
75
76
77
		while (!list_empty(&transaction->pending_chunks)) {
			struct extent_map *em;

			em = list_first_entry(&transaction->pending_chunks,
					      struct extent_map, list);
			list_del_init(&em->list);
			free_extent_map(em);
		}
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
		/*
		 * If any block groups are found in ->deleted_bgs then it's
		 * because the transaction was aborted and a commit did not
		 * happen (things failed before writing the new superblock
		 * and calling btrfs_finish_extent_commit()), so we can not
		 * discard the physical locations of the block groups.
		 */
		while (!list_empty(&transaction->deleted_bgs)) {
			struct btrfs_block_group_cache *cache;

			cache = list_first_entry(&transaction->deleted_bgs,
						 struct btrfs_block_group_cache,
						 bg_list);
			list_del_init(&cache->bg_list);
			btrfs_put_block_group_trimming(cache);
			btrfs_put_block_group(cache);
		}
Chris Mason's avatar
Chris Mason committed
95
		kmem_cache_free(btrfs_transaction_cachep, transaction);
Chris Mason's avatar
Chris Mason committed
96
	}
Chris Mason's avatar
Chris Mason committed
97
98
}

99
100
101
static void clear_btree_io_tree(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
102
103
104
105
106
107
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once clear_btree_io_tree is
	 * called.
	 */
	smp_mb();
108
109
110
111
112
113
114
115
116
117
118
119
120
121
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
122
123

		cond_resched_lock(&tree->lock);
124
125
126
127
	}
	spin_unlock(&tree->lock);
}

128
129
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
					 struct btrfs_fs_info *fs_info)
Josef Bacik's avatar
Josef Bacik committed
130
{
131
132
133
134
135
136
137
138
139
140
	struct btrfs_root *root, *tmp;

	down_write(&fs_info->commit_root_sem);
	list_for_each_entry_safe(root, tmp, &trans->switch_commits,
				 dirty_list) {
		list_del_init(&root->dirty_list);
		free_extent_buffer(root->commit_root);
		root->commit_root = btrfs_root_node(root);
		if (is_fstree(root->objectid))
			btrfs_unpin_free_ino(root);
141
		clear_btree_io_tree(&root->dirty_log_pages);
142
	}
143
144
145
146
147
148
149
150
151
152
153
154

	/* We can free old roots now. */
	spin_lock(&trans->dropped_roots_lock);
	while (!list_empty(&trans->dropped_roots)) {
		root = list_first_entry(&trans->dropped_roots,
					struct btrfs_root, root_list);
		list_del_init(&root->root_list);
		spin_unlock(&trans->dropped_roots_lock);
		btrfs_drop_and_free_fs_root(fs_info, root);
		spin_lock(&trans->dropped_roots_lock);
	}
	spin_unlock(&trans->dropped_roots_lock);
155
	up_write(&fs_info->commit_root_sem);
Josef Bacik's avatar
Josef Bacik committed
156
157
}

158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
/* Count one more external writer on @trans if @type is an extwriter type. */
static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (!(type & TRANS_EXTWRITERS))
		return;

	atomic_inc(&trans->num_extwriters);
}

/* Drop one external writer from @trans if @type is an extwriter type. */
static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
					 unsigned int type)
{
	if (!(type & TRANS_EXTWRITERS))
		return;

	atomic_dec(&trans->num_extwriters);
}

/*
 * Initialise the external writer count of a fresh transaction: 1 when the
 * creating handle's @type is an extwriter type, 0 otherwise.
 */
static inline void extwriter_counter_init(struct btrfs_transaction *trans,
					  unsigned int type)
{
	int initial = 0;

	if (type & TRANS_EXTWRITERS)
		initial = 1;

	atomic_set(&trans->num_extwriters, initial);
}

/* Return the current number of external writers attached to @trans. */
static inline int extwriter_counter_read(struct btrfs_transaction *trans)
{
	return atomic_read(&trans->num_extwriters);
}

Chris Mason's avatar
Chris Mason committed
183
184
185
/*
 * either allocate a new transaction or hop into the existing one
 */
186
static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
Chris Mason's avatar
Chris Mason committed
187
188
{
	struct btrfs_transaction *cur_trans;
189
	struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik's avatar
Josef Bacik committed
190

191
	spin_lock(&fs_info->trans_lock);
192
loop:
193
	/* The file system has been taken offline. No new transactions. */
194
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
195
		spin_unlock(&fs_info->trans_lock);
196
197
198
		return -EROFS;
	}

199
	cur_trans = fs_info->running_transaction;
Josef Bacik's avatar
Josef Bacik committed
200
	if (cur_trans) {
201
		if (cur_trans->aborted) {
202
			spin_unlock(&fs_info->trans_lock);
203
			return cur_trans->aborted;
204
		}
205
		if (btrfs_blocked_trans_types[cur_trans->state] & type) {
206
207
208
			spin_unlock(&fs_info->trans_lock);
			return -EBUSY;
		}
Josef Bacik's avatar
Josef Bacik committed
209
		atomic_inc(&cur_trans->use_count);
210
		atomic_inc(&cur_trans->num_writers);
211
		extwriter_counter_inc(cur_trans, type);
212
		spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
213
		return 0;
Chris Mason's avatar
Chris Mason committed
214
	}
215
	spin_unlock(&fs_info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
216

217
218
219
220
221
222
223
	/*
	 * If we are ATTACH, we just want to catch the current transaction,
	 * and commit it. If there is no transaction, just return ENOENT.
	 */
	if (type == TRANS_ATTACH)
		return -ENOENT;

224
225
226
227
228
229
	/*
	 * JOIN_NOLOCK only happens during the transaction commit, so
	 * it is impossible that ->running_transaction is NULL
	 */
	BUG_ON(type == TRANS_JOIN_NOLOCK);

Josef Bacik's avatar
Josef Bacik committed
230
231
232
	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;
233

234
235
	spin_lock(&fs_info->trans_lock);
	if (fs_info->running_transaction) {
236
237
		/*
		 * someone started a transaction after we unlocked.  Make sure
238
		 * to redo the checks above
239
		 */
Josef Bacik's avatar
Josef Bacik committed
240
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
241
		goto loop;
242
	} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
243
		spin_unlock(&fs_info->trans_lock);
244
245
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		return -EROFS;
Chris Mason's avatar
Chris Mason committed
246
	}
247

Josef Bacik's avatar
Josef Bacik committed
248
	atomic_set(&cur_trans->num_writers, 1);
249
	extwriter_counter_init(cur_trans, type);
Josef Bacik's avatar
Josef Bacik committed
250
251
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
252
	init_waitqueue_head(&cur_trans->pending_wait);
253
	cur_trans->state = TRANS_STATE_RUNNING;
Josef Bacik's avatar
Josef Bacik committed
254
255
256
257
258
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
259
	atomic_set(&cur_trans->pending_ordered, 0);
260
	cur_trans->flags = 0;
Josef Bacik's avatar
Josef Bacik committed
261
262
	cur_trans->start_time = get_seconds();

263
264
	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));

Liu Bo's avatar
Liu Bo committed
265
	cur_trans->delayed_refs.href_root = RB_ROOT;
266
	cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
267
	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
268
269
270
271
272
273

	/*
	 * although the tree mod log is per file system and not per transaction,
	 * the log must never go across transaction boundaries.
	 */
	smp_mb();
Julia Lawall's avatar
Julia Lawall committed
274
	if (!list_empty(&fs_info->tree_mod_seq_list))
275
		WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
276
			"creating a fresh transaction\n");
Julia Lawall's avatar
Julia Lawall committed
277
	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
278
		WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
279
			"creating a fresh transaction\n");
280
	atomic64_set(&fs_info->tree_mod_seq, 0);
281

Josef Bacik's avatar
Josef Bacik committed
282
283
284
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
285
	INIT_LIST_HEAD(&cur_trans->pending_chunks);
286
	INIT_LIST_HEAD(&cur_trans->switch_commits);
287
	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
288
	INIT_LIST_HEAD(&cur_trans->io_bgs);
289
	INIT_LIST_HEAD(&cur_trans->dropped_roots);
290
	mutex_init(&cur_trans->cache_write_mutex);
291
	cur_trans->num_dirty_bgs = 0;
292
	spin_lock_init(&cur_trans->dirty_bgs_lock);
293
	INIT_LIST_HEAD(&cur_trans->deleted_bgs);
294
	spin_lock_init(&cur_trans->dropped_roots_lock);
295
	list_add_tail(&cur_trans->list, &fs_info->trans_list);
Josef Bacik's avatar
Josef Bacik committed
296
	extent_io_tree_init(&cur_trans->dirty_pages,
297
298
299
300
			     fs_info->btree_inode->i_mapping);
	fs_info->generation++;
	cur_trans->transid = fs_info->generation;
	fs_info->running_transaction = cur_trans;
301
	cur_trans->aborted = 0;
302
	spin_unlock(&fs_info->trans_lock);
303

Chris Mason's avatar
Chris Mason committed
304
305
306
	return 0;
}

Chris Mason's avatar
Chris Mason committed
307
/*
308
309
310
311
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
Chris Mason's avatar
Chris Mason committed
312
 */
Chris Mason's avatar
Chris Mason committed
313
static int record_root_in_trans(struct btrfs_trans_handle *trans,
314
315
			       struct btrfs_root *root,
			       int force)
316
{
317
318
	if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
	    root->last_trans < trans->transid) || force) {
319
		WARN_ON(root == root->fs_info->extent_root);
320
321
		WARN_ON(root->commit_root != root->node);

Chris Mason's avatar
Chris Mason committed
322
		/*
323
		 * see below for IN_TRANS_SETUP usage rules
Chris Mason's avatar
Chris Mason committed
324
325
326
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
327
		set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
Chris Mason's avatar
Chris Mason committed
328

329
		/* make sure readers find IN_TRANS_SETUP before
Chris Mason's avatar
Chris Mason committed
330
331
332
333
		 * they find our root->last_trans update
		 */
		smp_wmb();

Josef Bacik's avatar
Josef Bacik committed
334
		spin_lock(&root->fs_info->fs_roots_radix_lock);
335
		if (root->last_trans == trans->transid && !force) {
Josef Bacik's avatar
Josef Bacik committed
336
337
338
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
339
340
341
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
			   (unsigned long)root->root_key.objectid,
			   BTRFS_ROOT_TRANS_TAG);
Josef Bacik's avatar
Josef Bacik committed
342
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
Chris Mason's avatar
Chris Mason committed
343
344
345
346
347
348
349
350
351
352
353
354
355
		root->last_trans = trans->transid;

		/* this is pretty tricky.  We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c.  The solution used here is to flag ourselves
356
		 * with root IN_TRANS_SETUP.  When this is 1, we're still
Chris Mason's avatar
Chris Mason committed
357
358
359
360
361
362
363
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take the
		 * lock.  smp_wmb() makes sure that all the writes above are
		 * done before we pop in the zero below
		 */
364
		btrfs_init_reloc_root(trans, root);
365
		smp_mb__before_atomic();
366
		clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
367
368
369
	}
	return 0;
}
370

Chris Mason's avatar
Chris Mason committed
371

372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	/* Add ourselves to the transaction dropped list */
	spin_lock(&cur_trans->dropped_roots_lock);
	list_add_tail(&root->root_list, &cur_trans->dropped_roots);
	spin_unlock(&cur_trans->dropped_roots_lock);

	/* Make sure we don't try to update the root at commit time */
	spin_lock(&root->fs_info->fs_roots_radix_lock);
	radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
			     (unsigned long)root->root_key.objectid,
			     BTRFS_ROOT_TRANS_TAG);
	spin_unlock(&root->fs_info->fs_roots_radix_lock);
}

Chris Mason's avatar
Chris Mason committed
390
391
392
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
393
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Chris Mason's avatar
Chris Mason committed
394
395
396
		return 0;

	/*
397
	 * see record_root_in_trans for comments about IN_TRANS_SETUP usage
Chris Mason's avatar
Chris Mason committed
398
399
400
401
	 * and barriers
	 */
	smp_rmb();
	if (root->last_trans == trans->transid &&
402
	    !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
Chris Mason's avatar
Chris Mason committed
403
404
405
		return 0;

	mutex_lock(&root->fs_info->reloc_mutex);
406
	record_root_in_trans(trans, root, 0);
Chris Mason's avatar
Chris Mason committed
407
408
409
410
411
	mutex_unlock(&root->fs_info->reloc_mutex);

	return 0;
}

412
413
414
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
	return (trans->state >= TRANS_STATE_BLOCKED &&
415
416
		trans->state < TRANS_STATE_UNBLOCKED &&
		!trans->aborted);
417
418
}

Chris Mason's avatar
Chris Mason committed
419
420
421
422
/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
Chris Mason's avatar
Chris Mason committed
423
static void wait_current_trans(struct btrfs_root *root)
Chris Mason's avatar
Chris Mason committed
424
{
425
	struct btrfs_transaction *cur_trans;
Chris Mason's avatar
Chris Mason committed
426

Josef Bacik's avatar
Josef Bacik committed
427
	spin_lock(&root->fs_info->trans_lock);
428
	cur_trans = root->fs_info->running_transaction;
429
	if (cur_trans && is_transaction_blocked(cur_trans)) {
430
		atomic_inc(&cur_trans->use_count);
Josef Bacik's avatar
Josef Bacik committed
431
		spin_unlock(&root->fs_info->trans_lock);
Li Zefan's avatar
Li Zefan committed
432
433

		wait_event(root->fs_info->transaction_wait,
434
435
			   cur_trans->state >= TRANS_STATE_UNBLOCKED ||
			   cur_trans->aborted);
436
		btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
437
438
	} else {
		spin_unlock(&root->fs_info->trans_lock);
439
	}
Chris Mason's avatar
Chris Mason committed
440
441
}

442
443
/*
 * Decide whether a handle of the given @type may wait for a blocked
 * transaction.
 *
 * Returns 0 while log_root_recovering is set.  Otherwise returns 1 for
 * TRANS_USERSPACE, and for TRANS_START when no ioctl transaction is open
 * (open_ioctl_trans == 0); every other case returns 0.
 */
static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}

457
458
459
static inline bool need_reserve_reloc_root(struct btrfs_root *root)
{
	if (!root->fs_info->reloc_ctl ||
460
	    !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
461
462
463
464
465
466
467
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    root->reloc_root)
		return false;

	return true;
}

468
static struct btrfs_trans_handle *
469
470
start_transaction(struct btrfs_root *root, unsigned int num_items,
		  unsigned int type, enum btrfs_reserve_flush_enum flush)
Chris Mason's avatar
Chris Mason committed
471
{
472
473
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
474
	u64 num_bytes = 0;
475
	u64 qgroup_reserved = 0;
476
477
	bool reloc_reserved = false;
	int ret;
478

479
	/* Send isn't supposed to start transactions. */
480
	ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB);
481

482
	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
483
		return ERR_PTR(-EROFS);
484

485
	if (current->journal_info) {
486
		WARN_ON(type & TRANS_EXTWRITERS);
487
488
		h = current->journal_info;
		h->use_count++;
489
		WARN_ON(h->use_count > 2);
490
491
492
493
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}
494
495
496
497
498
499

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
500
501
502
503
		qgroup_reserved = num_items * root->nodesize;
		ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
		if (ret)
			return ERR_PTR(ret);
504

505
		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
506
507
508
		/*
		 * Do the reservation for the relocation root creation
		 */
509
		if (need_reserve_reloc_root(root)) {
510
511
512
513
			num_bytes += root->nodesize;
			reloc_reserved = true;
		}

514
515
516
		ret = btrfs_block_rsv_add(root,
					  &root->fs_info->trans_block_rsv,
					  num_bytes, flush);
517
		if (ret)
518
			goto reserve_fail;
519
	}
520
again:
521
	h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
522
523
524
525
	if (!h) {
		ret = -ENOMEM;
		goto alloc_fail;
	}
Chris Mason's avatar
Chris Mason committed
526

527
528
529
530
531
532
	/*
	 * If we are JOIN_NOLOCK we're already committing a transaction and
	 * waiting on this guy, so we don't need to do the sb_start_intwrite
	 * because we're already holding a ref.  We need this because we could
	 * have raced in and did an fsync() on a file which can kick a commit
	 * and then we deadlock with somebody doing a freeze.
533
534
535
	 *
	 * If we are ATTACH, it means we just want to catch the current
	 * transaction and commit it, so we needn't do sb_start_intwrite(). 
536
	 */
537
	if (type & __TRANS_FREEZABLE)
538
		sb_start_intwrite(root->fs_info->sb);
539

540
	if (may_wait_transaction(root, type))
Chris Mason's avatar
Chris Mason committed
541
		wait_current_trans(root);
542

Josef Bacik's avatar
Josef Bacik committed
543
	do {
544
		ret = join_transaction(root, type);
545
		if (ret == -EBUSY) {
Josef Bacik's avatar
Josef Bacik committed
546
			wait_current_trans(root);
547
548
549
			if (unlikely(type == TRANS_ATTACH))
				ret = -ENOENT;
		}
Josef Bacik's avatar
Josef Bacik committed
550
551
	} while (ret == -EBUSY);

Tsutomu Itoh's avatar
Tsutomu Itoh committed
552
	if (ret < 0) {
553
554
		/* We must get the transaction if we are JOIN_NOLOCK. */
		BUG_ON(type == TRANS_JOIN_NOLOCK);
555
		goto join_fail;
Tsutomu Itoh's avatar
Tsutomu Itoh committed
556
	}
557

558
559
560
561
	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
562
	h->root = root;
563
	h->use_count = 1;
564

565
	h->type = type;
566
	h->can_flush_pending_bgs = true;
567
	INIT_LIST_HEAD(&h->qgroup_ref_list);
568
	INIT_LIST_HEAD(&h->new_bgs);
569

570
	smp_mb();
571
572
	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
	    may_wait_transaction(root, type)) {
573
		current->journal_info = h;
574
575
576
577
		btrfs_commit_transaction(h, root);
		goto again;
	}

578
	if (num_bytes) {
Josef Bacik's avatar
Josef Bacik committed
579
		trace_btrfs_space_reservation(root->fs_info, "transaction",
580
					      h->transid, num_bytes, 1);
581
582
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
583
		h->reloc_reserved = reloc_reserved;
584
	}
Josef Bacik's avatar
Josef Bacik committed
585

586
got_it:
Josef Bacik's avatar
Josef Bacik committed
587
	btrfs_record_root_in_trans(h, root);
588
589
590

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
Chris Mason's avatar
Chris Mason committed
591
	return h;
592
593

join_fail:
594
	if (type & __TRANS_FREEZABLE)
595
596
597
598
599
600
601
		sb_end_intwrite(root->fs_info->sb);
	kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
	if (num_bytes)
		btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
					num_bytes);
reserve_fail:
602
	btrfs_qgroup_free_meta(root, qgroup_reserved);
603
	return ERR_PTR(ret);
Chris Mason's avatar
Chris Mason committed
604
605
}

606
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
607
						   unsigned int num_items)
608
{
609
610
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_ALL);
611
}
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
/*
 * Like btrfs_start_transaction(), but if the normal reservation fails with
 * -ENOSPC, start the transaction without a reservation and try to obtain
 * the bytes through btrfs_cond_migrate_bytes() into trans_block_rsv,
 * passing @min_factor through.
 *
 * Returns the handle or an ERR_PTR(); if the migration fails the
 * transaction is ended before the error is returned.
 */
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
					struct btrfs_root *root,
					unsigned int num_items,
					int min_factor)
{
	struct btrfs_trans_handle *trans;
	u64 num_bytes;
	int ret;

	/* Only -ENOSPC takes the fallback path below. */
	trans = btrfs_start_transaction(root, num_items);
	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
		return trans;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return trans;

	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
	ret = btrfs_cond_migrate_bytes(root->fs_info,
				       &root->fs_info->trans_block_rsv,
				       num_bytes,
				       min_factor);
	if (ret) {
		btrfs_end_transaction(trans, root);
		return ERR_PTR(ret);
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	trans->bytes_reserved = num_bytes;
	trace_btrfs_space_reservation(root->fs_info, "transaction",
				      trans->transid, num_bytes, 1);

	return trans;
}
646

647
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
648
649
					struct btrfs_root *root,
					unsigned int num_items)
650
{
651
652
	return start_transaction(root, num_items, TRANS_START,
				 BTRFS_RESERVE_FLUSH_LIMIT);
653
654
}

655
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
656
{
657
658
	return start_transaction(root, 0, TRANS_JOIN,
				 BTRFS_RESERVE_NO_FLUSH);
659
660
}

661
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
662
{
663
664
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
				 BTRFS_RESERVE_NO_FLUSH);
665
666
}

667
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
668
{
669
670
	return start_transaction(root, 0, TRANS_USERSPACE,
				 BTRFS_RESERVE_NO_FLUSH);
671
672
}

Miao Xie's avatar
Miao Xie committed
673
674
675
676
677
678
679
680
681
682
683
684
685
/*
 * btrfs_attach_transaction() - catch the running transaction
 *
 * It is used when we want to commit the current the transaction, but
 * don't want to start a new one.
 *
 * Note: If this function return -ENOENT, it just means there is no
 * running transaction. But it is possible that the inactive transaction
 * is still in the memory, not fully on disk. If you hope there is no
 * inactive transaction in the fs when -ENOENT is returned, you should
 * invoke
 *     btrfs_attach_transaction_barrier()
 */
686
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
687
{
688
689
	return start_transaction(root, 0, TRANS_ATTACH,
				 BTRFS_RESERVE_NO_FLUSH);
690
691
}

Miao Xie's avatar
Miao Xie committed
692
/*
693
 * btrfs_attach_transaction_barrier() - catch the running transaction
Miao Xie's avatar
Miao Xie committed
694
695
696
697
698
699
700
701
702
703
 *
 * It is similar to the above function, the differentia is this one
 * will wait for all the inactive transactions until they fully
 * complete.
 */
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

704
705
	trans = start_transaction(root, 0, TRANS_ATTACH,
				  BTRFS_RESERVE_NO_FLUSH);
Miao Xie's avatar
Miao Xie committed
706
707
708
709
710
711
	if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
		btrfs_wait_for_commit(root, 0);

	return trans;
}

Chris Mason's avatar
Chris Mason committed
712
/* wait for a transaction commit to be fully complete */
713
static noinline void wait_for_commit(struct btrfs_root *root,
714
715
				    struct btrfs_transaction *commit)
{
716
	wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
717
718
}

719
720
721
/*
 * Wait for a transaction to be completely committed.
 *
 * @transid != 0: wait for that specific transaction.  Returns 0 at once if
 *                it is already covered by last_trans_committed, and
 *                -EINVAL if it is not on the transaction list and is newer
 *                than last_trans_committed.
 * @transid == 0: wait for the newest transaction that has reached
 *                TRANS_STATE_COMMIT_START but is not yet COMPLETED, if any.
 *
 * Returns 0 on success or when there was nothing to wait for.
 */
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret = 0;

	if (transid) {
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				ret = 0;
				break;
			}
			/* Already past the requested id: stop searching. */
			if (t->transid > transid) {
				ret = 0;
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);

		/*
		 * The specified transaction doesn't exist, or we
		 * raced with btrfs_commit_transaction
		 */
		if (!cur_trans) {
			if (transid > root->fs_info->last_trans_committed)
				ret = -EINVAL;
			goto out;
		}
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->state >= TRANS_STATE_COMMIT_START) {
				if (t->state == TRANS_STATE_COMPLETED)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	wait_for_commit(root, cur_trans);
	/* Drop the reference taken while searching the list. */
	btrfs_put_transaction(cur_trans);
out:
	return ret;
}

Chris Mason's avatar
Chris Mason committed
777
778
void btrfs_throttle(struct btrfs_root *root)
{
Josef Bacik's avatar
Josef Bacik committed
779
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
780
		wait_current_trans(root);
Chris Mason's avatar
Chris Mason committed
781
782
}

783
784
785
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
786
	if (root->fs_info->global_block_rsv.space_info->full &&
787
	    btrfs_check_space_for_delayed_refs(trans, root))
788
		return 1;
789

790
	return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
791
792
793
794
795
796
797
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;
798
	int err;
799

Josef Bacik's avatar
Josef Bacik committed
800
	smp_mb();
801
802
	if (cur_trans->state >= TRANS_STATE_BLOCKED ||
	    cur_trans->delayed_refs.flushing)
803
804
805
806
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
807
	if (updates) {
808
		err = btrfs_run_delayed_refs(trans, root, updates * 2);
809
810
811
		if (err) /* Error code will also eval true */
			return err;
	}
812
813
814
815

	return should_end_transaction(trans, root);
}

816
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
817
			  struct btrfs_root *root, int throttle)
Chris Mason's avatar
Chris Mason committed
818
{
819
	struct btrfs_transaction *cur_trans = trans->transaction;
820
	struct btrfs_fs_info *info = root->fs_info;
821
	unsigned long cur = trans->delayed_ref_updates;
822
	int lock = (trans->type != TRANS_JOIN_NOLOCK);
823
	int err = 0;
Chris Mason's avatar
Chris Mason committed
824
	int must_run_delayed_refs = 0;
825

826
827
	if (trans->use_count > 1) {
		trans->use_count--;
828
829
830
831
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

832
	btrfs_trans_release_metadata(trans, root);
833
	trans->block_rsv = NULL;
834

835
836
837
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

838
	trans->delayed_ref_updates = 0;
Chris Mason's avatar
Chris Mason committed
839
840
841
	if (!trans->sync) {
		must_run_delayed_refs =
			btrfs_should_throttle_delayed_refs(trans, root);
842
		cur = max_t(unsigned long, cur, 32);
Chris Mason's avatar
Chris Mason committed
843
844
845
846
847
848
849
850

		/*
		 * don't make the caller wait if they are from a NOLOCK
		 * or ATTACH transaction, it will deadlock with commit
		 */
		if (must_run_delayed_refs == 1 &&
		    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
			must_run_delayed_refs = 2;
851
	}
852

853
854
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
855

856
857
858
	if (!list_empty(&trans->new_bgs))
		btrfs_create_pending_block_groups(trans, root);

859
860
	btrfs_trans_release_chunk_metadata(trans);

Josef Bacik's avatar
Josef Bacik committed
861
	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
862
863
864
865
866
867
	    should_end_transaction(trans, root) &&
	    ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
		spin_lock(&info->trans_lock);
		if (cur_trans->state == TRANS_STATE_RUNNING)
			cur_trans->state = TRANS_STATE_BLOCKED;
		spin_unlock(&info->trans_lock);
Josef Bacik's avatar
Josef Bacik committed
868
	}
869

870
	if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
871
		if (throttle)
872
			return btrfs_commit_transaction(trans, root);
873
		else
874
875
876
			wake_up_process(info->transaction_kthread);
	}

877
	if (trans->type & __TRANS_FREEZABLE)
878
		sb_end_intwrite(root->fs_info->sb);
879

880
	WARN_ON(cur_trans != info->running_transaction);
881
882
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);
883
	extwriter_counter_dec(cur_trans, trans->type);
884

885
886
887
	/*
	 * Make sure counter is updated before we wake up waiters.
	 */
888
	smp_mb();
Chris Mason's avatar
Chris Mason committed
889
890
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
891
	btrfs_put_transaction(cur_trans);
Josef Bacik's avatar
Josef Bacik committed
892
893
894

	if (current->journal_info == trans)
		current->journal_info = NULL;
895

Yan, Zheng's avatar
Yan, Zheng committed
896
897
898
	if (throttle)
		btrfs_run_delayed_iputs(root);

899
	if (trans->aborted ||
900
901
	    test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
		wake_up_process(info->transaction_kthread);
902
		err = -EIO;
903
	}
904
	assert_qgroups_uptodate(trans);
905

906
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
Chris Mason's avatar
Chris Mason committed
907
908
909
910
	if (must_run_delayed_refs) {
		btrfs_async_run_delayed_refs(root, cur,
					     must_run_delayed_refs == 1);
	}
911
	return err;
Chris Mason's avatar
Chris Mason committed
912
913
}

914
915
916
/* End the handle @trans without throttling; see __btrfs_end_transaction(). */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

/* End the handle @trans with throttling; see __btrfs_end_transaction(). */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

Chris Mason's avatar
Chris Mason committed
926
927
928
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
929
 * those extents are sent to disk but does not wait on them
Chris Mason's avatar
Chris Mason committed
930
 */
931
int btrfs_write_marked_extents(struct btrfs_root *root,
932
			       struct extent_io_tree *dirty_pages, int mark)
Chris Mason's avatar
Chris Mason committed
933
{
934
	int err = 0;