Commit 0aefda3e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "These are scattered fixes and one performance improvement.  The
  biggest functional change is in how we throttle metadata changes.  The
  new code bumps our average file creation rate up by ~13% in fs_mark,
  and lowers CPU usage.

  Stefan bisected out a regression in our allocation code that made
  balance loop on extents larger than 256MB."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: improve the delayed inode throttling
  Btrfs: fix a mismerge in btrfs_balance()
  Btrfs: enforce min_bytes parameter during extent allocation
  Btrfs: allow running defrag in parallel to administrative tasks
  Btrfs: avoid deadlock on transaction waiting list
  Btrfs: do not BUG_ON on aborted situation
  Btrfs: do not BUG_ON in prepare_to_reloc
  Btrfs: free all recorded tree blocks on error
  Btrfs: build up error handling for merge_reloc_roots
  Btrfs: check for NULL pointer in updating reloc roots
  Btrfs: fix unclosed transaction handler when the async transaction commitment fails
  Btrfs: fix wrong handle at error path of create_snapshot() when the commit fails
  Btrfs: use set_nlink if our i_nlink is 0
parents 2ef39204 de3cb945
......@@ -22,8 +22,9 @@
#include "disk-io.h"
#include "transaction.h"
#define BTRFS_DELAYED_WRITEBACK 400
#define BTRFS_DELAYED_BACKGROUND 100
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
#define BTRFS_DELAYED_BATCH 16
static struct kmem_cache *delayed_node_cache;
......@@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
BTRFS_DELAYED_DELETION_ITEM);
}
/*
 * Account for the completion of one delayed item.
 *
 * Advances delayed_root->items_seq and drops delayed_root->items by one.
 * Waiters on delayed_root->wait are woken when either the remaining item
 * count is below BTRFS_DELAYED_BACKGROUND or the new sequence value is a
 * multiple of BTRFS_DELAYED_BATCH.
 */
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
	/* Keep the original order: bump the sequence first, then the count. */
	int seq = atomic_inc_return(&delayed_root->items_seq);
	int remaining = atomic_dec_return(&delayed_root->items);
	int below_background = remaining < BTRFS_DELAYED_BACKGROUND;
	int batch_boundary = (seq % BTRFS_DELAYED_BATCH) == 0;

	/* Nothing to signal unless one of the two wake conditions holds. */
	if (!below_background && !batch_boundary)
		return;

	/* Only issue the wake-up when somebody is actually queued. */
	if (waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
}
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
struct rb_root *root;
......@@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
rb_erase(&delayed_item->rb_node, root);
delayed_item->delayed_node->count--;
if (atomic_dec_return(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
finish_one_item(delayed_root);
}
static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
......@@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
if (atomic_dec_return(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
finish_one_item(delayed_root);
}
}
......@@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
btrfs_release_delayed_node(delayed_node);
}
struct btrfs_async_delayed_node {
struct btrfs_root *root;
struct btrfs_delayed_node *delayed_node;
struct btrfs_async_delayed_work {
struct btrfs_delayed_root *delayed_root;
int nr;
struct btrfs_work work;
};
static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
struct btrfs_async_delayed_node *async_node;
struct btrfs_async_delayed_work *async_work;
struct btrfs_delayed_root *delayed_root;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
int need_requeue = 0;
int total_done = 0;
async_node = container_of(work, struct btrfs_async_delayed_node, work);
async_work = container_of(work, struct btrfs_async_delayed_work, work);
delayed_root = async_work->delayed_root;
path = btrfs_alloc_path();
if (!path)
goto out;
path->leave_spinning = 1;
delayed_node = async_node->delayed_node;
again:
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
goto free_path;
delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
if (!delayed_node)
goto free_path;
path->leave_spinning = 1;
root = delayed_node->root;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto free_path;
goto release_path;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
......@@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
* Task1 will sleep until the transaction is committed.
*/
mutex_lock(&delayed_node->mutex);
if (delayed_node->count)
need_requeue = 1;
else
btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
delayed_node);
btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
btrfs_btree_balance_dirty_nodelay(root);
release_path:
btrfs_release_path(path);
total_done++;
btrfs_release_prepared_delayed_node(delayed_node);
if (async_work->nr == 0 || total_done < async_work->nr)
goto again;
free_path:
btrfs_free_path(path);
out:
if (need_requeue)
btrfs_requeue_work(&async_node->work);
else {
btrfs_release_prepared_delayed_node(delayed_node);
kfree(async_node);
}
wake_up(&delayed_root->wait);
kfree(async_work);
}
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
struct btrfs_root *root, int all)
struct btrfs_root *root, int nr)
{
struct btrfs_async_delayed_node *async_node;
struct btrfs_delayed_node *curr;
int count = 0;
struct btrfs_async_delayed_work *async_work;
again:
curr = btrfs_first_prepared_delayed_node(delayed_root);
if (!curr)
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return 0;
async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
if (!async_node) {
btrfs_release_prepared_delayed_node(curr);
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
if (!async_work)
return -ENOMEM;
}
async_node->root = root;
async_node->delayed_node = curr;
async_node->work.func = btrfs_async_run_delayed_node_done;
async_node->work.flags = 0;
btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
count++;
if (all || count < 4)
goto again;
async_work->delayed_root = delayed_root;
async_work->work.func = btrfs_async_run_delayed_root;
async_work->work.flags = 0;
async_work->nr = nr;
btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
return 0;
}
......@@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
WARN_ON(btrfs_first_delayed_node(delayed_root));
}
/*
 * Tell whether delayed_root->items_seq has moved outside the window
 * [seq, seq + count).
 *
 * Returns 1 when the current sequence value is below @seq or is at or
 * beyond @seq + @count, and 0 while it is still inside that window.
 */
static int refs_newer(struct btrfs_delayed_root *delayed_root,
		      int seq, int count)
{
	int cur = atomic_read(&delayed_root->items_seq);
	/* The upper bound is only evaluated once the lower bound holds. */
	int inside_window = (cur >= seq) && (cur < seq + count);

	return inside_window ? 0 : 1;
}
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
int seq;
delayed_root = btrfs_get_delayed_root(root);
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return;
seq = atomic_read(&delayed_root->items_seq);
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
int ret;
ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
DEFINE_WAIT(__wait);
ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
if (ret)
return;
wait_event_interruptible_timeout(
delayed_root->wait,
(atomic_read(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND),
HZ);
return;
while (1) {
prepare_to_wait(&delayed_root->wait, &__wait,
TASK_INTERRUPTIBLE);
if (refs_newer(delayed_root, seq,
BTRFS_DELAYED_BATCH) ||
atomic_read(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND) {
break;
}
if (!signal_pending(current))
schedule();
else
break;
}
finish_wait(&delayed_root->wait, &__wait);
}
btrfs_wq_run_delayed_node(delayed_root, root, 0);
btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
}
/* Will return 0 or -ENOMEM */
......
......@@ -43,6 +43,7 @@ struct btrfs_delayed_root {
*/
struct list_head prepare_list;
atomic_t items; /* for delayed items */
atomic_t items_seq; /* for delayed items */
int nodes; /* for delayed nodes */
wait_queue_head_t wait;
};
......@@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(
struct btrfs_delayed_root *delayed_root)
{
atomic_set(&delayed_root->items, 0);
atomic_set(&delayed_root->items_seq, 0);
delayed_root->nodes = 0;
spin_lock_init(&delayed_root->lock);
init_waitqueue_head(&delayed_root->wait);
......
......@@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
......@@ -3687,7 +3687,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
{
struct btrfs_pending_snapshot *snapshot;
struct list_head splice;
......@@ -3700,10 +3700,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
snapshot = list_entry(splice.next,
struct btrfs_pending_snapshot,
list);
snapshot->error = -ECANCELED;
list_del_init(&snapshot->list);
kfree(snapshot);
}
}
......@@ -3840,6 +3838,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->blocked = 1;
wake_up(&root->fs_info->transaction_blocked_wait);
btrfs_evict_pending_snapshots(cur_trans);
cur_trans->blocked = 0;
wake_up(&root->fs_info->transaction_wait);
......@@ -3849,8 +3849,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
btrfs_destroy_pending_snapshots(cur_trans);
btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(root,
......@@ -3894,6 +3892,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
wake_up(&root->fs_info->transaction_blocked_wait);
btrfs_evict_pending_snapshots(t);
t->blocked = 0;
smp_mb();
if (waitqueue_active(&root->fs_info->transaction_wait))
......@@ -3907,8 +3907,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
btrfs_destroy_pending_snapshots(t);
btrfs_destroy_delalloc_inodes(root);
spin_lock(&root->fs_info->trans_lock);
......
......@@ -8502,6 +8502,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
struct btrfs_key ins;
u64 cur_offset = start;
u64 i_size;
u64 cur_bytes;
int ret = 0;
bool own_trans = true;
......@@ -8516,8 +8517,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
ret = btrfs_reserve_extent(trans, root,
min(num_bytes, 256ULL * 1024 * 1024),
cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
cur_bytes = max(cur_bytes, min_size);
ret = btrfs_reserve_extent(trans, root, cur_bytes,
min_size, 0, *alloc_hint, &ins, 1);
if (ret) {
if (own_trans)
......
......@@ -527,6 +527,8 @@ fail:
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
if (err)
err = btrfs_commit_transaction(trans, root);
} else {
err = btrfs_commit_transaction(trans, root);
}
......@@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
root->fs_info->extent_root, 1);
if (ret)
ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_commit_transaction(trans,
root->fs_info->extent_root);
}
if (ret) {
/* cleanup_transaction has freed this for us */
if (trans->aborted)
pending_snapshot = NULL;
if (ret)
goto fail;
}
ret = pending_snapshot->error;
if (ret)
......@@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
if (ret)
return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
return -EINVAL;
}
if (btrfs_root_readonly(root)) {
ret = -EROFS;
goto out;
......@@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EINVAL;
}
out:
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
......
......@@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
}
spin_unlock(&rc->reloc_root_tree.lock);
if (!node)
return 0;
BUG_ON((struct btrfs_root *)node->data != root);
if (!del) {
......@@ -2237,6 +2239,21 @@ again:
return err;
}
/*
 * Release every reloc root queued on @list.
 *
 * For each entry: unlink it from @list, drop its reloc-root mapping via
 * __update_reloc_root(..., 1), release the references on its node and
 * commit_root extent buffers, and free the root itself.  On return @list
 * is empty.
 */
static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
	struct btrfs_root *reloc_root;

	while (!list_empty(list)) {
		reloc_root = list_entry(list->next, struct btrfs_root,
					root_list);
		/*
		 * Unlink the entry ourselves before it is freed.
		 * __update_reloc_root() may return early when it finds no
		 * mapping node, so it cannot be relied upon to take the root
		 * off the list; without this the loop would pick the freed
		 * entry up again.  list_del_init() leaves the node
		 * self-linked, so a later removal attempt on it is harmless.
		 */
		list_del_init(&reloc_root->root_list);
		__update_reloc_root(reloc_root, 1);
		free_extent_buffer(reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		kfree(reloc_root);
	}
}
static noinline_for_stack
int merge_reloc_roots(struct reloc_control *rc)
{
......@@ -2244,7 +2261,7 @@ int merge_reloc_roots(struct reloc_control *rc)
struct btrfs_root *reloc_root;
LIST_HEAD(reloc_roots);
int found = 0;
int ret;
int ret = 0;
again:
root = rc->extent_root;
......@@ -2270,20 +2287,33 @@ again:
BUG_ON(root->reloc_root != reloc_root);
ret = merge_reloc_root(rc, root);
BUG_ON(ret);
if (ret)
goto out;
} else {
list_del_init(&reloc_root->root_list);
}
ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
BUG_ON(ret < 0);
if (ret < 0) {
if (list_empty(&reloc_root->root_list))
list_add_tail(&reloc_root->root_list,
&reloc_roots);
goto out;
}
}
if (found) {
found = 0;
goto again;
}
out:
if (ret) {
btrfs_std_error(root->fs_info, ret);
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
}
BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
return 0;
return ret;
}
static void free_block_list(struct rb_root *blocks)
......@@ -2818,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
int err = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (!path) {
err = -ENOMEM;
goto out_path;
}
rb_node = rb_first(blocks);
while (rb_node) {
......@@ -2858,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
rb_node = rb_next(rb_node);
}
out:
free_block_list(blocks);
err = finish_pending_nodes(trans, rc, path, err);
btrfs_free_path(path);
out_path:
free_block_list(blocks);
return err;
}
......@@ -3698,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root);
BUG_ON(IS_ERR(trans));
if (IS_ERR(trans)) {
unset_reloc_control(rc);
/*
* The extent tree is not a ref_cow tree and has no reloc_root to
* clean up. Callers are responsible for freeing the above
* block rsv.
*/
return PTR_ERR(trans);
}
btrfs_commit_transaction(trans, rc->extent_root);
return 0;
}
......@@ -3730,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
while (1) {
progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
BUG_ON(IS_ERR(trans));
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
trans = NULL;
break;
}
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
......@@ -4264,14 +4309,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)
out_free:
kfree(rc);
out:
while (!list_empty(&reloc_roots)) {
reloc_root = list_entry(reloc_roots.next,
struct btrfs_root, root_list);
list_del(&reloc_root->root_list);
free_extent_buffer(reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
kfree(reloc_root);
}
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
btrfs_free_path(path);
if (err == 0) {
......
......@@ -1052,7 +1052,12 @@ int btrfs_defrag_root(struct btrfs_root *root)
/*
* new snapshots need to be created at a very specific time in the
* transaction commit. This does the actual creation
* transaction commit. This does the actual creation.
*
* Note:
* If an error occurs that may affect the commit of the current transaction,
* we should return the error number. If the error only affects the creation
* of the pending snapshots, just return 0.
*/
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
......@@ -1071,7 +1076,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec cur_time = CURRENT_TIME;
int ret;
int ret = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
......@@ -1080,40 +1085,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = pending->error = -ENOMEM;
return ret;
pending->error = -ENOMEM;
return 0;
}
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
if (!new_root_item) {
ret = pending->error = -ENOMEM;
pending->error = -ENOMEM;
goto root_item_alloc_fail;
}
ret = btrfs_find_free_objectid(tree_root, &objectid);
if (ret) {
pending->error = ret;
pending->error = btrfs_find_free_objectid(tree_root, &objectid);
if (pending->error)
goto no_free_objectid;
}
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(root, &pending->block_rsv,
to_reserve,
BTRFS_RESERVE_NO_FLUSH);
if (ret) {
pending->error = ret;
pending->error = btrfs_block_rsv_add(root,
&pending->block_rsv,
to_reserve,
BTRFS_RESERVE_NO_FLUSH);
if (pending->error)
goto no_free_objectid;
}
}
ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
objectid, pending->inherit);
if (ret) {
pending->error = ret;
pending->error = btrfs_qgroup_inherit(trans, fs_info,
root->root_key.objectid,
objectid, pending->inherit);
if (pending->error)
goto no_free_objectid;
}