Commit a22285a6 authored by Yan, Zheng's avatar Yan, Zheng Committed by Chris Mason
Browse files

Btrfs: Integrate metadata reservation with start_transaction



Besides simplify the code, this change makes sure all metadata
reservation for normal metadata operations are released after
committing transaction.

Changes since V1:

Add code that check if unlink and rmdir will free space.

Add ENOSPC handling for clone ioctl.
Signed-off-by: default avatarYan Zheng <zheng.yan@oracle.com>
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent f0486c68
......@@ -34,6 +34,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
......@@ -970,6 +971,7 @@ struct btrfs_fs_info {
int do_barriers;
int closing;
int log_root_recovering;
int enospc_unlink;
u64 total_pinned;
......@@ -1995,6 +1997,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
......@@ -2075,8 +2080,6 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
......@@ -2089,6 +2092,13 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
......@@ -2296,6 +2306,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index);
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
......
......@@ -318,107 +318,6 @@ out:
return ret;
}
/*
* helper function to lookup reference count and flags of extent.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
* node may also store the extent flags to set. This way you can check
* to see what the reference count and extent flags would be if all of
* the delayed refs are not processed.
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
struct btrfs_key key;
u32 item_size;
u64 num_refs;
u64 extent_flags;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
delayed_refs = &trans->transaction->delayed_refs;
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
if (ret < 0)
goto out;
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (item_size >= sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
num_refs = btrfs_extent_refs_v0(leaf, ei0);
/* FIXME: this isn't correct for data */
extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
BUG();
#endif
}
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
spin_lock(&delayed_refs->lock);
ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
if (ref) {
head = btrfs_delayed_node_to_head(ref);
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&ref->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(root->fs_info->extent_root, path);
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(ref);
goto again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += ref->ref_mod;
mutex_unlock(&head->mutex);
}
WARN_ON(num_refs == 0);
if (refs)
*refs = num_refs;
if (flags)
*flags = extent_flags;
out:
spin_unlock(&delayed_refs->lock);
btrfs_free_path(path);
return ret;
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same
......
......@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 orig_parent,
u64 parent, u64 orig_ref_root, u64 ref_root,
......
......@@ -1522,7 +1522,7 @@ static int transaction_kthread(void *arg)
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
sleep:
......@@ -2409,11 +2409,11 @@ int btrfs_commit_super(struct btrfs_root *root)
down_write(&root->fs_info->cleanup_work_sem);
up_write(&root->fs_info->cleanup_work_sem);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
btrfs_commit_transaction(trans, root);
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
......
......@@ -615,6 +615,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
return ret;
}
/*
* helper function to lookup reference count and flags of extent.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
* node may also store the extent flags to set. This way you can check
* to see what the reference count and extent flags would be if all of
* the delayed refs are not processed.
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
struct btrfs_key key;
u32 item_size;
u64 num_refs;
u64 extent_flags;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
if (!trans) {
path->skip_locking = 1;
path->search_commit_root = 1;
}
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
if (ret < 0)
goto out_free;
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (item_size >= sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
num_refs = btrfs_extent_refs_v0(leaf, ei0);
/* FIXME: this isn't correct for data */
extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
BUG();
#endif
}
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
if (!trans)
goto out;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(trans, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(root->fs_info->extent_root, path);
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
goto again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += head->node.ref_mod;
mutex_unlock(&head->mutex);
}
spin_unlock(&delayed_refs->lock);
out:
WARN_ON(num_refs == 0);
if (refs)
*refs = num_refs;
if (flags)
*flags = extent_flags;
out_free:
btrfs_free_path(path);
return ret;
}
/*
* Back reference rules. Back refs have three main goals:
*
......@@ -2948,113 +3055,6 @@ again:
return 0;
}
/*
* unreserve num_items number of items worth of metadata space. This needs to
* be paired with btrfs_reserve_metadata_space.
*
* NOTE: if you have the option, run this _AFTER_ you do a
* btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
* oprations which will result in more used metadata, so we want to make sure we
* can do that without issue.
*/
int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 num_bytes;
u64 alloc_target;
bool bug = false;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
num_bytes = calculate_bytes_needed(root, num_items);
spin_lock(&meta_sinfo->lock);
if (meta_sinfo->bytes_may_use < num_bytes) {
bug = true;
meta_sinfo->bytes_may_use = 0;
} else {
meta_sinfo->bytes_may_use -= num_bytes;
}
spin_unlock(&meta_sinfo->lock);
BUG_ON(bug);
return 0;
}
/*
* Reserve some metadata space for use. We'll calculate the worste case number
* of bytes that would be needed to modify num_items number of items. If we
* have space, fantastic, if not, you get -ENOSPC. Please call
* btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
* items you reserved, since whatever metadata you needed should have already
* been allocated.
*
* This will commit the transaction to make more space if we don't have enough
* metadata space. THe only time we don't do this is if we're reserving space
* inside of a transaction, then we will just return -ENOSPC and it is the
* callers responsibility to handle it properly.
*/
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 num_bytes;
u64 used;
u64 alloc_target;
int retries = 0;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
num_bytes = calculate_bytes_needed(root, num_items);
again:
spin_lock(&meta_sinfo->lock);
if (unlikely(!meta_sinfo->bytes_root))
meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
if (!retries)
meta_sinfo->bytes_may_use += num_bytes;
used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
meta_sinfo->bytes_super + meta_sinfo->bytes_root +
meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
if (used > meta_sinfo->total_bytes) {
retries++;
if (retries == 1) {
if (maybe_allocate_chunk(NULL, root, meta_sinfo,
num_bytes))
goto again;
retries++;
} else {
spin_unlock(&meta_sinfo->lock);
}
if (retries == 2) {
shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
goto again;
}
spin_lock(&meta_sinfo->lock);
meta_sinfo->bytes_may_use -= num_bytes;
spin_unlock(&meta_sinfo->lock);
dump_space_info(meta_sinfo, 0, 0);
return -ENOSPC;
}
check_force_delalloc(meta_sinfo);
spin_unlock(&meta_sinfo->lock);
return 0;
}
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
......@@ -3095,9 +3095,9 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;
trans = btrfs_join_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
......@@ -3118,8 +3118,8 @@ alloc:
if (!committed && !root->fs_info->open_ioctl_trans) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
if (!trans)
return -ENOMEM;
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
......@@ -3701,6 +3701,59 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
}
static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3 * num_items;
}
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries)
{
u64 num_bytes;
int ret;
if (num_items == 0 || root->fs_info->chunk_root == root)
return 0;
num_bytes = calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
num_bytes, retries);
if (!ret) {
trans->bytes_reserved += num_bytes;
trans->block_rsv = &root->fs_info->trans_block_rsv;
}
return ret;
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (!trans->bytes_reserved)
return;
BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
btrfs_block_rsv_release(root, trans->block_rsv,
trans->bytes_reserved);
trans->bytes_reserved = 0;
}
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending)
{
struct btrfs_root *root = pending->root;
struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
/*
* two for root back/forward refs, two for directory entries
* and one for root of the snapshot.
*/
u64 num_bytes = calc_trans_metadata_size(root, 5);
dst_rsv->space_info = src_rsv->space_info;
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
......@@ -5824,7 +5877,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
wc = kzalloc(sizeof(*wc), GFP_NOFS);
BUG_ON(!wc);
trans = btrfs_start_transaction(tree_root, 1);
trans = btrfs_start_transaction(tree_root, 0);
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
......@@ -5920,7 +5973,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
BUG_ON(ret);
btrfs_end_transaction(trans, tree_root);
trans = btrfs_start_transaction(tree_root, 1);
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
} else {
unsigned long update;
update = trans->delayed_ref_updates;
......
......@@ -126,8 +126,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
NULL);
if (err)
return err;
BUG_ON(err);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
......@@ -142,7 +141,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
* at this time.
*/
}
return err;
return 0;
}
/*
......@@ -1008,7 +1007,7 @@ out_nolock:
num_written = err;
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
if (ret == 0) {
......@@ -1104,9 +1103,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (file && file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
......
......@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
return 0;
}
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod)
{
struct btrfs_key key;
struct btrfs_inode_ref *ref;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
int ret;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = ref_objectid;
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
if (!find_name_in_backref(path, name, name_len, &ref))
return NULL;
return ref;
}
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
......
......@@ -2135,7 +2135,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)