Commit 817d52f8 authored by Josef Bacik's avatar Josef Bacik Committed by Chris Mason
Browse files

Btrfs: async block group caching



This patch moves the caching of the block group off to a kthread in order to
allow people to allocate sooner.  Instead of blocking up behind the caching
mutex, we instead kick off the caching kthread, and then attempt to make an
allocation.  If we cannot, we wait on the block group's caching waitqueue; the
caching kthread wakes the waiting threads up every time it finds 2 meg worth of
space, and then again when it's finished caching.  This is how I tested the
speedup from this:

mkfs the disk
mount the disk
fill the disk up with fs_mark
unmount the disk
mount the disk
time touch /mnt/foo

Without my changes this took 11 seconds on my box, with these changes it now
takes 1 second.

Another change that's been put in place is that we lock the super mirrors in the
pinned extent map in order to keep us from adding that stuff as free space when
caching the block group.  This doesn't really change anything else as far as the
pinned extent map is concerned, since for actual pinned extents we use
EXTENT_DIRTY, but it does mean that when we unmount we have to go in and unlock
those extents to keep from leaking memory.

I've also added a check where when we are reading block groups from disk, if the
amount of space used == the size of the block group, we go ahead and mark the
block group as cached.  This drastically reduces the amount of time it takes to
cache the block groups.  Using the same test as above, except doing a dd to a
file and then unmounting, it used to take 33 seconds to umount, now it takes 3
seconds.

This version uses the commit_root in the caching kthread, and then keeps track
of how many async caching threads are running at any given time so if one of the
async threads is still running as we cross transactions we can wait until it's
finished before handling the pinned extents.  Thank you,
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 96303081
......@@ -691,6 +691,7 @@ struct btrfs_space_info {
struct list_head block_groups;
spinlock_t lock;
struct rw_semaphore groups_sem;
atomic_t caching_threads;
};
/*
......@@ -721,11 +722,17 @@ struct btrfs_free_cluster {
struct list_head block_group_list;
};
/*
 * Progress of the free-space caching of a block group.
 *
 * NOTE(review): presumably this is the value kept in
 * btrfs_block_group_cache::cached -- confirm against the users of that field.
 */
enum btrfs_caching_type {
	BTRFS_CACHE_NO = 0,	/* 0: caching has not been started */
	BTRFS_CACHE_STARTED,	/* 1: caching has been started */
	BTRFS_CACHE_FINISHED,	/* 2: caching has finished */
};
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
spinlock_t lock;
struct mutex cache_mutex;
u64 pinned;
u64 reserved;
u64 flags;
......@@ -733,15 +740,19 @@ struct btrfs_block_group_cache {
int extents_thresh;
int free_extents;
int total_bitmaps;
int cached;
int ro;
int dirty;
/* cache tracking stuff */
wait_queue_head_t caching_q;
int cached;
struct btrfs_space_info *space_info;
/* free space cache stuff */
spinlock_t tree_lock;
struct rb_root free_space_offset;
u64 free_space;
/* block group cache stuff */
struct rb_node cache_node;
......@@ -834,6 +845,7 @@ struct btrfs_fs_info {
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t async_caching_threads;
/*
* this is used by the balancing code to wait for all the pending
......@@ -950,6 +962,9 @@ struct btrfs_root {
/* the node lock is held while changing the node pointer */
spinlock_t node_lock;
/* taken when updating the commit root */
struct rw_semaphore commit_root_sem;
struct extent_buffer *commit_root;
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;
......@@ -1911,7 +1926,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin);
u64 bytenr, u64 num, int pin, int mark_free);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
......@@ -1996,6 +2011,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
......
......@@ -907,6 +907,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->inode_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_rwsem(&root->commit_root_sem);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
......@@ -1566,6 +1567,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->async_caching_threads, 0);
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
......@@ -2337,6 +2339,7 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(root->fs_info->csum_root->commit_root);
btrfs_free_block_groups(root->fs_info);
btrfs_free_super_mirror_extents(root->fs_info);
del_fs_roots(fs_info);
......
This diff is collapsed.
......@@ -238,6 +238,7 @@ static void unlink_free_space(struct btrfs_block_group_cache *block_group,
{
rb_erase(&info->offset_index, &block_group->free_space_offset);
block_group->free_extents--;
block_group->free_space -= info->bytes;
}
static int link_free_space(struct btrfs_block_group_cache *block_group,
......@@ -251,6 +252,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
if (ret)
return ret;
block_group->free_space += info->bytes;
block_group->free_extents++;
return ret;
}
......@@ -285,36 +287,40 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
}
}
static void bitmap_clear_bits(struct btrfs_free_space *info, u64 offset, u64 bytes,
u64 sectorsize)
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info, u64 offset,
u64 bytes)
{
unsigned long start, end;
unsigned long i;
start = offset_to_bit(info->offset, sectorsize, offset);
end = start + bytes_to_bits(bytes, sectorsize);
start = offset_to_bit(info->offset, block_group->sectorsize, offset);
end = start + bytes_to_bits(bytes, block_group->sectorsize);
BUG_ON(end > BITS_PER_BITMAP);
for (i = start; i < end; i++)
clear_bit(i, info->bitmap);
info->bytes -= bytes;
block_group->free_space -= bytes;
}
static void bitmap_set_bits(struct btrfs_free_space *info, u64 offset, u64 bytes,
u64 sectorsize)
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info, u64 offset,
u64 bytes)
{
unsigned long start, end;
unsigned long i;
start = offset_to_bit(info->offset, sectorsize, offset);
end = start + bytes_to_bits(bytes, sectorsize);
start = offset_to_bit(info->offset, block_group->sectorsize, offset);
end = start + bytes_to_bits(bytes, block_group->sectorsize);
BUG_ON(end > BITS_PER_BITMAP);
for (i = start; i < end; i++)
set_bit(i, info->bitmap);
info->bytes += bytes;
block_group->free_space += bytes;
}
static int search_bitmap(struct btrfs_block_group_cache *block_group,
......@@ -414,13 +420,12 @@ again:
(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
if (*offset > bitmap_info->offset && *offset + *bytes > end) {
bitmap_clear_bits(bitmap_info, *offset,
end - *offset + 1, block_group->sectorsize);
bitmap_clear_bits(block_group, bitmap_info, *offset,
end - *offset + 1);
*bytes -= end - *offset + 1;
*offset = end + 1;
} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
bitmap_clear_bits(bitmap_info, *offset,
*bytes, block_group->sectorsize);
bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
*bytes = 0;
}
......@@ -495,14 +500,13 @@ again:
(u64)(BITS_PER_BITMAP * block_group->sectorsize);
if (offset >= bitmap_info->offset && offset + bytes > end) {
bitmap_set_bits(bitmap_info, offset, end - offset,
block_group->sectorsize);
bitmap_set_bits(block_group, bitmap_info, offset,
end - offset);
bytes -= end - offset;
offset = end;
added = 0;
} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
bitmap_set_bits(bitmap_info, offset, bytes,
block_group->sectorsize);
bitmap_set_bits(block_group, bitmap_info, offset, bytes);
bytes = 0;
} else {
BUG();
......@@ -870,8 +874,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
ret = offset;
if (entry->bitmap) {
bitmap_clear_bits(entry, offset, bytes,
block_group->sectorsize);
bitmap_clear_bits(block_group, entry, offset, bytes);
if (!entry->bytes) {
unlink_free_space(block_group, entry);
kfree(entry->bitmap);
......@@ -891,6 +894,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
out:
spin_unlock(&block_group->tree_lock);
return ret;
}
......@@ -967,7 +971,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
goto out;
ret = search_start;
bitmap_clear_bits(entry, ret, bytes, block_group->sectorsize);
bitmap_clear_bits(block_group, entry, ret, bytes);
out:
spin_unlock(&cluster->lock);
spin_unlock(&block_group->tree_lock);
......
......@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
}
}
/*
 * Point root->commit_root at the root's current node.
 *
 * The swap is done with root->commit_root_sem held for writing: the old
 * commit root is handed to free_extent_buffer() and the result of
 * btrfs_root_node() is installed in its place before the semaphore is
 * released.
 *
 * NOTE(review): presumably readers of commit_root take commit_root_sem for
 * reading so they never observe the buffer mid-swap -- confirm at the
 * reader call sites.
 */
static noinline void switch_commit_root(struct btrfs_root *root)
{
	struct rw_semaphore *sem = &root->commit_root_sem;

	down_write(sem);

	/* release the outgoing commit root, then install the current node */
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);

	up_write(sem);
}
/*
* either allocate a new transaction or hop into the existing one
*/
......@@ -458,8 +466,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
ret = btrfs_write_dirty_block_groups(trans, root);
BUG_ON(ret);
}
free_extent_buffer(root->commit_root);
root->commit_root = btrfs_root_node(root);
switch_commit_root(root);
return 0;
}
......@@ -537,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
btrfs_update_reloc_root(trans, root);
if (root->commit_root != root->node) {
free_extent_buffer(root->commit_root);
root->commit_root = btrfs_root_node(root);
switch_commit_root(root);
btrfs_set_root_node(&root->root_item,
root->node);
}
......@@ -1002,15 +1008,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_set_root_node(&root->fs_info->tree_root->root_item,
root->fs_info->tree_root->node);
free_extent_buffer(root->fs_info->tree_root->commit_root);
root->fs_info->tree_root->commit_root =
btrfs_root_node(root->fs_info->tree_root);
switch_commit_root(root->fs_info->tree_root);
btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
root->fs_info->chunk_root->node);
free_extent_buffer(root->fs_info->chunk_root->commit_root);
root->fs_info->chunk_root->commit_root =
btrfs_root_node(root->fs_info->chunk_root);
switch_commit_root(root->fs_info->chunk_root);
update_super_roots(root);
......@@ -1050,6 +1052,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
cur_trans->commit_done = 1;
root->fs_info->last_trans_committed = cur_trans->transid;
wake_up(&cur_trans->commit_wait);
put_transaction(cur_trans);
......
......@@ -264,7 +264,7 @@ static int process_one_buffer(struct btrfs_root *log,
{
if (wc->pin)
btrfs_update_pinned_extents(log->fs_info->extent_root,
eb->start, eb->len, 1);
eb->start, eb->len, 1, 0);
if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment