Commit 890871be authored by Chris Mason
Browse files

Btrfs: switch extent_map to a rw lock



There are two main users of the extent_map tree.  The
first is regular file inodes, where it is evenly spread
between readers and writers.

The second is the chunk allocation tree, which maps blocks from
logical addresses to physical ones, and it is 99.99% reads.

The mapping tree is a point of lock contention during heavy IO
workloads, so this commit switches things to a rw lock.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 57fd5a5f
......@@ -507,10 +507,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/
set_page_extent_mapped(page);
lock_extent(tree, last_offset, end, GFP_NOFS);
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_CACHE_SIZE);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
if (!em || last_offset < em->start ||
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
......@@ -594,11 +594,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
em_tree = &BTRFS_I(inode)->extent_tree;
/* we need the actual starting offset of this extent in the file */
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
......
......@@ -121,15 +121,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
struct extent_map *em;
int ret;
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
goto out;
}
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
......@@ -142,7 +142,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
......@@ -161,7 +161,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
free_extent_map(em);
em = NULL;
}
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret)
em = ERR_PTR(ret);
......@@ -1323,9 +1323,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
offset = page_offset(page);
em_tree = &BTRFS_I(inode)->extent_tree;
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
if (!em) {
__unplug_io_fn(bdi, page);
return;
......
......@@ -5396,9 +5396,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
......
......@@ -2786,15 +2786,15 @@ int try_release_extent_mapping(struct extent_map_tree *map,
u64 len;
while (start <= end) {
len = end - start + 1;
spin_lock(&map->lock);
write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
if (!em || IS_ERR(em)) {
spin_unlock(&map->lock);
write_unlock(&map->lock);
break;
}
if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
em->start != start) {
spin_unlock(&map->lock);
write_unlock(&map->lock);
free_extent_map(em);
break;
}
......@@ -2808,7 +2808,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
free_extent_map(em);
}
start = extent_map_end(em);
spin_unlock(&map->lock);
write_unlock(&map->lock);
/* once for us */
free_extent_map(em);
......
......@@ -36,7 +36,7 @@ void extent_map_exit(void)
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
{
tree->map.rb_node = NULL;
spin_lock_init(&tree->lock);
rwlock_init(&tree->lock);
}
/**
......@@ -222,7 +222,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
ret = -EEXIST;
goto out;
}
assert_spin_locked(&tree->lock);
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
ret = -EEXIST;
......@@ -285,7 +284,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct rb_node *next = NULL;
u64 end = range_end(start, len);
assert_spin_locked(&tree->lock);
rb_node = __tree_search(&tree->map, start, &prev, &next);
if (!rb_node && prev) {
em = rb_entry(prev, struct extent_map, rb_node);
......@@ -331,7 +329,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
int ret = 0;
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
assert_spin_locked(&tree->lock);
rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0;
return ret;
......
......@@ -31,7 +31,7 @@ struct extent_map {
struct extent_map_tree {
struct rb_root map;
spinlock_t lock;
rwlock_t lock;
};
static inline u64 extent_map_end(struct extent_map *em)
......
......@@ -188,15 +188,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
break;
}
flags = em->flags;
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (em->start <= start &&
(!testend || em->start + em->len >= start + len)) {
free_extent_map(em);
......@@ -259,7 +259,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
free_extent_map(split);
split = NULL;
}
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
/* once for us */
free_extent_map(em);
......
......@@ -612,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
while (1) {
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
......@@ -748,9 +748,9 @@ static noinline int cow_file_range(struct inode *inode,
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
......@@ -1081,9 +1081,9 @@ out_check:
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
......@@ -1670,13 +1670,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
failrec->last_mirror = 0;
failrec->bio_flags = 0;
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (em->start > start || em->start + em->len < start) {
free_extent_map(em);
em = NULL;
}
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
if (!em || IS_ERR(em)) {
kfree(failrec);
......@@ -4069,11 +4069,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int compressed;
again:
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
em->bdev = root->fs_info->fs_devices->latest_bdev;
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
if (em) {
if (em->start > start || em->start + em->len <= start)
......@@ -4264,7 +4264,7 @@ insert:
}
err = 0;
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
......@@ -4304,7 +4304,7 @@ insert:
err = 0;
}
}
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
out:
if (path)
btrfs_free_path(path);
......
......@@ -2646,9 +2646,9 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
......
......@@ -1749,9 +1749,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
* step two, delete the device extents and the
* chunk tree entries
*/
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
BUG_ON(em->start > chunk_offset ||
em->start + em->len < chunk_offset);
......@@ -1780,9 +1780,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
BUG_ON(ret);
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
kfree(map);
em->bdev = NULL;
......@@ -2294,9 +2294,9 @@ again:
em->block_len = em->len;
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
spin_lock(&em_tree->lock);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
spin_unlock(&em_tree->lock);
write_unlock(&em_tree->lock);
BUG_ON(ret);
free_extent_map(em);
......@@ -2491,9 +2491,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
int readonly = 0;
int i;
spin_lock(&map_tree->map_tree.lock);
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
spin_unlock(&map_tree->map_tree.lock);
read_unlock(&map_tree->map_tree.lock);
if (!em)
return 1;
......@@ -2518,11 +2518,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
struct extent_map *em;
while (1) {
spin_lock(&tree->map_tree.lock);
write_lock(&tree->map_tree.lock);
em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
if (em)
remove_extent_mapping(&tree->map_tree, em);
spin_unlock(&tree->map_tree.lock);
write_unlock(&tree->map_tree.lock);
if (!em)
break;
kfree(em->bdev);
......@@ -2540,9 +2540,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
struct extent_map_tree *em_tree = &map_tree->map_tree;
int ret;
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, len);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
......@@ -2604,9 +2604,9 @@ again:
atomic_set(&multi->error, 0);
}
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
if (!em && unplug_page)
return 0;
......@@ -2763,9 +2763,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 stripe_nr;
int i, j, nr = 0;
spin_lock(&em_tree->lock);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_start, 1);
spin_unlock(&em_tree->lock);
read_unlock(&em_tree->lock);
BUG_ON(!em || em->start != chunk_start);
map = (struct map_lookup *)em->bdev;
......@@ -3053,9 +3053,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
spin_lock(&map_tree->map_tree.lock);
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
spin_unlock(&map_tree->map_tree.lock);
read_unlock(&map_tree->map_tree.lock);
/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
......@@ -3114,9 +3114,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
map->stripes[i].dev->in_fs_metadata = 1;
}
spin_lock(&map_tree->map_tree.lock);
write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em);
spin_unlock(&map_tree->map_tree.lock);
write_unlock(&map_tree->map_tree.lock);
BUG_ON(ret);
free_extent_map(em);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment