Commit e13d100b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "This fixes some lockups in btrfs reported with rc1.  It probably has
  some performance impact because it is backing off our spinning locks
  more often and switching to a blocking lock.  I'll be able to nail
  that down next week, but for now I want to get the lockups taken care
  of.

  Otherwise some more stack reduction and assorted fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix wrong error handle when the device is missing or is not writeable
  Btrfs: fix deadlock when mounting a degraded fs
  Btrfs: use bio_endio_nodec instead of open code
  Btrfs: fix NULL pointer crash when running balance and scrub concurrently
  btrfs: Skip scrubbing removed chunks to avoid -ENOENT.
  Btrfs: fix broken free space cache after the system crashed
  Btrfs: make free space cache write out functions more readable
  Btrfs: remove unused wait queue in struct extent_buffer
  Btrfs: fix deadlocks with trylock on tree nodes
parents 147f1404 8408c716
......@@ -1259,11 +1259,19 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
u64 sectorsize;
u64 cache_generation;
/*
* Used only for delayed data space allocation, because only data space
* allocation and the related metadata update can be done across
* transactions.
*/
struct rw_semaphore data_rwsem;
/* for raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;
......@@ -3316,7 +3324,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data);
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
......@@ -3330,7 +3338,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int no_quota);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
int delalloc);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len);
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
......
......@@ -105,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
u64 num_bytes, int reserve,
int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
......@@ -3260,7 +3261,8 @@ again:
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
!btrfs_test_opt(root, SPACE_CACHE)) {
!btrfs_test_opt(root, SPACE_CACHE) ||
block_group->delalloc_bytes) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
......@@ -5613,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
* @cache: The cache we are manipulating
* @num_bytes: The number of bytes in question
* @reserve: One of the reservation enums
* @delalloc: The blocks are allocated for the delalloc write
*
* This is called by the allocator when it reserves space, or by somebody who is
* freeing space that was never actually used on disk. For example if you
......@@ -5631,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
* succeeds.
*/
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve)
u64 num_bytes, int reserve, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
......@@ -5650,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
if (delalloc)
cache->delalloc_bytes += num_bytes;
}
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
if (delalloc)
cache->delalloc_bytes -= num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
......@@ -6206,7 +6215,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
......@@ -6365,6 +6374,70 @@ enum btrfs_loop_type {
LOOP_NO_EMPTY_SIZE = 3,
};
/*
 * Take @cache->data_rwsem for reading when @delalloc is non-zero.
 *
 * Does nothing when @delalloc is zero.  No block group reference is taken
 * here.
 */
static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	if (!delalloc)
		return;

	down_read(&cache->data_rwsem);
}
/*
 * Take a reference on @cache and, when @delalloc is non-zero, also take
 * @cache->data_rwsem for reading.
 *
 * The reference is always taken first; the rwsem is only touched for
 * delalloc callers.
 */
static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);

	if (!delalloc)
		return;

	down_read(&cache->data_rwsem);
}
/*
 * Lock @cluster and pin the block group it currently points at.
 *
 * @block_group: the block group the caller is already working on
 * @cluster:     the free cluster to inspect
 * @delalloc:    non-zero if data_rwsem must also be read-held on the result
 *
 * Returns with @cluster->refill_lock held on every path, including the NULL
 * return.  The return value is:
 *   - NULL if the cluster has no block group;
 *   - @block_group itself if the cluster points at it (no extra reference
 *     or rwsem is taken here);
 *   - otherwise the cluster's block group with a reference taken and, when
 *     @delalloc is non-zero, its data_rwsem held for reading.
 */
static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg;

	spin_lock(&cluster->refill_lock);
	for (;;) {
		used_bg = cluster->block_group;
		if (!used_bg)
			return NULL;

		if (used_bg == block_group)
			return used_bg;

		btrfs_get_block_group(used_bg);

		/* Fast path: no rwsem needed, or it was free right away. */
		if (!delalloc || down_read_trylock(&used_bg->data_rwsem))
			return used_bg;

		/*
		 * The blocking down_read() is done with the spinlock
		 * dropped.
		 */
		spin_unlock(&cluster->refill_lock);
		down_read(&used_bg->data_rwsem);

		/*
		 * The cluster may have been switched to another block group
		 * while the spinlock was dropped; re-check under the lock.
		 */
		spin_lock(&cluster->refill_lock);
		if (used_bg == cluster->block_group)
			return used_bg;

		/* Stale block group: undo the rwsem and reference, retry. */
		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}
}
/*
 * Release @cache->data_rwsem (read side) when @delalloc is non-zero, then
 * drop one reference on @cache.
 *
 * The rwsem is released before the reference is dropped.
 */
static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc != 0)
		up_read(&cache->data_rwsem);

	btrfs_put_block_group(cache);
}
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
......@@ -6379,7 +6452,7 @@ enum btrfs_loop_type {
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
u64 flags)
u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
......@@ -6467,6 +6540,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
up_read(&space_info->groups_sem);
} else {
index = get_block_group_index(block_group);
btrfs_lock_block_group(block_group, delalloc);
goto have_block_group;
}
} else if (block_group) {
......@@ -6481,7 +6555,7 @@ search:
u64 offset;
int cached;
btrfs_get_block_group(block_group);
btrfs_grab_block_group(block_group, delalloc);
search_start = block_group->key.objectid;
/*
......@@ -6529,16 +6603,16 @@ have_block_group:
* the refill lock keeps out other
* people trying to start a new cluster
*/
spin_lock(&last_ptr->refill_lock);
used_block_group = last_ptr->block_group;
if (used_block_group != block_group &&
(!used_block_group ||
used_block_group->ro ||
!block_group_bits(used_block_group, flags)))
used_block_group = btrfs_lock_cluster(block_group,
last_ptr,
delalloc);
if (!used_block_group)
goto refill_cluster;
if (used_block_group != block_group)
btrfs_get_block_group(used_block_group);
if (used_block_group != block_group &&
(used_block_group->ro ||
!block_group_bits(used_block_group, flags)))
goto release_cluster;
offset = btrfs_alloc_from_cluster(used_block_group,
last_ptr,
......@@ -6552,16 +6626,15 @@ have_block_group:
used_block_group,
search_start, num_bytes);
if (used_block_group != block_group) {
btrfs_put_block_group(block_group);
btrfs_release_block_group(block_group,
delalloc);
block_group = used_block_group;
}
goto checks;
}
WARN_ON(last_ptr->block_group != used_block_group);
if (used_block_group != block_group)
btrfs_put_block_group(used_block_group);
refill_cluster:
release_cluster:
/* If we are on LOOP_NO_EMPTY_SIZE, we can't
* set up a new cluster, so let's just skip it
* and let the allocator find whatever block
......@@ -6578,8 +6651,10 @@ refill_cluster:
* succeeding in the unclustered
* allocation. */
if (loop >= LOOP_NO_EMPTY_SIZE &&
last_ptr->block_group != block_group) {
used_block_group != block_group) {
spin_unlock(&last_ptr->refill_lock);
btrfs_release_block_group(used_block_group,
delalloc);
goto unclustered_alloc;
}
......@@ -6589,6 +6664,10 @@ refill_cluster:
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
if (used_block_group != block_group)
btrfs_release_block_group(used_block_group,
delalloc);
refill_cluster:
if (loop >= LOOP_NO_EMPTY_SIZE) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
......@@ -6696,7 +6775,7 @@ checks:
BUG_ON(offset > search_start);
ret = btrfs_update_reserved_bytes(block_group, num_bytes,
alloc_type);
alloc_type, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
......@@ -6708,13 +6787,13 @@ checks:
trace_btrfs_reserve_extent(orig_root, block_group,
search_start, num_bytes);
btrfs_put_block_group(block_group);
btrfs_release_block_group(block_group, delalloc);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
btrfs_put_block_group(block_group);
btrfs_release_block_group(block_group, delalloc);
}
up_read(&space_info->groups_sem);
......@@ -6827,7 +6906,7 @@ again:
int btrfs_reserve_extent(struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data)
struct btrfs_key *ins, int is_data, int delalloc)
{
bool final_tried = false;
u64 flags;
......@@ -6837,7 +6916,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
flags);
flags, delalloc);
if (ret == -ENOSPC) {
if (!final_tried && ins->offset) {
......@@ -6862,7 +6941,8 @@ again:
}
static int __btrfs_free_reserved_extent(struct btrfs_root *root,
u64 start, u64 len, int pin)
u64 start, u64 len,
int pin, int delalloc)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
......@@ -6881,7 +6961,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
pin_down_extent(root, cache, start, len, 1);
else {
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
}
btrfs_put_block_group(cache);
......@@ -6891,15 +6971,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
}
int btrfs_free_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
u64 start, u64 len, int delalloc)
{
return __btrfs_free_reserved_extent(root, start, len, 0);
return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
{
return __btrfs_free_reserved_extent(root, start, len, 1);
return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
......@@ -7114,7 +7194,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
return -EINVAL;
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
RESERVE_ALLOC_NO_ACCOUNT);
RESERVE_ALLOC_NO_ACCOUNT, 0);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
......@@ -7256,7 +7336,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(root, blocksize, blocksize,
empty_size, hint, &ins, 0);
empty_size, hint, &ins, 0, 0);
if (ret) {
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
......@@ -8659,6 +8739,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
start);
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->new_bg_list);
......
......@@ -158,7 +158,6 @@ struct extent_buffer {
* to unlock
*/
wait_queue_head_t read_lock_wq;
wait_queue_head_t lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
......
......@@ -75,6 +75,8 @@ void free_extent_map(struct extent_map *em)
if (atomic_dec_and_test(&em->refs)) {
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
kfree(em->bdev);
kmem_cache_free(extent_map_cache, em);
}
}
......
......@@ -15,6 +15,7 @@
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
#define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */
struct extent_map {
struct rb_node rb_node;
......
......@@ -274,18 +274,32 @@ struct io_ctl {
};
static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
struct btrfs_root *root)
struct btrfs_root *root, int write)
{
int num_pages;
int check_crcs = 0;
num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
/* Make sure we can fit our crcs into the first page */
if (write && check_crcs &&
(num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
return -ENOSPC;
memset(io_ctl, 0, sizeof(struct io_ctl));
io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
GFP_NOFS);
io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
if (!io_ctl->pages)
return -ENOMEM;
io_ctl->num_pages = num_pages;
io_ctl->root = root;
if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
io_ctl->check_crcs = 1;
io_ctl->check_crcs = check_crcs;
return 0;
}
......@@ -666,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
generation = btrfs_free_space_generation(leaf, header);
btrfs_release_path(path);
if (!BTRFS_I(inode)->generation) {
btrfs_info(root->fs_info,
"The free space cache file (%llu) is invalid. skip it\n",
offset);
return 0;
}
if (BTRFS_I(inode)->generation != generation) {
btrfs_err(root->fs_info,
"free space inode generation (%llu) "
......@@ -677,7 +698,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (!num_entries)
return 0;
ret = io_ctl_init(&io_ctl, inode, root);
ret = io_ctl_init(&io_ctl, inode, root, 0);
if (ret)
return ret;
......@@ -957,19 +978,18 @@ fail:
}
static noinline_for_stack int
add_ioctl_entries(struct btrfs_root *root,
struct inode *inode,
struct btrfs_block_group_cache *block_group,
struct io_ctl *io_ctl,
struct extent_state **cached_state,
struct list_head *bitmap_list,
int *entries)
write_pinned_extent_entries(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct io_ctl *io_ctl,
int *entries)
{
u64 start, extent_start, extent_end, len;
struct list_head *pos, *n;
struct extent_io_tree *unpin = NULL;
int ret;
if (!block_group)
return 0;
/*
* We want to add any pinned extents to our free space cache
* so we don't leak the space
......@@ -979,23 +999,19 @@ add_ioctl_entries(struct btrfs_root *root,
*/
unpin = root->fs_info->pinned_extents;
if (block_group)
start = block_group->key.objectid;
start = block_group->key.objectid;
while (block_group && (start < block_group->key.objectid +
block_group->key.offset)) {
while (start < block_group->key.objectid + block_group->key.offset) {
ret = find_first_extent_bit(unpin, start,
&extent_start, &extent_end,
EXTENT_DIRTY, NULL);
if (ret) {
ret = 0;
break;
}
if (ret)
return 0;
/* This pinned extent is out of our range */
if (extent_start >= block_group->key.objectid +
block_group->key.offset)
break;
return 0;
extent_start = max(extent_start, start);
extent_end = min(block_group->key.objectid +
......@@ -1005,11 +1021,20 @@ add_ioctl_entries(struct btrfs_root *root,
*entries += 1;
ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
if (ret)
goto out_nospc;
return -ENOSPC;
start = extent_end;
}
return 0;
}
static noinline_for_stack int
write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list)
{
struct list_head *pos, *n;
int ret;
/* Write out the bitmaps */
list_for_each_safe(pos, n, bitmap_list) {
struct btrfs_free_space *entry =
......@@ -1017,36 +1042,24 @@ add_ioctl_entries(struct btrfs_root *root,
ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
if (ret)
goto out_nospc;
return -ENOSPC;
list_del_init(&entry->list);
}
/* Zero out the rest of the pages just to make sure */
io_ctl_zero_remaining_pages(io_ctl);
ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
0, i_size_read(inode), cached_state);
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, cached_state, GFP_NOFS);
return 0;
}
if (ret)
goto fail;
static int flush_dirty_cache(struct inode *inode)
{
int ret;
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret) {
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
GFP_NOFS);
goto fail;
}
return 0;
fail:
return -1;
out_nospc:
return -ENOSPC;
return ret;
}
static void noinline_for_stack
......@@ -1056,6 +1069,7 @@ cleanup_write_cache_enospc(struct inode *inode,
struct list_head *bitmap_list)
{
struct list_head *pos, *n;
list_for_each_safe(pos, n, bitmap_list) {
struct btrfs_free_space *entry =
list_entry(pos, struct btrfs_free_space, list);
......@@ -1088,64 +1102,104 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
{
struct extent_state *cached_state = NULL;
struct io_ctl io_ctl;
struct list_head bitmap_list;
LIST_HEAD(bitmap_list);
int entries = 0;
int bitmaps = 0;
int ret;
int err = -1;
INIT_LIST_HEAD(&bitmap_list);
if (!i_size_read(inode))
return -1;
ret = io_ctl_init(&io_ctl, inode, root);
ret = io_ctl_init(&io_ctl, inode, root, 1);
if (ret)
return -1;
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
down_write(&block_group->data_rwsem);
spin_lock(&block_group->lock);
if (block_group->delalloc_bytes) {
block_group->disk_cache_state = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
up_write(&block_group->data_rwsem);
BTRFS_I(inode)->generation = 0;
ret = 0;
goto out;
}
spin_unlock(&block_group->lock);
}
/* Lock all pages first so we can lock the extent safely. */
io_ctl_prepare_pages(&io_ctl, inode, 0);
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,