Commit f7b00693 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "This has our collection of bug fixes.  I missed the last rc because I
  thought our patches were making NFS crash during my xfs test runs.
  Turns out it was an NFS client bug fixed by someone else while I tried
  to bisect it.

  All of these fixes are small, but some are fairly high impact.  The
  biggest are fixes for our mount -o remount handling, a deadlock due to
  GFP_KERNEL allocations in readdir, and a RAID10 error handling bug.

  This was tested against both 3.3 and Linus' master as of this morning."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (26 commits)
  Btrfs: reduce lock contention during extent insertion
  Btrfs: avoid deadlocks from GFP_KERNEL allocations during btrfs_real_readdir
  Btrfs: Fix space checking during fs resize
  Btrfs: fix block_rsv and space_info lock ordering
  Btrfs: Prevent root_list corruption
  Btrfs: fix repair code for RAID10
  Btrfs: do not start delalloc inodes during sync
  Btrfs: fix that check_int_data mount option was ignored
  Btrfs: don't count CRC or header errors twice while scrubbing
  Btrfs: fix btrfs_ioctl_dev_info() crash on missing device
  btrfs: don't return EINTR
  Btrfs: double unlock bug in error handling
  Btrfs: always store the mirror we read the eb from
  fs/btrfs/volumes.c: add missing free_fs_devices
  btrfs: fix early abort in 'remount'
  Btrfs: fix max chunk size check in chunk allocator
  Btrfs: add missing read locks in backref.c
  Btrfs: don't call free_extent_buffer twice in iterate_irefs
  Btrfs: Make free_ipath() deal gracefully with NULL pointers
  Btrfs: avoid possible use-after-free in clear_extent_bit()
  ...
parents b990f9b3 dc7fdde3
......@@ -22,6 +22,7 @@
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
/*
* this structure records all encountered refs on the way up to the root
......@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = size - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
int leave_spinning = path->leave_spinning;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
path->leave_spinning = 1;
while (1) {
len = btrfs_inode_ref_name_len(eb, iref);
bytes_left -= len;
if (bytes_left >= 0)
read_extent_buffer(eb, dest + bytes_left,
(unsigned long)(iref + 1), len);
if (eb != eb_in)
if (eb != eb_in) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
if (ret > 0)
ret = -ENOENT;
......@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
slot = path->slots[0];
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in)
if (eb != eb_in) {
atomic_inc(&eb->refs);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
}
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
......@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);
......@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
{
int ret;
int ret = 0;
int slot;
u32 cur;
u32 len;
......@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
while (1) {
while (!ret) {
path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
if (ret < 0)
......@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
atomic_inc(&eb->refs);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
item = btrfs_item_nr(eb, slot);
......@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
(unsigned long long)found_key.objectid,
(unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx);
if (ret) {
free_extent_buffer(eb);
if (ret)
break;
}
len = sizeof(*iref) + name_len;
iref = (struct btrfs_inode_ref *)((char *)iref + len);
}
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
......@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
void free_ipath(struct inode_fs_paths *ipath)
{
if (!ipath)
return;
kfree(ipath->fspath);
kfree(ipath);
}
......@@ -1078,7 +1078,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
unsigned long mount_opt:21;
unsigned long mount_opt;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
......
......@@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
break;
if (!failed_mirror) {
failed = 1;
printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
failed_mirror = eb->failed_mirror;
}
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
if (num_copies == 1)
break;
if (!failed_mirror) {
failed = 1;
failed_mirror = eb->read_mirror;
}
mirror_num++;
if (mirror_num == failed_mirror)
mirror_num++;
......@@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
}
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
struct extent_state *state, int mirror)
{
struct extent_io_tree *tree;
u64 found_start;
......@@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!reads_done)
goto err;
eb->read_mirror = mirror;
if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
ret = -EIO;
goto err;
......@@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = failed_mirror;
eb->read_mirror = failed_mirror;
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
......@@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer;
}
if (sectorsize < PAGE_SIZE) {
printk(KERN_WARNING "btrfs: Incompatible sector size "
"found on %s\n", sb->s_id);
if (sectorsize != PAGE_SIZE) {
printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
"found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
......
......@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) {
printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
spin_lock(&delayed_refs->lock);
return ret;
}
......@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) {
printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
spin_lock(&delayed_refs->lock);
return ret;
}
......@@ -3769,13 +3771,10 @@ again:
*/
if (current->journal_info)
return -EAGAIN;
ret = wait_event_interruptible(space_info->wait,
!space_info->flush);
/* Must have been interrupted, return */
if (ret) {
printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
ret = wait_event_killable(space_info->wait, !space_info->flush);
/* Must have been killed, return */
if (ret)
return -EINTR;
}
spin_lock(&space_info->lock);
}
......@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = calc_global_metadata_size(fs_info);
spin_lock(&block_rsv->lock);
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
block_rsv->size = num_bytes;
......@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->full = 1;
}
spin_unlock(&sinfo->lock);
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
......
......@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
return 0;
}
static struct extent_state *next_state(struct extent_state *state)
{
struct rb_node *next = rb_next(&state->rb_node);
if (next)
return rb_entry(next, struct extent_state, rb_node);
else
return NULL;
}
/*
* utility function to clear some bits in an extent state struct.
* it will optionally wake up any one waiting on this state (wake == 1), or
* forcibly remove the state from the tree (delete == 1).
* it will optionally wake up any one waiting on this state (wake == 1)
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state,
int *bits, int wake)
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state,
int *bits, int wake)
{
struct extent_state *next;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
......@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
if (wake)
wake_up(&state->wq);
if (state->state == 0) {
next = next_state(state);
if (state->tree) {
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
......@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
}
} else {
merge_state(tree, state);
next = next_state(state);
}
return ret;
return next;
}
static struct extent_state *
......@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *cached;
struct extent_state *prealloc = NULL;
struct rb_node *next_node;
struct rb_node *node;
u64 last_end;
int err;
......@@ -528,14 +537,11 @@ hit_next:
WARN_ON(state->end < start);
last_end = state->end;
if (state->end < end && !need_resched())
next_node = rb_next(&state->rb_node);
else
next_node = NULL;
/* the state doesn't have the wanted bits, go ahead */
if (!(state->state & bits))
if (!(state->state & bits)) {
state = next_state(state);
goto next;
}
/*
* | ---- desired range ---- |
......@@ -593,16 +599,13 @@ hit_next:
goto out;
}
clear_state_bit(tree, state, &bits, wake);
state = clear_state_bit(tree, state, &bits, wake);
next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
if (start <= end && next_node) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (start <= end && state && !need_resched())
goto hit_next;
}
goto search_again;
out:
......@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 start;
u64 end;
int whole_page;
int failed_mirror;
int mirror;
int ret;
if (err)
......@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
mirror = (int)(unsigned long)bio->bi_bdev;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state);
state, mirror);
if (ret)
uptodate = 0;
else
clean_io_failure(start, page);
}
if (!uptodate)
failed_mirror = (int)(unsigned long)bio->bi_bdev;
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1;
......@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we
* remain responsible for that page.
*/
ret = bio_readpage_error(bio, page, start, end,
failed_mirror, NULL);
ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
if (ret == 0) {
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
......@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = 0;
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
......
......@@ -79,7 +79,7 @@ struct extent_io_ops {
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state);
struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
......@@ -135,7 +135,7 @@ struct extent_buffer {
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
int failed_mirror;
int read_mirror;
struct list_head leak_list;
struct rcu_head rcu_head;
pid_t lock_owner;
......
......@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int extent_type;
int recow;
int ret;
int modify_tree = -1;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
......@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
if (!path)
return -ENOMEM;
if (start >= BTRFS_I(inode)->disk_i_size)
modify_tree = 0;
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
search_start, -1);
search_start, modify_tree);
if (ret < 0)
break;
if (ret > 0 && path->slots[0] > 0 && search_start == start) {
......@@ -634,7 +638,8 @@ next_slot:
}
search_start = max(key.offset, start);
if (recow) {
if (recow || !modify_tree) {
modify_tree = -1;
btrfs_release_path(path);
continue;
}
......
......@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
* extent_io.c will try to find good copies for us.
*/
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
struct extent_state *state, int mirror)
{
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host;
......@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
BTRFS_I(inode)->dummy_inode = 1;
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
inode->i_op = &simple_dir_inode_operations;
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
......@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
struct inode *inode = dentry->d_inode;
if (!dentry->d_inode && !IS_ROOT(dentry))
dentry = dentry->d_parent;
if (!inode && !IS_ROOT(dentry))
inode = dentry->d_parent->d_inode;
if (dentry->d_inode) {
root = BTRFS_I(dentry->d_inode)->root;
if (inode) {
root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return 1;
}
return 0;
}
......@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path;
struct list_head ins_list;
struct list_head del_list;
struct qstr q;
int ret;
struct extent_buffer *leaf;
int slot;
......@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
struct dentry *tmp;
if (verify_dir_item(root, leaf, di))
break;
......@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
q.name = name_ptr;
q.len = name_len;
q.hash = full_name_hash(q.name, q.len);
tmp = d_lookup(filp->f_dentry, &q);
if (!tmp) {
struct btrfs_key *newkey;
newkey = kzalloc(sizeof(struct btrfs_key),
GFP_NOFS);
if (!newkey)
goto no_dentry;
tmp = d_alloc(filp->f_dentry, &q);
if (!tmp) {
kfree(newkey);
dput(tmp);
goto no_dentry;
}
memcpy(newkey, &location,
sizeof(struct btrfs_key));
tmp->d_fsdata = newkey;
tmp->d_flags |= DCACHE_NEED_LOOKUP;
d_rehash(tmp);
dput(tmp);
} else {
dput(tmp);
}
no_dentry:
/* is this a reference to our own snapshot? If so
* skip it
* skip it.
*
* In contrast to old kernels, we insert the snapshot's
* dir item and dir index after it has been created, so
* we won't find a reference to our own snapshot. We
* still keep the following code for backward
* compatibility.
*/
if (location.type == BTRFS_ROOT_ITEM_KEY &&
location.objectid == root->root_key.objectid) {
......
......@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
strncpy(di_args->path, dev->name, sizeof(di_args->path));
if (dev->name)
strncpy(di_args->path, dev->name, sizeof(di_args->path));
else
di_args->path[0] = '\0';
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
......
......@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio)
{
int ret;
int looped = 0;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
int i;
again:
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
......@@ -274,9 +272,6 @@ again:
spin_unlock(&fs_info->reada_lock);
}
if (looped)
return NULL;
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
......@@ -307,13 +302,15 @@ again:
ret = radix_tree_insert(&dev->reada_zones,
(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
spin_unlock(&fs_info->reada_lock);
if (ret) {
if (ret == -EEXIST) {
kfree(zone);
looped = 1;
goto again;
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
logical >> PAGE_CACHE_SHIFT, 1);
if (ret == 1)
kref_get(&zone->refcnt);
}
spin_unlock(&fs_info->reada_lock);
return zone;
}
......@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_key *top, int level)
{
int ret;
int looped = 0;
struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
again:
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
kref_get(&re->refcnt);
spin_unlock(&fs_info->reada_lock);
if (re || looped)
if (re)
return re;
re = kzalloc(sizeof(*re), GFP_NOFS);
......@@ -398,16 +395,31 @@ again:
/* insert extent in reada_tree + all per-device trees, all or nothing */
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
BUG_ON(!re_exist);
kref_get(&re_exist->refcnt);
spin_unlock(&fs_info->reada_lock);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
if (ret != -ENOMEM) {
/* someone inserted the extent in the meantime */
looped = 1;