Commit ae1a25da authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (37 commits)
  Btrfs: Make sure dir is non-null before doing S_ISGID checks
  Btrfs: Fix memory leak in cache_drop_leaf_ref
  Btrfs: don't return congestion in write_cache_pages as often
  Btrfs: Only prep for btree deletion balances when nodes are mostly empty
  Btrfs: fix btrfs_unlock_up_safe to walk the entire path
  Btrfs: change btrfs_del_leaf to drop locks earlier
  Btrfs: Change btrfs_truncate_inode_items to stop when it hits the inode
  Btrfs: Don't try to compress pages past i_size
  Btrfs: join the transaction in __btrfs_setxattr
  Btrfs: Handle SGID bit when creating inodes
  Btrfs: Make btrfs_drop_snapshot work in larger and more efficient chunks
  Btrfs: Change btree locking to use explicit blocking points
  Btrfs: hash_lock is no longer needed
  Btrfs: disable leak debugging checks in extent_io.c
  Btrfs: sort references by byte number during btrfs_inc_ref
  Btrfs: async threads should try harder to find work
  Btrfs: selinux support
  Btrfs: make btrfs acls selectable
  Btrfs: Catch missed bios in the async bio submission thread
  Btrfs: fix readdir on 32 bit machines
  ...
parents fd9fc842 42f15d77
@@ -1021,6 +1021,14 @@ M: mb@bu3sch.de
W: http://bu3sch.de/btgpio.php
S: Maintained
BTRFS FILE SYSTEM
P: Chris Mason
M: chris.mason@oracle.com
L: linux-btrfs@vger.kernel.org
W: http://btrfs.wiki.kernel.org/
T: git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
S: Maintained
BTTV VIDEO4LINUX DRIVER
P: Mauro Carvalho Chehab
M: mchehab@infradead.org
@@ -16,3 +16,16 @@ config BTRFS_FS
module will be called btrfs.
If unsure, say N.
config BTRFS_FS_POSIX_ACL
bool "Btrfs POSIX Access Control Lists"
depends on BTRFS_FS
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N.
@@ -16,11 +16,11 @@
* Boston, MA 021110-1307, USA.
*/
#include <linux/version.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/spinlock.h>
# include <linux/freezer.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include "async-thread.h"
#define WORK_QUEUED_BIT 0
@@ -143,6 +143,7 @@ static int worker_loop(void *arg)
struct btrfs_work *work;
do {
spin_lock_irq(&worker->lock);
again_locked:
while (!list_empty(&worker->pending)) {
cur = worker->pending.next;
work = list_entry(cur, struct btrfs_work, list);
@@ -165,14 +166,50 @@ static int worker_loop(void *arg)
check_idle_worker(worker);
}
worker->working = 0;
if (freezing(current)) {
worker->working = 0;
spin_unlock_irq(&worker->lock);
refrigerator();
} else {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_irq(&worker->lock);
if (!kthread_should_stop())
if (!kthread_should_stop()) {
cpu_relax();
/*
* we've dropped the lock, did someone else
* jump in?
*/
smp_mb();
if (!list_empty(&worker->pending))
continue;
/*
* this short schedule allows more work to
* come in without the queue functions
* needing to go through wake_up_process()
*
* worker->working is still 1, so nobody
* is going to try and wake us up
*/
schedule_timeout(1);
smp_mb();
if (!list_empty(&worker->pending))
continue;
/* still no more work? sleep for real */
spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&worker->pending))
goto again_locked;
/*
* this makes sure we get a wakeup when someone
* adds something new to the queue
*/
worker->working = 0;
spin_unlock_irq(&worker->lock);
schedule();
}
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
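The new sleep path above follows the usual prepare-to-sleep idiom: the thread sets TASK_INTERRUPTIBLE and re-checks worker->pending under worker->lock before calling schedule(), so a producer that queues work and then calls wake_up_process() cannot slip into the gap and the wakeup cannot be lost. A minimal sketch of that idiom, assuming a hypothetical sketch_worker type in place of btrfs_worker_thread:

/*
 * Minimal sketch (not the btrfs code itself) of the wait pattern used
 * above: mark the task sleeping before the final check of the pending
 * list, so a producer cannot add work and wake us between the check
 * and schedule().
 */
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

struct sketch_worker {			/* hypothetical stand-in for btrfs_worker_thread */
	spinlock_t lock;
	struct list_head pending;
	int working;			/* 0 means producers must wake us */
};

static void sketch_wait_for_work(struct sketch_worker *w)
{
	spin_lock_irq(&w->lock);
	set_current_state(TASK_INTERRUPTIBLE);
	if (!list_empty(&w->pending)) {
		/* work arrived while we were preparing to sleep */
		__set_current_state(TASK_RUNNING);
		spin_unlock_irq(&w->lock);
		return;
	}
	/* tell producers they must call wake_up_process() for new work */
	w->working = 0;
	spin_unlock_irq(&w->lock);
	if (!kthread_should_stop())
		schedule();
	__set_current_state(TASK_RUNNING);
}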
@@ -350,13 +387,14 @@ int btrfs_requeue_work(struct btrfs_work *work)
{
struct btrfs_worker_thread *worker = work->worker;
unsigned long flags;
int wake = 0;
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
goto out;
spin_lock_irqsave(&worker->lock, flags);
atomic_inc(&worker->num_pending);
list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
/* by definition we're busy, take ourselves off the idle
* list
@@ -368,10 +406,16 @@ int btrfs_requeue_work(struct btrfs_work *work)
&worker->workers->worker_list);
spin_unlock_irqrestore(&worker->workers->lock, flags);
}
if (!worker->working) {
wake = 1;
worker->working = 1;
}
spin_unlock_irqrestore(&worker->lock, flags);
if (wake)
wake_up_process(worker->task);
out:
return 0;
}
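On the producer side, the hunk above only calls wake_up_process() once worker->working flips from 0 to 1, so a worker that is already running is not woken over and over for every queued item. Continuing the hypothetical sketch_worker example from above:

/*
 * Sketch of the matching producer: the wakeup is only paid for when
 * the worker has declared itself idle (working == 0).
 */
static void sketch_queue_work(struct sketch_worker *w,
			      struct list_head *item,
			      struct task_struct *task)
{
	unsigned long flags;
	int wake = 0;

	spin_lock_irqsave(&w->lock, flags);
	list_add_tail(item, &w->pending);
	if (!w->working) {
		w->working = 1;
		wake = 1;
	}
	spin_unlock_irqrestore(&w->lock, flags);

	if (wake)
		wake_up_process(task);
}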
@@ -398,9 +442,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
}
spin_lock_irqsave(&worker->lock, flags);
list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
check_busy_worker(worker);
list_add_tail(&work->list, &worker->pending);
/*
* avoid calling into wake_up_process if this thread has already
@@ -32,7 +32,6 @@
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/version.h>
#include <linux/pagevec.h>
#include "compat.h"
#include "ctree.h"
@@ -54,6 +54,31 @@ struct btrfs_path *btrfs_alloc_path(void)
return path;
}
/*
* set all locked nodes in the path to blocking locks. This should
* be done before scheduling
*/
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
if (p->nodes[i] && p->locks[i])
btrfs_set_lock_blocking(p->nodes[i]);
}
}
/*
* reset all the locked nodes in the path to spinning locks.
*/
noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
if (p->nodes[i] && p->locks[i])
btrfs_clear_lock_blocking(p->nodes[i]);
}
}
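These two helpers are what the rest of the series uses to bracket any region that may schedule(): every spinning tree lock held in the path is switched to a blocking lock before sleeping work and switched back afterwards. A minimal usage sketch, where step_that_may_sleep() is a hypothetical placeholder for calls such as split_node() or balance_level():

/*
 * Usage sketch only, not taken from the patch: never hold spinning
 * tree locks across a sleep.
 */
static int do_sleepy_step(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct btrfs_path *path)
{
	int ret;

	btrfs_set_path_blocking(path);		/* may schedule after this */
	ret = step_that_may_sleep(trans, root, path);	/* hypothetical */
	btrfs_clear_path_blocking(path);	/* back to spinning locks */
	return ret;
}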
/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
@@ -272,6 +297,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (IS_ERR(cow))
return PTR_ERR(cow);
/* cow is set to blocking by btrfs_init_new_buffer */
copy_extent_buffer(cow, buf, 0, 0, cow->len);
btrfs_set_header_bytenr(cow, cow->start);
btrfs_set_header_generation(cow, trans->transid);
@@ -388,17 +415,20 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(1);
}
spin_lock(&root->fs_info->hash_lock);
if (btrfs_header_generation(buf) == trans->transid &&
btrfs_header_owner(buf) == root->root_key.objectid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
*cow_ret = buf;
spin_unlock(&root->fs_info->hash_lock);
WARN_ON(prealloc_dest);
return 0;
}
spin_unlock(&root->fs_info->hash_lock);
search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
if (parent)
btrfs_set_lock_blocking(parent);
btrfs_set_lock_blocking(buf);
ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0,
prealloc_dest);
@@ -504,6 +534,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (parent_nritems == 1)
return 0;
btrfs_set_lock_blocking(parent);
for (i = start_slot; i < end_slot; i++) {
int close = 1;
@@ -564,6 +596,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
search_start = last_block;
btrfs_tree_lock(cur);
btrfs_set_lock_blocking(cur);
err = __btrfs_cow_block(trans, root, cur, parent, i,
&cur, search_start,
min(16 * blocksize,
@@ -862,6 +895,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
return 0;
mid = path->nodes[level];
WARN_ON(!path->locks[level]);
WARN_ON(btrfs_header_generation(mid) != trans->transid);
@@ -884,6 +918,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
BUG_ON(!child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
BUG_ON(ret);
@@ -900,6 +935,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
btrfs_tree_unlock(child);
path->locks[level] = 0;
path->nodes[level] = NULL;
clean_tree_block(trans, root, mid);
@@ -924,6 +960,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
left = read_node_slot(root, parent, pslot - 1);
if (left) {
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
wret = btrfs_cow_block(trans, root, left,
parent, pslot - 1, &left, 0);
if (wret) {
@@ -934,6 +971,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
right = read_node_slot(root, parent, pslot + 1);
if (right) {
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
wret = btrfs_cow_block(trans, root, right,
parent, pslot + 1, &right, 0);
if (wret) {
@@ -1109,6 +1147,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
u32 left_nr;
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
left_nr = btrfs_header_nritems(left);
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
wret = 1;
@@ -1155,7 +1195,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
*/
if (right) {
u32 right_nr;
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
right_nr = btrfs_header_nritems(right);
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
wret = 1;
@@ -1210,8 +1253,7 @@ static noinline void reada_for_search(struct btrfs_root *root,
struct btrfs_disk_key disk_key;
u32 nritems;
u64 search;
u64 lowest_read;
u64 highest_read;
u64 target;
u64 nread = 0;
int direction = path->reada;
struct extent_buffer *eb;
@@ -1235,8 +1277,7 @@ static noinline void reada_for_search(struct btrfs_root *root,
return;
}
highest_read = search;
lowest_read = search;
target = search;
nritems = btrfs_header_nritems(node);
nr = slot;
@@ -1256,27 +1297,80 @@ static noinline void reada_for_search(struct btrfs_root *root,
break;
}
search = btrfs_node_blockptr(node, nr);
if ((search >= lowest_read && search <= highest_read) ||
(search < lowest_read && lowest_read - search <= 16384) ||
(search > highest_read && search - highest_read <= 16384)) {
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
readahead_tree_block(root, search, blocksize,
btrfs_node_ptr_generation(node, nr));
nread += blocksize;
}
nscan++;
if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32))
if ((nread > 65536 || nscan > 32))
break;
}
}
if (nread > (256 * 1024) || nscan > 128)
break;
/*
* returns -EAGAIN if it had to drop the path, or zero if everything was in
* cache
*/
static noinline int reada_for_balance(struct btrfs_root *root,
struct btrfs_path *path, int level)
{
int slot;
int nritems;
struct extent_buffer *parent;
struct extent_buffer *eb;
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
int ret = 0;
int blocksize;
if (search < lowest_read)
lowest_read = search;
if (search > highest_read)
highest_read = search;
parent = path->nodes[level - 1];
if (!parent)
return 0;
nritems = btrfs_header_nritems(parent);
slot = path->slots[level];
blocksize = btrfs_level_size(root, level);
if (slot > 0) {
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
eb = btrfs_find_tree_block(root, block1, blocksize);
if (eb && btrfs_buffer_uptodate(eb, gen))
block1 = 0;
free_extent_buffer(eb);
}
if (slot < nritems) {
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
if (eb && btrfs_buffer_uptodate(eb, gen))
block2 = 0;
free_extent_buffer(eb);
}
if (block1 || block2) {
ret = -EAGAIN;
btrfs_release_path(root, path);
if (block1)
readahead_tree_block(root, block1, blocksize, 0);
if (block2)
readahead_tree_block(root, block2, blocksize, 0);
if (block1) {
eb = read_tree_block(root, block1, blocksize, 0);
free_extent_buffer(eb);
}
if (block2) {
eb = read_tree_block(root, block2, blocksize, 0);
free_extent_buffer(eb);
}
}
return ret;
}
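A non-zero return here (-EAGAIN) tells the caller that the path was released in order to issue readahead, so the search has to be restarted before any balancing touches the now stale nodes. A hypothetical wrapper, modeled on the btrfs_search_slot() changes later in this patch:

/*
 * Caller sketch only: reada_for_balance() dropping the path means
 * 'go back and search again', otherwise the blocks are cached and the
 * split can proceed with blocking locks.
 */
static int prep_node_for_split(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path, int level,
			       int *restart_search)
{
	int sret;

	*restart_search = 0;
	sret = reada_for_balance(root, path, level);
	if (sret) {
		/* path was dropped: caller must re-run the search */
		*restart_search = 1;
		return 0;
	}

	btrfs_set_path_blocking(path);
	sret = split_node(trans, root, path, level);
	btrfs_clear_path_blocking(path);
	return sret;
}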
/*
* when we walk down the tree, it is usually safe to unlock the higher layers
* in the tree. The exceptions are when our path goes through slot 0, because
@@ -1327,6 +1421,32 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
}
/*
* This releases any locks held in the path starting at level and
* going all the way up to the root.
*
* btrfs_search_slot will keep the lock held on higher nodes in a few
* corner cases, such as COW of the block at slot zero in the node. This
* ignores those rules, and it should only be called when there are no
* more updates to be done higher up in the tree.
*/
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
int i;
if (path->keep_locks || path->lowest_level)
return;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
continue;
if (!path->locks[i])
continue;
btrfs_tree_unlock(path->nodes[i]);
path->locks[i] = 0;
}
}
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
@@ -1387,31 +1507,30 @@ again:
int wret;
/* is a cow on this block not required */
spin_lock(&root->fs_info->hash_lock);
if (btrfs_header_generation(b) == trans->transid &&
btrfs_header_owner(b) == root->root_key.objectid &&
!btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
spin_unlock(&root->fs_info->hash_lock);
goto cow_done;
}
spin_unlock(&root->fs_info->hash_lock);
/* ok, we have to cow, is our old prealloc the right
* size?
*/
if (prealloc_block.objectid &&
prealloc_block.offset != b->len) {
btrfs_release_path(root, p);
btrfs_free_reserved_extent(root,
prealloc_block.objectid,
prealloc_block.offset);
prealloc_block.objectid = 0;
goto again;
}
/*
* for higher level blocks, try not to allocate blocks
* with the block and the parent locks held.
*/
if (level > 1 && !prealloc_block.objectid &&
if (level > 0 && !prealloc_block.objectid &&
btrfs_path_lock_waiting(p, level)) {
u32 size = b->len;
u64 hint = b->start;
@@ -1425,6 +1544,8 @@ again:
goto again;
}
btrfs_set_path_blocking(p);
wret = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1],
@@ -1446,6 +1567,22 @@ cow_done:
if (!p->skip_locking)
p->locks[level] = 1;
btrfs_clear_path_blocking(p);
/*
* we have a lock on b and as long as we aren't changing
* the tree, there is no way for the items in b to change.
* It is safe to drop the lock on our parent before we
* go through the expensive btree search on b.
*
* If cow is true, then we might be changing slot zero,
* which may require changing the parent. So, we can't
* drop the lock until after we know which slot we're
* operating on.
*/
if (!cow)
btrfs_unlock_up_safe(p, level + 1);
ret = check_block(root, p, level);
if (ret) {
ret = -1;
@@ -1453,6 +1590,7 @@ cow_done:
}
ret = bin_search(b, key, level, &slot);
if (level != 0) {
if (ret && slot > 0)
slot -= 1;
@@ -1460,7 +1598,16 @@ cow_done:
if ((p->search_for_split || ins_len > 0) &&
btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
int sret = split_node(trans, root, p, level);
int sret;
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p);
BUG_ON(sret > 0);
if (sret) {
ret = sret;
@@ -1468,9 +1615,19 @@ cow_done:
}
b = p->nodes[level];
slot = p->slots[level];
} else if (ins_len < 0) {
int sret = balance_level(trans, root, p,
level);
} else if (ins_len < 0 &&
btrfs_header_nritems(b) <
BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
int sret;
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p);
if (sret) {
ret = sret;
goto done;
@@ -1504,7 +1661,7 @@ cow_done:
* of the btree by dropping locks before
* we read.
*/
if (level > 1) {
if (level > 0) {
btrfs_release_path(NULL, p);
if (tmp)
free_extent_buffer(tmp);
@@ -1519,6 +1676,7 @@ cow_done:
free_extent_buffer(tmp);
goto again;
} else {
btrfs_set_path_blocking(p);
if (tmp)
free_extent_buffer(tmp);
if (should_reada)
@@ -1528,14 +1686,29 @@ cow_done:
b = read_node_slot(root, b, slot);
}
}
if (!p->skip_locking)
btrfs_tree_lock(b);
if (!p->skip_locking) {
int lret;
btrfs_clear_path_blocking(p);
lret = btrfs_try_spin_lock(b);
if (!lret) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
btrfs_clear_path_blocking(p);
}
}
} else {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(root, b) < ins_len) {
int sret = split_leaf(trans, root, key,
int sret;
btrfs_set_path_blocking(p);
sret = split_leaf(trans, root, key,
p, ins_len, ret == 0);
btrfs_clear_path_blocking(p);
BUG_ON(sret > 0);
if (sret) {
ret = sret;
@@ -1549,12 +1722,16 @@ cow_done:
}
ret = 1;
done:
/*
* we don't really know what they plan on doing with the path
* from here on, so for now just mark it as blocking
*/
btrfs_set_path_blocking(p);
if (prealloc_block.objectid) {
btrfs_free_reserved_extent(root,
prealloc_block.objectid,
prealloc_block.offset);
}
return ret;
}
@@ -1578,6 +1755,8 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
BUG_ON(ret);
btrfs_set_lock_blocking(eb);
parent = eb;
while (1) {
level = btrfs_header_level(parent);
@@ -1602,6 +1781,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
eb = read_tree_block(root, bytenr, blocksize,
generation);