Commit b695188d authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs update from Chris Mason:
 "The biggest feature in the pull is the new (and still experimental)
  raid56 code that David Woodhouse started long ago.  I'm still working
  on the parity logging setup that will avoid inconsistent parity after
  a crash, so this is only for testing right now.  But, I'd really like
  to get it out to a broader audience to hammer out any performance
  issues or other problems.

  scrub does not yet correct errors on raid5/6 either.

  Josef has another pass at fsync performance.  The big change here is
  to combine waiting for metadata with waiting for data, which is a big
  latency win.  It is also step one toward using atomics from the
  hardware during a commit.

  Mark Fasheh has a new way to use btrfs send/receive to send only the
  metadata changes.  SUSE is using this to make snapper more efficient
  at finding changes between snapshots.

  Snapshot-aware defrag is also included.

  Otherwise we have a large number of fixes and cleanups.  Eric Sandeen
  wins the award for removing the most lines, and I'm hoping we steal
  this idea from XFS over and over again."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
  btrfs: fixup/remove module.h usage as required
  Btrfs: delete inline extents when we find them during logging
  btrfs: try harder to allocate raid56 stripe cache
  Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic
  Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails
  Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree
  Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails
  Btrfs: fix missing deleted items in btrfs_clean_quota_tree
  btrfs: use only inline_pages from extent buffer
  Btrfs: fix wrong reserved space when deleting a snapshot/subvolume
  Btrfs: fix wrong reserved space in qgroup during snap/subv creation
  Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot
  btrfs: remove a printk from scan_one_device
  Btrfs: fix NULL pointer after aborting a transaction
  Btrfs: fix memory leak of log roots
  Btrfs: copy everything if we've created an inline extent
  btrfs: cleanup for open-coded alignment
  Btrfs: do not change inode flags in rename
  Btrfs: use reserved space for creating a snapshot
  clear chunk_alloc flag on retryable failure
  ...
parents 48476df9 180e001c
...@@ -5,6 +5,9 @@ config BTRFS_FS ...@@ -5,6 +5,9 @@ config BTRFS_FS
select ZLIB_DEFLATE select ZLIB_DEFLATE
select LZO_COMPRESS select LZO_COMPRESS
select LZO_DECOMPRESS select LZO_DECOMPRESS
select RAID6_PQ
select XOR_BLOCKS
help help
Btrfs is a new filesystem with extents, writable snapshotting, Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features. support for multiple devices and many more features.
......
...@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ ...@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
...@@ -352,11 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, ...@@ -352,11 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
err = __resolve_indirect_ref(fs_info, search_commit_root, err = __resolve_indirect_ref(fs_info, search_commit_root,
time_seq, ref, parents, time_seq, ref, parents,
extent_item_pos); extent_item_pos);
if (err) { if (err)
if (ret == 0)
ret = err;
continue; continue;
}
/* we put the first parent into the ref at hand */ /* we put the first parent into the ref at hand */
ULIST_ITER_INIT(&uiter); ULIST_ITER_INIT(&uiter);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#ifndef __BTRFS_BACKREF__ #ifndef __BTRFS_BACKREF__
#define __BTRFS_BACKREF__ #define __BTRFS_BACKREF__
#include "ioctl.h" #include <linux/btrfs.h>
#include "ulist.h" #include "ulist.h"
#include "extent_io.h" #include "extent_io.h"
......
...@@ -40,6 +40,8 @@ ...@@ -40,6 +40,8 @@
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 #define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7 #define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8 #define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
/* in memory btrfs inode */ /* in memory btrfs inode */
struct btrfs_inode { struct btrfs_inode {
...@@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) ...@@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
return 0; return 0;
} }
/*
 * Disable DIO read nolock optimization, so new dio readers will be forced
 * to grab i_mutex. It is used to avoid the endless truncate due to
 * nonlocked dio read.
 *
 * Sets BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime_flags and then
 * issues a full memory barrier (smp_mb()).
 */
static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
/*
 * NOTE(review): presumably makes the flag visible before whatever the
 * caller does next; the reader side that pairs with this barrier is not
 * visible here -- confirm.
 */
smp_mb();
}
/*
 * Counterpart of btrfs_inode_block_unlocked_dio(): clears
 * BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime_flags.
 *
 * The barrier is issued before the clear so that earlier memory operations
 * are ordered ahead of the flag being cleared.
 */
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb__before_clear_bit();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
#endif #endif
...@@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror( ...@@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror(
(bh->b_data + (dev_bytenr & 4095)); (bh->b_data + (dev_bytenr & 4095));
if (btrfs_super_bytenr(super_tmp) != dev_bytenr || if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
sizeof(super_tmp->magic)) ||
memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
btrfs_super_nodesize(super_tmp) != state->metablock_size || btrfs_super_nodesize(super_tmp) != state->metablock_size ||
btrfs_super_leafsize(super_tmp) != state->metablock_size || btrfs_super_leafsize(super_tmp) != state->metablock_size ||
......
...@@ -372,7 +372,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ...@@ -372,7 +372,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index]; page = compressed_pages[pg_index];
page->mapping = inode->i_mapping; page->mapping = inode->i_mapping;
if (bio->bi_size) if (bio->bi_size)
ret = io_tree->ops->merge_bio_hook(page, 0, ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
bio, 0); bio, 0);
else else
...@@ -655,7 +655,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -655,7 +655,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_CACHE_SHIFT; page->index = em_start >> PAGE_CACHE_SHIFT;
if (comp_bio->bi_size) if (comp_bio->bi_size)
ret = tree->ops->merge_bio_hook(page, 0, ret = tree->ops->merge_bio_hook(READ, page, 0,
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
comp_bio, 0); comp_bio, 0);
else else
......
...@@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, ...@@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
switch (tm->op) { switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING: case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n); BUG_ON(tm->slot < n);
/* Fallthrough */
case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE: case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot); btrfs_set_node_key(eb, &tm->key, tm->slot);
...@@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, ...@@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
__tree_mod_log_rewind(eb_rewin, time_seq, tm); __tree_mod_log_rewind(eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) > WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
return eb_rewin; return eb_rewin;
} }
...@@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) ...@@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
*/ */
int btrfs_realloc_node(struct btrfs_trans_handle *trans, int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent, struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret, int start_slot, u64 *last_ret,
struct btrfs_key *progress) struct btrfs_key *progress)
{ {
struct extent_buffer *cur; struct extent_buffer *cur;
...@@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, ...@@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key; struct btrfs_disk_key disk_key;
parent_level = btrfs_header_level(parent); parent_level = btrfs_header_level(parent);
if (cache_only && parent_level != 1)
return 0;
WARN_ON(trans->transaction != root->fs_info->running_transaction); WARN_ON(trans->transaction != root->fs_info->running_transaction);
WARN_ON(trans->transid != root->fs_info->generation); WARN_ON(trans->transid != root->fs_info->generation);
...@@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, ...@@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
else else
uptodate = 0; uptodate = 0;
if (!cur || !uptodate) { if (!cur || !uptodate) {
if (cache_only) {
free_extent_buffer(cur);
continue;
}
if (!cur) { if (!cur) {
cur = read_tree_block(root, blocknr, cur = read_tree_block(root, blocknr,
blocksize, gen); blocksize, gen);
...@@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) ...@@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
/* /*
* A helper function to walk down the tree starting at min_key, and looking * A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are either in cache or have a minimum * for nodes or leaves that have a minimum transaction id.
* transaction id. This is used by the btree defrag code, and tree logging * This is used by the btree defrag code, and tree logging
* *
* This does not cow, but it does stuff the starting key it finds back * This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the * into min_key, so you can call btrfs_search_slot with cow=1 on the
...@@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) ...@@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
*/ */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_key *max_key, struct btrfs_key *max_key,
struct btrfs_path *path, int cache_only, struct btrfs_path *path,
u64 min_trans) u64 min_trans)
{ {
struct extent_buffer *cur; struct extent_buffer *cur;
...@@ -4887,15 +4882,12 @@ again: ...@@ -4887,15 +4882,12 @@ again:
if (sret && slot > 0) if (sret && slot > 0)
slot--; slot--;
/* /*
* check this node pointer against the cache_only and * check this node pointer against the min_trans parameters.
* min_trans parameters. If it isn't in cache or is too * If it is too old, skip to the next one.
* old, skip to the next one.
*/ */
while (slot < nritems) { while (slot < nritems) {
u64 blockptr; u64 blockptr;
u64 gen; u64 gen;
struct extent_buffer *tmp;
struct btrfs_disk_key disk_key;
blockptr = btrfs_node_blockptr(cur, slot); blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot); gen = btrfs_node_ptr_generation(cur, slot);
...@@ -4903,27 +4895,7 @@ again: ...@@ -4903,27 +4895,7 @@ again:
slot++; slot++;
continue; continue;
} }
if (!cache_only) break;
break;
if (max_key) {
btrfs_node_key(cur, &disk_key, slot);
if (comp_keys(&disk_key, max_key) >= 0) {
ret = 1;
goto out;
}
}
tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
free_extent_buffer(tmp);
break;
}
if (tmp)
free_extent_buffer(tmp);
slot++;
} }
find_next_key: find_next_key:
/* /*
...@@ -4934,7 +4906,7 @@ find_next_key: ...@@ -4934,7 +4906,7 @@ find_next_key:
path->slots[level] = slot; path->slots[level] = slot;
btrfs_set_path_blocking(path); btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level, sret = btrfs_find_next_key(root, path, min_key, level,
cache_only, min_trans); min_trans);
if (sret == 0) { if (sret == 0) {
btrfs_release_path(path); btrfs_release_path(path);
goto again; goto again;
...@@ -5399,8 +5371,7 @@ out: ...@@ -5399,8 +5371,7 @@ out:
/* /*
* this is similar to btrfs_next_leaf, but does not try to preserve * this is similar to btrfs_next_leaf, but does not try to preserve
* and fixup the path. It looks for and returns the next key in the * and fixup the path. It looks for and returns the next key in the
* tree based on the current path and the cache_only and min_trans * tree based on the current path and the min_trans parameters.
* parameters.
* *
* 0 is returned if another key is found, < 0 if there are any errors * 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree * and 1 is returned if there are no higher keys in the tree
...@@ -5409,8 +5380,7 @@ out: ...@@ -5409,8 +5380,7 @@ out:
* calling this function. * calling this function.
*/ */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int level, struct btrfs_key *key, int level, u64 min_trans)
int cache_only, u64 min_trans)
{ {
int slot; int slot;
struct extent_buffer *c; struct extent_buffer *c;
...@@ -5461,22 +5431,8 @@ next: ...@@ -5461,22 +5431,8 @@ next:
if (level == 0) if (level == 0)
btrfs_item_key_to_cpu(c, key, slot); btrfs_item_key_to_cpu(c, key, slot);
else { else {
u64 blockptr = btrfs_node_blockptr(c, slot);
u64 gen = btrfs_node_ptr_generation(c, slot); u64 gen = btrfs_node_ptr_generation(c, slot);
if (cache_only) {
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (!cur ||
btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
slot++;
if (cur)
free_extent_buffer(cur);
goto next;
}
free_extent_buffer(cur);
}
if (gen < min_trans) { if (gen < min_trans) {
slot++; slot++;
goto next; goto next;
......
...@@ -31,10 +31,10 @@ ...@@ -31,10 +31,10 @@
#include <trace/events/btrfs.h> #include <trace/events/btrfs.h>
#include <asm/kmap_types.h> #include <asm/kmap_types.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/btrfs.h>
#include "extent_io.h" #include "extent_io.h"
#include "extent_map.h" #include "extent_map.h"
#include "async-thread.h" #include "async-thread.h"
#include "ioctl.h"
struct btrfs_trans_handle; struct btrfs_trans_handle;
struct btrfs_transaction; struct btrfs_transaction;
...@@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep; ...@@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep; extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum; struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_BHRfS_M" #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
#define BTRFS_MAX_MIRRORS 3 #define BTRFS_MAX_MIRRORS 3
...@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; ...@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
/* ioprio of readahead is set to idle */ /* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
/* /*
* The key defines the order in the tree, and so it also defines (optimal) * The key defines the order in the tree, and so it also defines (optimal)
* block layout. * block layout.
...@@ -336,7 +338,10 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) ...@@ -336,7 +338,10 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
/* /*
* File system states * File system states
*/ */
#define BTRFS_FS_STATE_ERROR 0
#define BTRFS_FS_STATE_REMOUNTING 1
/* Super block flags */
/* Errors detected */ /* Errors detected */
#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
...@@ -502,6 +507,7 @@ struct btrfs_super_block { ...@@ -502,6 +507,7 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
...@@ -511,6 +517,7 @@ struct btrfs_super_block { ...@@ -511,6 +517,7 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
/* /*
...@@ -952,8 +959,20 @@ struct btrfs_dev_replace_item { ...@@ -952,8 +959,20 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
/*
 * RAID5/6 profile bits. Use 1ULL like the other BTRFS_BLOCK_GROUP_* bits so
 * that every profile flag has the same 64-bit unsigned type when it is
 * combined into masks.
 */
#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES 5
/*
 * One enumerator per RAID profile, numbered from zero.
 * BTRFS_NR_RAID_TYPES must stay last: its value is the number of
 * profiles listed before it.
 */
enum btrfs_raid_types {
	BTRFS_RAID_RAID10 = 0,
	BTRFS_RAID_RAID1,
	BTRFS_RAID_DUP,
	BTRFS_RAID_RAID0,
	BTRFS_RAID_SINGLE,
	BTRFS_RAID_RAID5,
	BTRFS_RAID_RAID6,

	BTRFS_NR_RAID_TYPES	/* count of the entries above */
};
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
BTRFS_BLOCK_GROUP_SYSTEM | \ BTRFS_BLOCK_GROUP_SYSTEM | \
...@@ -961,6 +980,8 @@ struct btrfs_dev_replace_item { ...@@ -961,6 +980,8 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \ BTRFS_BLOCK_GROUP_RAID1 | \
BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6 | \
BTRFS_BLOCK_GROUP_DUP | \ BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10) BTRFS_BLOCK_GROUP_RAID10)
/* /*
...@@ -1185,6 +1206,10 @@ struct btrfs_block_group_cache { ...@@ -1185,6 +1206,10 @@ struct btrfs_block_group_cache {
u64 flags; u64 flags;
u64 sectorsize; u64 sectorsize;
u64 cache_generation; u64 cache_generation;
/* for raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;
unsigned int ro:1; unsigned int ro:1;
unsigned int dirty:1; unsigned int dirty:1;
unsigned int iref:1; unsigned int iref:1;
...@@ -1225,6 +1250,28 @@ struct seq_list { ...@@ -1225,6 +1250,28 @@ struct seq_list {
u64 seq; u64 seq;
}; };
/*
 * Orphan cleanup progress markers. Numbering starts at 1, so a zeroed
 * field matches neither state.
 */
enum btrfs_orphan_cleanup_state {
	ORPHAN_CLEANUP_STARTED = 1,
	ORPHAN_CLEANUP_DONE,		/* == 2 */
};
/*
 * used by the raid56 code to lock stripes for read/modify/write
 *
 * NOTE(review): appears to be a single bucket of the table declared in
 * struct btrfs_stripe_hash_table -- confirm against raid56.c.
 */
struct btrfs_stripe_hash {
/* NOTE(review): presumably the stripes that hash to this bucket -- confirm */
struct list_head hash_list;
/* NOTE(review): presumably where waiters for a locked stripe sleep -- confirm */
wait_queue_head_t wait;
/* NOTE(review): presumably protects hash_list -- confirm */
spinlock_t lock;
};
/*
 * used by the raid56 code to lock stripes for read/modify/write
 *
 * The table[] member is a flexible array, so the number of buckets is
 * decided by whoever allocates this structure.
 * NOTE(review): presumably 1 << BTRFS_STRIPE_HASH_TABLE_BITS buckets -- confirm.
 */
struct btrfs_stripe_hash_table {
/* NOTE(review): presumably a list of cached stripes -- confirm in raid56.c */
struct list_head stripe_cache;
/* NOTE(review): presumably protects stripe_cache and cache_size -- confirm */
spinlock_t cache_lock;
/* NOTE(review): presumably the number of entries on stripe_cache -- confirm */
int cache_size;
/* hash buckets; trailing flexible array member */
struct btrfs_stripe_hash table[];
};
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
/* fs_info */ /* fs_info */
struct reloc_control; struct reloc_control;
struct btrfs_device; struct btrfs_device;
...@@ -1250,6 +1297,7 @@ struct btrfs_fs_info { ...@@ -1250,6 +1297,7 @@ struct btrfs_fs_info {
/* block group cache stuff */ /* block group cache stuff */
spinlock_t block_group_cache_lock; spinlock_t block_group_cache_lock;
u64 first_logical_byte;
struct rb_root block_group_cache_tree; struct rb_root block_group_cache_tree;
/* keep track of unallocated space */ /* keep track of unallocated space */
...@@ -1288,7 +1336,23 @@ struct btrfs_fs_info { ...@@ -1288,7 +1336,23 @@ struct btrfs_fs_info {
u64 last_trans_log_full_commit; u64 last_trans_log_full_commit;
unsigned long mount_opt; unsigned long mount_opt;
unsigned long compress_type:4; unsigned long compress_type:4;
/*
* This is only an advisory number; the read side is safe even if it gets a
* wrong number because we will write out the data into a regular
* extent. The write side (mount/remount) is under the ->s_umount lock,
* so it is also safe.
*/
u64 max_inline; u64 max_inline;
/*
* Protected by ->chunk_mutex and sb->s_umount.
*
* The reason that we use two locks to protect it is that only
* remount and mount operations can change it and these two operations
* are under sb->s_umount, but the read side (chunk allocation) can not
* acquire sb->s_umount or a deadlock would happen. So we use two
* locks to protect it. On the write side, we must acquire both locks,
* and on the read side, we just need to acquire one of them.
*/
u64 alloc_start; u64 alloc_start;
struct btrfs_transaction *running_transaction; struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_throttle;
...@@ -1307,6 +1371,13 @@ struct btrfs_fs_info { ...@@ -1307,6 +1371,13 @@ struct btrfs_fs_info {
struct mutex cleaner_mutex; struct mutex cleaner_mutex;
struct mutex chunk_mutex; struct mutex chunk_mutex;
struct mutex volume_mutex; struct mutex volume_mutex;
/* this is used during read/modify/write to make sure
* no two ios are trying to mod the same stripe at the same
* time
*/
struct btrfs_stripe_hash_table *stripe_hash_table;
/* /*
* this protects the ordered operations list only while we are * this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make * processing all of the entries on it. This way we make
...@@ -1365,6 +1436,7 @@ struct btrfs_fs_info { ...@@ -1365,6 +1436,7 @@ struct btrfs_fs_info {
*/ */
struct list_head ordered_extents; struct list_head ordered_extents;
spinlock_t delalloc_lock;
/* /*
* all of the inodes that have delalloc bytes. It is possible for * all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered * this list to be empty even when there is still dirty data=ordered
...@@ -1372,13 +1444,6 @@ struct btrfs_fs_info { ...@@ -1372,13 +1444,6 @@ struct btrfs_fs_info {
*/ */