ctree.h 69.4 KB
Newer Older
Chris Mason's avatar
Chris Mason committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Chris Mason's avatar
Chris Mason committed
19
20
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
21

22
#include <linux/version.h>
23
24
#include <linux/mm.h>
#include <linux/highmem.h>
Chris Mason's avatar
Chris Mason committed
25
#include <linux/fs.h>
26
#include <linux/completion.h>
Chris Mason's avatar
Chris Mason committed
27
#include <linux/backing-dev.h>
28
#include <linux/wait.h>
29
#include <asm/kmap_types.h>
30
#include "extent_io.h"
31
#include "extent_map.h"
32
#include "async-thread.h"
Chris Mason's avatar
Chris Mason committed
33

34
struct btrfs_trans_handle;
Chris Mason's avatar
Chris Mason committed
35
struct btrfs_transaction;
36
37
38
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
Chris Mason's avatar
Chris Mason committed
39
extern struct kmem_cache *btrfs_path_cachep;
40
struct btrfs_ordered_sum;
41

42
#define BTRFS_MAGIC "_BHRfS_M"
43

Josef Bacik's avatar
Josef Bacik committed
44
45
#define BTRFS_ACL_NOT_CACHED    ((void *)-1)

46
#define BTRFS_MAX_LEVEL 8
47
48

/* holds pointers to all of the tree roots */
49
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
50
51

/* stores information about which extents are in use, and reference counts */
Chris Mason's avatar
Chris Mason committed
52
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
53
54
55
56
57

/*
 * chunk tree stores translations from logical -> physical block numbering
 * the super block points to the chunk tree
 */
58
#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
59
60
61
62
63

/*
 * stores information about which areas of a given device are in use.
 * one per device.  The tree of tree roots points to the device tree
 */
64
65
66
67
68
69
70
#define BTRFS_DEV_TREE_OBJECTID 4ULL

/* one per subvolume, storing files and directories */
#define BTRFS_FS_TREE_OBJECTID 5ULL

/* directory objectid inside the root tree */
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
71

72
73
74
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL

75
76
77
/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL

78
79
80
81
/* does write ahead logging to speed up fsyncs */
#define BTRFS_TREE_LOG_OBJECTID -6ULL
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL

Zheng Yan's avatar
Zheng Yan committed
82
83
84
85
/* for space balancing */
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL

86
87
88
89
90
91
92
/*
 * extent checksums all have this objectid
 * this allows them to share the logging tree
 * for fsyncs
 */
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL

Zheng Yan's avatar
Zheng Yan committed
93
94
95
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL

96
/*
97
 * All files have objectids in this range.
98
 */
99
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
100
#define BTRFS_LAST_FREE_OBJECTID -256ULL
101
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
102

103
104
105
106
107
108
109

/*
 * the device items go into the chunk tree.  The key is in the form
 * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
 */
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL

Chris Mason's avatar
Chris Mason committed
110
111
112
113
114
115
/*
 * we can actually store much bigger names, but lets not confuse the rest
 * of linux
 */
#define BTRFS_NAME_LEN 255

Chris Mason's avatar
Chris Mason committed
116
117
/* 32 bytes in various csum fields */
#define BTRFS_CSUM_SIZE 32
118
119
120
121
122
123

/* csum types */
#define BTRFS_CSUM_TYPE_CRC32	0

/*
 * checksum size in bytes for each csum type, indexed by BTRFS_CSUM_TYPE_*
 * (entry 0 is the four byte CRC32).  const so this lookup table cannot be
 * modified by accident
 */
static const int btrfs_csum_sizes[] = { 4, 0 };

124
/* four bytes for CRC32 */
125
#define BTRFS_EMPTY_DIR_SIZE 0
Chris Mason's avatar
Chris Mason committed
126

Chris Mason's avatar
Chris Mason committed
127
128
129
130
131
132
133
134
#define BTRFS_FT_UNKNOWN	0
#define BTRFS_FT_REG_FILE	1
#define BTRFS_FT_DIR		2
#define BTRFS_FT_CHRDEV		3
#define BTRFS_FT_BLKDEV		4
#define BTRFS_FT_FIFO		5
#define BTRFS_FT_SOCK		6
#define BTRFS_FT_SYMLINK	7
Josef Bacik's avatar
Josef Bacik committed
135
136
#define BTRFS_FT_XATTR		8
#define BTRFS_FT_MAX		9
Chris Mason's avatar
Chris Mason committed
137

138
139
140
141
142
143
144
145
146
/*
 * the key defines the order in the tree, and so it also defines (optimal)
 * block layout.  objectid corresponds to the inode number.  The flags
 * tells us things about the object, and is a kind of stream selector.
 * so for a given inode, keys with flags of 1 might refer to the inode
 * data, flags of 2 may point to file data in the btree and flags == 3
 * may point to extents.
 *
 * offset is the starting byte offset for this key in the stream.
Chris Mason's avatar
Chris Mason committed
147
148
149
150
 *
 * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
 * in cpu native order.  Otherwise they are identical and their sizes
 * should be the same (ie both packed)
151
 */
Chris Mason's avatar
Chris Mason committed
152
153
struct btrfs_disk_key {
	__le64 objectid;
154
	u8 type;
155
	__le64 offset;
Chris Mason's avatar
Chris Mason committed
156
157
158
} __attribute__ ((__packed__));

struct btrfs_key {
159
	u64 objectid;
160
	u8 type;
161
	u64 offset;
162
163
} __attribute__ ((__packed__));

164
165
166
167
/*
 * thin wrapper around an extent_map_tree.  btrfs_fs_info embeds one as
 * mapping_tree for the logical->physical extent mapping
 */
struct btrfs_mapping_tree {
	struct extent_map_tree map_tree;
};

168
#define BTRFS_UUID_SIZE 16
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
struct btrfs_dev_item {
	/* the internal btrfs device id */
	__le64 devid;

	/* size of the device */
	__le64 total_bytes;

	/* bytes used */
	__le64 bytes_used;

	/* optimal io alignment for this device */
	__le32 io_align;

	/* optimal io width for this device */
	__le32 io_width;

	/* minimal io size for this device */
	__le32 sector_size;

	/* type and info about this device */
	__le64 type;

Yan Zheng's avatar
Yan Zheng committed
191
192
193
	/* expected generation for this device */
	__le64 generation;

194
195
196
197
198
199
	/*
	 * starting byte of this partition on the device,
	 * to allow for stripe alignment in the future
	 */
	__le64 start_offset;

200
201
202
203
204
205
206
207
208
	/* grouping information for allocation decisions */
	__le32 dev_group;

	/* seek speed 0-100 where 100 is fastest */
	u8 seek_speed;

	/* bandwidth 0-100 where 100 is fastest */
	u8 bandwidth;

209
	/* btrfs generated uuid for this device */
210
	u8 uuid[BTRFS_UUID_SIZE];
Yan Zheng's avatar
Yan Zheng committed
211
212
213

	/* uuid of FS who owns this device */
	u8 fsid[BTRFS_UUID_SIZE];
214
215
216
217
218
} __attribute__ ((__packed__));

struct btrfs_stripe {
	__le64 devid;
	__le64 offset;
219
	u8 dev_uuid[BTRFS_UUID_SIZE];
220
221
222
} __attribute__ ((__packed__));

struct btrfs_chunk {
223
224
225
226
	/* size of this chunk in bytes */
	__le64 length;

	/* objectid of the root referencing this chunk */
227
	__le64 owner;
228

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
	__le64 stripe_len;
	__le64 type;

	/* optimal io alignment for this chunk */
	__le32 io_align;

	/* optimal io width for this chunk */
	__le32 io_width;

	/* minimal io size for this chunk */
	__le32 sector_size;

	/* 2^16 stripes is quite a lot, a second limit is the size of a single
	 * item in the btree
	 */
	__le16 num_stripes;
Chris Mason's avatar
Chris Mason committed
245
246
247

	/* sub stripes only matter for raid10 */
	__le16 sub_stripes;
248
249
250
251
252
253
254
255
256
257
258
	struct btrfs_stripe stripe;
	/* additional stripes go here */
} __attribute__ ((__packed__));

/*
 * size in bytes of a chunk item that carries num_stripes stripes.
 * struct btrfs_chunk already embeds one btrfs_stripe, so only the
 * stripes beyond the first are added on top of it.  A stripe count
 * of zero is a bug.
 */
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
	int extra_stripes;

	BUG_ON(num_stripes == 0);

	/* the first stripe is part of struct btrfs_chunk itself */
	extra_stripes = num_stripes - 1;

	return sizeof(struct btrfs_chunk) +
		sizeof(struct btrfs_stripe) * extra_stripes;
}

259
#define BTRFS_FSID_SIZE 16
260
261
#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)

262
263
264
/*
 * every tree block (leaf or node) starts with this header.
 */
265
struct btrfs_header {
266
	/* these first four must match the super block */
Chris Mason's avatar
Chris Mason committed
267
	u8 csum[BTRFS_CSUM_SIZE];
268
	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
269
	__le64 bytenr; /* which block this node is supposed to live in */
270
	__le64 flags;
271
272
273

	/* allowed to be different from the super from here on down */
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
274
	__le64 generation;
275
	__le64 owner;
276
	__le32 nritems;
277
	u8 level;
278
279
} __attribute__ ((__packed__));

280
#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
281
282
				      sizeof(struct btrfs_header)) / \
				     sizeof(struct btrfs_key_ptr))
283
#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
284
#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
285
286
287
#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
					sizeof(struct btrfs_item) - \
					sizeof(struct btrfs_file_extent_item))
288

Yan Zheng's avatar
Yan Zheng committed
289
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
290
291
292
293
294
295

/*
 * this is a very generous portion of the super block, giving us
 * room to translate 14 chunks with 3 stripes each.
 */
#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
296
#define BTRFS_LABEL_SIZE 256
297

298
299
300
301
/*
 * the super block basically lists the main trees of the FS
 * it currently lacks any block count etc etc
 */
302
struct btrfs_super_block {
Chris Mason's avatar
Chris Mason committed
303
	u8 csum[BTRFS_CSUM_SIZE];
304
	/* the first 4 fields must match struct btrfs_header */
Yan Zheng's avatar
Yan Zheng committed
305
	u8 fsid[BTRFS_FSID_SIZE];    /* FS specific uuid */
306
	__le64 bytenr; /* this block number */
307
	__le64 flags;
308
309

	/* allowed to be different from the btrfs_header from here on down */
310
311
312
	__le64 magic;
	__le64 generation;
	__le64 root;
313
	__le64 chunk_root;
314
	__le64 log_root;
315
316
317

	/* this will help find the new super based on the log root */
	__le64 log_root_transid;
318
319
	__le64 total_bytes;
	__le64 bytes_used;
320
	__le64 root_dir_objectid;
321
	__le64 num_devices;
322
323
324
	__le32 sectorsize;
	__le32 nodesize;
	__le32 leafsize;
325
	__le32 stripesize;
326
	__le32 sys_chunk_array_size;
327
	__le64 chunk_root_generation;
328
329
330
	__le64 compat_flags;
	__le64 compat_ro_flags;
	__le64 incompat_flags;
331
	__le16 csum_type;
332
	u8 root_level;
333
	u8 chunk_root_level;
334
	u8 log_root_level;
335
	struct btrfs_dev_item dev_item;
336

337
	char label[BTRFS_LABEL_SIZE];
338
339
340

	/* future expansion */
	__le64 reserved[32];
341
	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
Chris Mason's avatar
Chris Mason committed
342
343
} __attribute__ ((__packed__));

344
345
346
347
348
349
350
351
/*
 * Compat flags that we support.  If any incompat flags are set other than the
 * ones specified below then we will fail to mount
 */
#define BTRFS_FEATURE_COMPAT_SUPP	0x0
#define BTRFS_FEATURE_COMPAT_RO_SUPP	0x0
#define BTRFS_FEATURE_INCOMPAT_SUPP	0x0

352
/*
353
 * A leaf is full of items. offset and size tell us where to find
354
355
 * the item in the leaf (relative to the start of the data area)
 */
Chris Mason's avatar
Chris Mason committed
356
struct btrfs_item {
Chris Mason's avatar
Chris Mason committed
357
	struct btrfs_disk_key key;
358
	__le32 offset;
359
	__le32 size;
360
361
} __attribute__ ((__packed__));

362
363
364
365
366
367
368
/*
 * leaves have an item area and a data area:
 * [item0, item1....itemN] [free space] [dataN...data1, data0]
 *
 * The data is separate from the items to get the keys closer together
 * during searches.
 */
369
struct btrfs_leaf {
370
	struct btrfs_header header;
371
	struct btrfs_item items[];
372
373
} __attribute__ ((__packed__));

374
375
376
377
/*
 * all non-leaf blocks are nodes, they hold only keys and pointers to
 * other blocks
 */
378
379
380
struct btrfs_key_ptr {
	struct btrfs_disk_key key;
	__le64 blockptr;
381
	__le64 generation;
382
383
} __attribute__ ((__packed__));

384
struct btrfs_node {
385
	struct btrfs_header header;
386
	struct btrfs_key_ptr ptrs[];
387
388
} __attribute__ ((__packed__));

389
/*
390
391
 * btrfs_paths remember the path taken from the root down to the leaf.
 * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
392
393
394
395
396
 * to any other levels that are present.
 *
 * The slots array records the index of the item or block pointer
 * used while walking the tree.
 */
397
struct btrfs_path {
398
	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
399
	int slots[BTRFS_MAX_LEVEL];
400
401
	/* if there is real range locking, this locks field will change */
	int locks[BTRFS_MAX_LEVEL];
402
	int reada;
403
404
	/* keep some upper locks as we walk down */
	int keep_locks;
405
	int skip_locking;
406
	int lowest_level;
407
408
409
410
411
412

	/*
	 * set by btrfs_split_item, tells search_slot to keep all locks
	 * and to force calls to keep space in the nodes
	 */
	int search_for_split;
413
};
Chris Mason's avatar
Chris Mason committed
414

415
416
417
418
419
420
/*
 * items in the extent btree are used to record the objectid of the
 * owner of the block and the number of references
 */
struct btrfs_extent_item {
	__le32 refs;
421
422
423
424
425
426
} __attribute__ ((__packed__));

struct btrfs_extent_ref {
	__le64 root;
	__le64 generation;
	__le64 objectid;
Zheng Yan's avatar
Zheng Yan committed
427
	__le32 num_refs;
428
429
} __attribute__ ((__packed__));

430
431
/* dev extents record free space on individual devices.  The owner
 * field points back to the chunk allocation mapping tree that allocated
432
 * the extent.  The chunk tree uuid field is a way to double check the owner
433
434
 */
struct btrfs_dev_extent {
435
436
437
	__le64 chunk_tree;
	__le64 chunk_objectid;
	__le64 chunk_offset;
438
	__le64 length;
439
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
440
441
} __attribute__ ((__packed__));

442
struct btrfs_inode_ref {
443
	__le64 index;
444
445
446
447
	__le16 name_len;
	/* name goes here */
} __attribute__ ((__packed__));

448
struct btrfs_timespec {
Chris Mason's avatar
Chris Mason committed
449
	__le64 sec;
Chris Mason's avatar
Chris Mason committed
450
451
452
	__le32 nsec;
} __attribute__ ((__packed__));

Jan Engelhardt's avatar
Jan Engelhardt committed
453
enum btrfs_compression_type {
454
455
456
	BTRFS_COMPRESS_NONE = 0,
	BTRFS_COMPRESS_ZLIB = 1,
	BTRFS_COMPRESS_LAST = 2,
Jan Engelhardt's avatar
Jan Engelhardt committed
457
};
458

Chris Mason's avatar
Chris Mason committed
459
struct btrfs_inode_item {
460
	/* nfs style generation number */
Chris Mason's avatar
Chris Mason committed
461
	__le64 generation;
462
463
	/* transid that last touched this inode */
	__le64 transid;
Chris Mason's avatar
Chris Mason committed
464
	__le64 size;
465
	__le64 nbytes;
466
	__le64 block_group;
Chris Mason's avatar
Chris Mason committed
467
468
469
470
	__le32 nlink;
	__le32 uid;
	__le32 gid;
	__le32 mode;
471
	__le64 rdev;
472
	__le64 flags;
473

474
475
476
477
478
479
480
481
	/* modification sequence number for NFS */
	__le64 sequence;

	/*
	 * a little future expansion, for more than this we can
	 * just grow the inode item and version it
	 */
	__le64 reserved[4];
482
483
484
485
	struct btrfs_timespec atime;
	struct btrfs_timespec ctime;
	struct btrfs_timespec mtime;
	struct btrfs_timespec otime;
Chris Mason's avatar
Chris Mason committed
486
487
} __attribute__ ((__packed__));

488
489
490
491
/*
 * packed on-disk item holding a single little endian 64 bit value.
 * NOTE(review): presumably the payload of BTRFS_DIR_LOG_ITEM_KEY and
 * BTRFS_DIR_LOG_INDEX_KEY items, with 'end' marking the end of a logged
 * range -- confirm against the tree log code
 */
struct btrfs_dir_log_item {
	__le64 end;
} __attribute__ ((__packed__));

492
struct btrfs_dir_item {
493
	struct btrfs_disk_key location;
494
	__le64 transid;
Josef Bacik's avatar
Josef Bacik committed
495
	__le16 data_len;
496
	__le16 name_len;
497
498
499
500
	u8 type;
} __attribute__ ((__packed__));

struct btrfs_root_item {
501
	struct btrfs_inode_item inode;
502
	__le64 generation;
503
	__le64 root_dirid;
504
505
506
	__le64 bytenr;
	__le64 byte_limit;
	__le64 bytes_used;
Yan Zheng's avatar
Yan Zheng committed
507
	__le64 last_snapshot;
508
	__le64 flags;
509
	__le32 refs;
510
511
	struct btrfs_disk_key drop_progress;
	u8 drop_level;
512
	u8 level;
513
} __attribute__ ((__packed__));
514

515
516
517
518
519
520
521
522
523
/*
 * this is used for both forward and backward root refs
 * NOTE(review): presumably these are the items stored under
 * BTRFS_ROOT_REF_KEY and BTRFS_ROOT_BACKREF_KEY -- confirm
 */
struct btrfs_root_ref {
	__le64 dirid;
	__le64 sequence;
	/*
	 * NOTE(review): presumably the name bytes follow this struct on
	 * disk, as they do for btrfs_inode_ref -- confirm
	 */
	__le16 name_len;
} __attribute__ ((__packed__));

Yan Zheng's avatar
Yan Zheng committed
524
525
526
/*
 * kinds of file extent.  NOTE(review): presumably these are the values
 * stored in btrfs_file_extent_item.type -- confirm against the users
 */
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
527

528
struct btrfs_file_extent_item {
529
530
531
	/*
	 * transaction id that created this extent
	 */
532
	__le64 generation;
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
	/*
	 * max number of bytes to hold this extent in ram
	 * when we split a compressed extent we can't know how big
	 * each of the resulting pieces will be.  So, this is
	 * an upper limit on the size of the extent in ram instead of
	 * an exact limit.
	 */
	__le64 ram_bytes;

	/*
	 * 32 bits for the various ways we might encode the data,
	 * including compression and encryption.  If any of these
	 * are set to something a given disk format doesn't understand
	 * it is treated like an incompat flag for reading and writing,
	 * but not for stat.
	 */
	u8 compression;
	u8 encryption;
	__le16 other_encoding; /* spare for later use */

	/* are we inline data or a real extent? */
554
	u8 type;
555

556
557
558
559
	/*
	 * disk space consumed by the extent, checksum blocks are included
	 * in these numbers
	 */
560
561
	__le64 disk_bytenr;
	__le64 disk_num_bytes;
562
	/*
Chris Mason's avatar
Chris Mason committed
563
	 * the logical offset in file blocks (no csums)
564
565
566
567
568
569
570
	 * this extent record is for.  This allows a file extent to point
	 * into the middle of an existing extent on disk, sharing it
	 * between two snapshots (useful if some bytes in the middle of the
	 * extent have changed)
	 */
	__le64 offset;
	/*
571
572
	 * the logical number of file blocks (no csums included).  This
	 * always reflects the size uncompressed and without encoding.
573
	 */
574
	__le64 num_bytes;
575

576
577
} __attribute__ ((__packed__));

Chris Mason's avatar
Chris Mason committed
578
struct btrfs_csum_item {
579
	u8 csum;
Chris Mason's avatar
Chris Mason committed
580
581
} __attribute__ ((__packed__));

582
583
584
585
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
586
#define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
587
#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
588
#define BTRFS_BLOCK_GROUP_DUP	   (1 << 5)
Chris Mason's avatar
Chris Mason committed
589
#define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
Chris Mason's avatar
Chris Mason committed
590

Chris Mason's avatar
Chris Mason committed
591
592
struct btrfs_block_group_item {
	__le64 used;
593
594
	__le64 chunk_objectid;
	__le64 flags;
Chris Mason's avatar
Chris Mason committed
595
596
} __attribute__ ((__packed__));

597
598
struct btrfs_space_info {
	u64 flags;
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619

	u64 total_bytes;	/* total bytes in the space */
	u64 bytes_used;		/* total bytes used on disk */
	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
				   transaction finishes */
	u64 bytes_reserved;	/* total bytes the allocator has reserved for
				   current allocations */
	u64 bytes_readonly;	/* total bytes that are read only */

	/* delalloc accounting */
	u64 bytes_delalloc;	/* number of bytes reserved for allocation,
				   this space is not necessarily reserved yet
				   by the allocator */
	u64 bytes_may_use;	/* number of bytes that may be used for
				   delalloc */

	int full;		/* indicates that we cannot allocate any more
				   chunks for this space */
	int force_alloc;	/* set if we need to force a chunk alloc for
				   this space */

620
	struct list_head list;
621
622
623
624

	/* for block groups in our same type */
	struct list_head block_groups;
	spinlock_t lock;
625
	struct rw_semaphore groups_sem;
626
627
628
629
630
631
632
};

struct btrfs_free_space {
	struct rb_node bytes_index;
	struct rb_node offset_index;
	u64 offset;
	u64 bytes;
633
634
};

Chris Mason's avatar
Chris Mason committed
635
636
637
struct btrfs_block_group_cache {
	struct btrfs_key key;
	struct btrfs_block_group_item item;
638
	spinlock_t lock;
639
	struct mutex alloc_mutex;
640
	struct mutex cache_mutex;
641
	u64 pinned;
642
	u64 reserved;
643
644
	u64 flags;
	int cached;
645
	int ro;
646
647
648
649
650
651
652
653
654
655
656
657
658
	int dirty;

	struct btrfs_space_info *space_info;

	/* free space cache stuff */
	struct rb_root free_space_bytes;
	struct rb_root free_space_offset;

	/* block group cache stuff */
	struct rb_node cache_node;

	/* for block groups in the same raid type */
	struct list_head list;
659
660
661

	/* usage count */
	atomic_t count;
Chris Mason's avatar
Chris Mason committed
662
};
663

Zheng Yan's avatar
Zheng Yan committed
664
665
666
667
668
669
/*
 * an rb-tree root bundled with a list head and a spinlock.  btrfs_root
 * embeds one as ref_tree_struct and btrfs_fs_info embeds two as
 * reloc_ref_tree and shared_ref_tree.
 * NOTE(review): presumably 'lock' protects both 'root' and 'list' --
 * confirm against the users
 */
struct btrfs_leaf_ref_tree {
	struct rb_root root;
	struct list_head list;
	spinlock_t lock;
};

670
struct btrfs_device;
671
struct btrfs_fs_devices;
672
struct btrfs_fs_info {
673
	u8 fsid[BTRFS_FSID_SIZE];
674
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
675
676
	struct btrfs_root *extent_root;
	struct btrfs_root *tree_root;
677
678
	struct btrfs_root *chunk_root;
	struct btrfs_root *dev_root;
679
	struct btrfs_root *fs_root;
680
	struct btrfs_root *csum_root;
681
682
683

	/* the log root tree is a directory of all the other log roots */
	struct btrfs_root *log_root_tree;
684
	struct radix_tree_root fs_roots_radix;
685

686
687
688
689
	/* block group cache stuff */
	spinlock_t block_group_cache_lock;
	struct rb_root block_group_cache_tree;

690
	struct extent_io_tree pinned_extents;
691

692
693
694
	/* logical->physical extent mapping */
	struct btrfs_mapping_tree mapping_tree;

695
	u64 generation;
696
	u64 last_trans_committed;
697
	u64 last_trans_new_blockgroup;
698
	u64 open_ioctl_trans;
699
	unsigned long mount_opt;
700
	u64 max_extent;
701
	u64 max_inline;
702
	u64 alloc_start;
Chris Mason's avatar
Chris Mason committed
703
	struct btrfs_transaction *running_transaction;
704
	wait_queue_head_t transaction_throttle;
705
	wait_queue_head_t transaction_wait;
706
	wait_queue_head_t async_submit_wait;
707

708
	struct btrfs_super_block super_copy;
709
	struct btrfs_super_block super_for_commit;
710
	struct block_device *__bdev;
Chris Mason's avatar
Chris Mason committed
711
	struct super_block *sb;
712
	struct inode *btree_inode;
Chris Mason's avatar
Chris Mason committed
713
	struct backing_dev_info bdi;
Chris Mason's avatar
Chris Mason committed
714
	struct mutex trans_mutex;
715
	struct mutex tree_log_mutex;
716
717
	struct mutex transaction_kthread_mutex;
	struct mutex cleaner_mutex;
718
	struct mutex pinned_mutex;
719
	struct mutex chunk_mutex;
720
	struct mutex drop_mutex;
721
	struct mutex volume_mutex;
Zheng Yan's avatar
Zheng Yan committed
722
	struct mutex tree_reloc_mutex;
Chris Mason's avatar
Chris Mason committed
723
	struct list_head trans_list;
724
	struct list_head hashers;
725
	struct list_head dead_roots;
726

727
	atomic_t nr_async_submits;
728
	atomic_t async_submit_draining;
729
	atomic_t nr_async_bios;
730
	atomic_t async_delalloc_pages;
731

732
733
734
735
736
737
	/*
	 * this is used by the balancing code to wait for all the pending
	 * ordered extents
	 */
	spinlock_t ordered_extent_lock;
	struct list_head ordered_extents;
738
	struct list_head delalloc_inodes;
739

740
741
742
743
744
745
	/*
	 * there is a pool of worker threads for checksumming during writes
	 * and a pool for checksumming after reads.  This is because readers
	 * can run with FS locks held, and the writers may be waiting for
	 * those locks.  We don't want ordering in the pending list to cause
	 * deadlocks, and so the two are serviced separately.
746
747
748
	 *
	 * A third pool does submit_bio to avoid deadlocking with the other
	 * two
749
750
	 */
	struct btrfs_workers workers;
751
	struct btrfs_workers delalloc_workers;
752
	struct btrfs_workers endio_workers;
753
	struct btrfs_workers endio_meta_workers;
754
	struct btrfs_workers endio_meta_write_workers;
755
	struct btrfs_workers endio_write_workers;
756
	struct btrfs_workers submit_workers;
757
758
759
760
761
762
	/*
	 * fixup workers take dirty pages that didn't properly go through
	 * the cow mechanism and make them safe to write.  It happens
	 * for the sys_munmap function call path
	 */
	struct btrfs_workers fixup_workers;
763
764
	struct task_struct *transaction_kthread;
	struct task_struct *cleaner_kthread;
765
	int thread_pool_size;
766

Zheng Yan's avatar
Zheng Yan committed
767
768
769
	/* tree relocation related fields */
	struct list_head dead_reloc_roots;
	struct btrfs_leaf_ref_tree reloc_ref_tree;
Zheng Yan's avatar
Zheng Yan committed
770
771
	struct btrfs_leaf_ref_tree shared_ref_tree;

772
773
	struct kobject super_kobj;
	struct completion kobj_unregister;
774
	int do_barriers;
775
	int closing;
776
	int log_root_recovering;
777
	atomic_t throttles;
778
	atomic_t throttle_gen;
779

780
	u64 total_pinned;
781
782
	struct list_head dirty_cowonly_roots;

783
	struct btrfs_fs_devices *fs_devices;
784
785
786
787
788
789

	/*
	 * the space_info list is almost entirely read only.  It only changes
	 * when we add a new raid type to the FS, and that happens
	 * very rarely.  RCU is used to protect it.
	 */
790
	struct list_head space_info;
791

792
	spinlock_t delalloc_lock;
793
	spinlock_t new_trans_lock;
794
	u64 delalloc_bytes;
795
	u64 last_alloc;
796
	u64 last_data_alloc;
797

Yan Zheng's avatar
Yan Zheng committed
798
799
800
	spinlock_t ref_cache_lock;
	u64 total_ref_cache_size;

801
802
803
804
805
806
	u64 avail_data_alloc_bits;
	u64 avail_metadata_alloc_bits;
	u64 avail_system_alloc_bits;
	u64 data_alloc_profile;
	u64 metadata_alloc_profile;
	u64 system_alloc_profile;
807
808

	void *bdev_holder;
809
};
810

811
812
/*
 * in ram representation of the tree.  extent_root is used for all allocations
813
 * and for the extent tree extent_root root.
814
 */
815
struct btrfs_dirty_root;
816
struct btrfs_root {
817
	struct extent_buffer *node;
818
819
820
821

	/* the node lock is held while changing the node pointer */
	spinlock_t node_lock;

822
	struct extent_buffer *commit_root;
Yan Zheng's avatar
Yan Zheng committed
823
	struct btrfs_leaf_ref_tree *ref_tree;
824
	struct btrfs_leaf_ref_tree ref_tree_struct;
825
	struct btrfs_dirty_root *dirty_root;
826
	struct btrfs_root *log_root;
Zheng Yan's avatar
Zheng Yan committed
827
	struct btrfs_root *reloc_root;
Yan Zheng's avatar
Yan Zheng committed
828

829
830
	struct btrfs_root_item root_item;
	struct btrfs_key root_key;
831
	struct btrfs_fs_info *fs_info;
832
833
	struct extent_io_tree dirty_log_pages;

834
835
	struct kobject root_kobj;
	struct completion kobj_unregister;
836
	struct mutex objectid_mutex;
Yan Zheng's avatar
Yan Zheng committed
837

838
	struct mutex log_mutex;
Yan Zheng's avatar
Yan Zheng committed
839
840
841
842
843
844
	wait_queue_head_t log_writer_wait;
	wait_queue_head_t log_commit_wait[2];
	atomic_t log_writers;
	atomic_t log_commit[2];
	unsigned long log_transid;
	unsigned long log_batch;
845

846
847
	u64 objectid;
	u64 last_trans;
848
849
850
851
852
853
854
855
856
857

	/* data allocations are done in sectorsize units */
	u32 sectorsize;

	/* node allocations are done in nodesize units */
	u32 nodesize;

	/* leaf allocations are done in leafsize units */
	u32 leafsize;

858
859
	u32 stripesize;

860
	u32 type;
Chris Mason's avatar
Chris Mason committed
861
862
	u64 highest_inode;
	u64 last_inode_alloc;
863
	int ref_cows;
864
	int track_dirty;
865
	u64 defrag_trans_start;
866
	struct btrfs_key defrag_progress;
867
	struct btrfs_key defrag_max;
868
869
	int defrag_running;
	int defrag_level;
870
	char *name;
871
	int in_sysfs;
872
873
874

	/* the dirty list is only used by non-reference counted roots */
	struct list_head dirty_list;
875

876
877
	spinlock_t list_lock;
	struct list_head dead_list;
878
	struct list_head orphan_list;
879
880
881
882
883
884

	/*
	 * right now this just gets used so that a root has its own devid
	 * for stat.  It may be used for more later
	 */
	struct super_block anon_super;
885
886
};

Chris Mason's avatar
Chris Mason committed
887
/*
888

Chris Mason's avatar
Chris Mason committed
889
890
891
892
 * inode items have the data typically returned from stat and store other
 * info about object characteristics.  There is one for every file and dir in
 * the FS
 */
Chris Mason's avatar
Chris Mason committed
893
#define BTRFS_INODE_ITEM_KEY		1
894
895
896
#define BTRFS_INODE_REF_KEY		12
#define BTRFS_XATTR_ITEM_KEY		24
#define BTRFS_ORPHAN_ITEM_KEY		48
Chris Mason's avatar
Chris Mason committed
897
/* reserve 2-15 close to the inode for later flexibility */
Chris Mason's avatar
Chris Mason committed
898
899
900
901
902

/*
 * dir items are the name -> inode pointers in a directory.  There is one
 * for every name in a directory.
 */
903
904
905
906
#define BTRFS_DIR_LOG_ITEM_KEY  60
#define BTRFS_DIR_LOG_INDEX_KEY 72
#define BTRFS_DIR_ITEM_KEY	84
#define BTRFS_DIR_INDEX_KEY	96
Chris Mason's avatar
Chris Mason committed
907
/*
Chris Mason's avatar
Chris Mason committed
908
 * extent data is for file data
Chris Mason's avatar
Chris Mason committed
909
 */
910
#define BTRFS_EXTENT_DATA_KEY	108
911

Chris Mason's avatar
Chris Mason committed
912
/*
913
914
 * extent csums are stored in a separate tree and hold csums for
 * an entire extent on disk.
Chris Mason's avatar
Chris Mason committed
915
 */
916
#define BTRFS_EXTENT_CSUM_KEY	128
Chris Mason's avatar
Chris Mason committed
917

Chris Mason's avatar
Chris Mason committed
918
919
920
921
/*
 * root items point to tree roots.  They are typically in the root
 * tree used by the super block to find all the other trees
 */
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
#define BTRFS_ROOT_ITEM_KEY	132

/*
 * root backrefs tie subvols and snapshots to the directory entries that
 * reference them
 */
#define BTRFS_ROOT_BACKREF_KEY	144

/*
 * root refs make a fast index for listing all of the snapshots and
 * subvolumes referenced by a given root.  They point directly to the
 * directory item in the root that references the subvol
 */
#define BTRFS_ROOT_REF_KEY	156

Chris Mason's avatar
Chris Mason committed
937
938
939
940
/*
 * extent items are in the extent map tree.  These record which blocks
 * are used, and how many references there are to each block
 */
941
942
#define BTRFS_EXTENT_ITEM_KEY	168
#define BTRFS_EXTENT_REF_KEY	180
Chris Mason's avatar
Chris Mason committed
943
944
945
946
947

/*
 * block groups give us hints into the extent allocation trees.  Which
 * blocks are free etc etc
 */
948
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
949

950
951
952
#define BTRFS_DEV_EXTENT_KEY	204
#define BTRFS_DEV_ITEM_KEY	216
#define BTRFS_CHUNK_ITEM_KEY	228
953

Chris Mason's avatar
Chris Mason committed
954
955
956
957
/*
 * string items are for debugging.  They just store a short string of
 * data in the FS
 */
Chris Mason's avatar
Chris Mason committed
958
959
#define BTRFS_STRING_ITEM_KEY	253

960
961
962
#define BTRFS_MOUNT_NODATASUM		(1 << 0)
#define BTRFS_MOUNT_NODATACOW		(1 << 1)
#define BTRFS_MOUNT_NOBARRIER		(1 << 2)
963
#define BTRFS_MOUNT_SSD			(1 << 3)
964
#define BTRFS_MOUNT_DEGRADED		(1 << 4)
965
#define BTRFS_MOUNT_COMPRESS		(1 << 5)
966
967
968
969
970

/*
 * helpers for the BTRFS_MOUNT_* bits.  'opt' is the option name without
 * the BTRFS_MOUNT_ prefix, e.g. btrfs_test_opt(root, SSD).
 *
 * set/clear modify the lvalue 'o' in place; test reads
 * root->fs_info->mount_opt and is non-zero when the bit is set
 */
#define btrfs_set_opt(o, opt)						\
	((o) |= BTRFS_MOUNT_##opt)
#define btrfs_clear_opt(o, opt)						\
	((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_test_opt(root, opt)					\
	((root)->fs_info->mount_opt & BTRFS_MOUNT_##opt)
Yan's avatar
Yan committed
971
972
973
/*
 * Inode flags
 */
Yan's avatar
Yan committed
974
975
976
#define BTRFS_INODE_NODATASUM		(1 << 0)
#define BTRFS_INODE_NODATACOW		(1 << 1)
#define BTRFS_INODE_READONLY		(1 << 2)
977
#define BTRFS_INODE_NOCOMPRESS		(1 << 3)
Yan Zheng's avatar
Yan Zheng committed
978
#define BTRFS_INODE_PREALLOC		(1 << 4)
Yan's avatar
Yan committed
979
980
981
982
983
984
/*
 * helpers for the BTRFS_INODE_* bits kept in BTRFS_I(inode)->flags.
 * 'flag' is the bit name without the BTRFS_INODE_ prefix, e.g.
 * btrfs_test_flag(inode, NODATASUM).
 *
 * set/clear update the flags field in place; test is non-zero when the
 * bit is set
 */
#define btrfs_set_flag(inode, flag)					\
	(BTRFS_I(inode)->flags |= BTRFS_INODE_##flag)
#define btrfs_clear_flag(inode, flag)					\
	(BTRFS_I(inode)->flags &= ~BTRFS_INODE_##flag)
#define btrfs_test_flag(inode, flag)					\
	(BTRFS_I(inode)->flags & BTRFS_INODE_##flag)
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
/* some macros to generate set/get funcs for the struct fields.  This
 * assumes there is a lefoo_to_cpu for every type, so let's make a simple
 * one for u8.  Both conversions just hand back their argument, and
 * __le8 is an alias for u8:
 */
#define le8_to_cpu(v) (v)
#define cpu_to_le8(v) (v)
#define __le8 u8

/*
 * access one member of a 'type' that lives inside an extent buffer.
 *
 * 'ptr' is the position of the struct in the buffer, used as an
 * unsigned long offset.  The macros call read_extent_buffer() /
 * write_extent_buffer() with 'result' as the char * memory buffer, a
 * start of ptr plus the member's offset inside 'type', and a length of
 * exactly the member's size.
 */
#define read_eb_member(eb, ptr, type, member, result)			\
	(read_extent_buffer(eb, (char *)(result),			\
			    offsetof(type, member) +			\
			    ((unsigned long)(ptr)),			\
			    sizeof(((type *)0)->member)))

#define write_eb_member(eb, ptr, type, member, result)			\
	(write_extent_buffer(eb, (char *)(result),			\
			     offsetof(type, member) +			\
			     ((unsigned long)(ptr)),			\
			     sizeof(((type *)0)->member)))

1005
#ifndef BTRFS_SETGET_FUNCS
1006
#define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
1007
1008
1009
u##bits btrfs_##name(struct extent_buffer *eb, type *s);		\
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
#endif
1010
1011
1012
1013

#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
{									\
David Miller's avatar
David Miller committed
1014
1015
1016
	type *p = kmap_atomic(eb->first_page, KM_USER0);		\
	u##bits res = le##bits##_to_cpu(p->member);			\
	kunmap_atomic(p, KM_USER0);					\
<