namei.c 102 KB
Newer Older
1
/*
2
 *  linux/fs/ext4/namei.c
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
36
37
#include "ext4.h"
#include "ext4_jbd2.h"
38
39
40
41

#include "xattr.h"
#include "acl.h"

42
#include <trace/events/ext4.h>
43
44
45
46
47
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
Dave Kleikamp's avatar
Dave Kleikamp committed
48
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
49

50
static struct buffer_head *ext4_append(handle_t *handle,
51
					struct inode *inode,
52
					ext4_lblk_t *block)
53
54
{
	struct buffer_head *bh;
55
	int err;
56

57
58
	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
59
60
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
		return ERR_PTR(-ENOSPC);
61

62
63
	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

64
	bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
65
66
	if (IS_ERR(bh))
		return bh;
67
68
	inode->i_size += inode->i_sb->s_blocksize;
	EXT4_I(inode)->i_disksize = inode->i_size;
69
	BUFFER_TRACE(bh, "get_write_access");
70
71
72
73
74
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		ext4_std_error(inode->i_sb, err);
		return ERR_PTR(err);
Carlos Maiolino's avatar
Carlos Maiolino committed
75
	}
76
77
78
	return bh;
}

79
80
81
82
83
84
85
86
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent);

typedef enum {
	EITHER, INDEX, DIRENT
} dirblock_type_t;

#define ext4_read_dirblock(inode, block, type) \
87
	__ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
88
89

static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
90
91
92
93
						ext4_lblk_t block,
						dirblock_type_t type,
						const char *func,
						unsigned int line)
94
95
96
{
	struct buffer_head *bh;
	struct ext4_dir_entry *dirent;
97
	int is_dx_block = 0;
98

99
100
	bh = ext4_bread(NULL, inode, block, 0);
	if (IS_ERR(bh)) {
101
102
103
104
105
		__ext4_warning(inode->i_sb, func, line,
			       "inode #%lu: lblock %lu: comm %s: "
			       "error %ld reading directory block",
			       inode->i_ino, (unsigned long)block,
			       current->comm, PTR_ERR(bh));
106
107
108
109

		return bh;
	}
	if (!bh) {
110
111
		ext4_error_inode(inode, func, line, block,
				 "Directory hole found");
112
		return ERR_PTR(-EFSCORRUPTED);
113
114
115
116
117
118
119
120
121
122
123
124
	}
	dirent = (struct ext4_dir_entry *) bh->b_data;
	/* Determine whether or not we have an index block */
	if (is_dx(inode)) {
		if (block == 0)
			is_dx_block = 1;
		else if (ext4_rec_len_from_disk(dirent->rec_len,
						inode->i_sb->s_blocksize) ==
			 inode->i_sb->s_blocksize)
			is_dx_block = 1;
	}
	if (!is_dx_block && type == INDEX) {
125
		ext4_error_inode(inode, func, line, block,
126
		       "directory leaf block found instead of index block");
127
		return ERR_PTR(-EFSCORRUPTED);
128
	}
129
	if (!ext4_has_metadata_csum(inode->i_sb) ||
130
131
132
133
134
135
136
137
138
139
140
141
	    buffer_verified(bh))
		return bh;

	/*
	 * An empty leaf block can get mistaken for a index block; for
	 * this reason, we can only check the index checksum when the
	 * caller is sure it should be an index block.
	 */
	if (is_dx_block && type == INDEX) {
		if (ext4_dx_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
142
143
			ext4_error_inode(inode, func, line, block,
					 "Directory index failed checksum");
144
			brelse(bh);
145
			return ERR_PTR(-EFSBADCRC);
146
		}
147
	}
148
149
150
151
	if (!is_dx_block) {
		if (ext4_dirent_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
152
153
			ext4_error_inode(inode, func, line, block,
					 "Directory block failed checksum");
154
			brelse(bh);
155
			return ERR_PTR(-EFSBADCRC);
156
		}
Carlos Maiolino's avatar
Carlos Maiolino committed
157
	}
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
232
233
	u16 offs;
	u16 size;
234
235
};

236
237
238
239
240
241
242
243
/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
244
245
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
246
247
248
249
250
251
252
253
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
254
static struct dx_frame *dx_probe(struct ext4_filename *fname,
255
256
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
257
				 struct dx_frame *frame);
258
static void dx_release(struct dx_frame *frames);
259
260
261
static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
		       unsigned blocksize, struct dx_hash_info *hinfo,
		       struct dx_map_entry map[]);
262
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
263
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
264
		struct dx_map_entry *offsets, int count, unsigned blocksize);
265
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
266
267
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
268
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
269
270
271
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
272
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
273
		struct ext4_filename *fname,
274
		struct ext4_dir_entry_2 **res_dir);
275
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
276
			     struct inode *dir, struct inode *inode);
277

278
/* checksumming functions */
279
280
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

333
334
335
336
337
#define warn_no_space_for_csum(inode)					\
	__warn_no_space_for_csum((inode), __func__, __LINE__)

static void __warn_no_space_for_csum(struct inode *inode, const char *func,
				     unsigned int line)
338
{
339
340
	__ext4_warning_inode(inode, func, line,
		"No space for directory leaf checksum. Please run e2fsck -D.");
341
342
}

343
344
345
346
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

347
	if (!ext4_has_metadata_csum(inode->i_sb))
348
349
350
351
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
352
		warn_no_space_for_csum(inode);
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

368
	if (!ext4_has_metadata_csum(inode->i_sb))
369
370
371
372
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
373
		warn_no_space_for_csum(inode);
374
375
376
377
378
379
380
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

381
382
383
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
384
385
386
387
388
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
422
423
	__u32 csum;
	__le32 save_csum;
424
425
426
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
427
	save_csum = t->dt_checksum;
428
429
430
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
431
	t->dt_checksum = save_csum;
432
433
434
435
436
437
438
439
440
441
442

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

443
	if (!ext4_has_metadata_csum(inode->i_sb))
444
445
446
447
448
449
450
451
452
453
454
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
455
		warn_no_space_for_csum(inode);
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

472
	if (!ext4_has_metadata_csum(inode->i_sb))
473
474
475
476
477
478
479
480
481
482
483
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
484
		warn_no_space_for_csum(inode);
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

500
501
502
503
/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
504
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
505
506
{
	return (struct ext4_dir_entry_2 *)((char *)p +
507
		ext4_rec_len_from_disk(p->rec_len, blocksize));
508
509
}

510
511
512
513
514
/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
515
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
516
517
518
519
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
520
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
521
522
523
524
{
	entry->block = cpu_to_le32(value);
}

525
static inline unsigned dx_get_hash(struct dx_entry *entry)
526
527
528
529
{
	return le32_to_cpu(entry->hash);
}

530
static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
531
532
533
534
{
	entry->hash = cpu_to_le32(value);
}

535
static inline unsigned dx_get_count(struct dx_entry *entries)
536
537
538
539
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

540
static inline unsigned dx_get_limit(struct dx_entry *entries)
541
542
543
544
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

545
static inline void dx_set_count(struct dx_entry *entries, unsigned value)
546
547
548
549
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

550
static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
551
552
553
554
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

555
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
556
{
557
558
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;
559

560
	if (ext4_has_metadata_csum(dir->i_sb))
561
		entry_space -= sizeof(struct dx_tail);
562
	return entry_space / sizeof(struct dx_entry);
563
564
}

565
static inline unsigned dx_node_limit(struct inode *dir)
566
{
567
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
568

569
	if (ext4_has_metadata_csum(dir->i_sb))
570
		entry_space -= sizeof(struct dx_tail);
571
	return entry_space / sizeof(struct dx_entry);
572
573
574
575
576
577
}

/*
 * Debug
 */
#ifdef DX_DEBUG
578
static void dx_show_index(char * label, struct dx_entry *entries)
579
{
580
	int i, n = dx_get_count (entries);
581
	printk(KERN_DEBUG "%s index ", label);
582
	for (i = 0; i < n; i++) {
583
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
584
				0, (unsigned long)dx_get_block(entries + i));
585
586
	}
	printk("\n");
587
588
589
590
591
592
593
594
595
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

596
597
598
599
static struct stats dx_show_leaf(struct inode *dir,
				struct dx_hash_info *hinfo,
				struct ext4_dir_entry_2 *de,
				int size, int show_names)
600
601
602
603
604
605
606
607
608
609
610
611
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
612
613
614
615
616
#ifdef CONFIG_EXT4_FS_ENCRYPTION
				int len;
				char *name;
				struct ext4_str fname_crypto_str
					= {.name = NULL, .len = 0};
617
				int res = 0;
618
619
620

				name  = de->name;
				len = de->name_len;
621
622
				if (ext4_encrypted_inode(inode))
					res = ext4_get_encryption_info(dir);
623
624
625
				if (res) {
					printk(KERN_WARNING "Error setting up"
					       " fname crypto: %d\n", res);
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
				}
				if (ctx == NULL) {
					/* Directory is not encrypted */
					ext4fs_dirhash(de->name,
						de->name_len, &h);
					printk("%*.s:(U)%x.%u ", len,
					       name, h.hash,
					       (unsigned) ((char *) de
							   - base));
				} else {
					/* Directory is encrypted */
					res = ext4_fname_crypto_alloc_buffer(
						ctx, de->name_len,
						&fname_crypto_str);
					if (res < 0) {
						printk(KERN_WARNING "Error "
							"allocating crypto "
							"buffer--skipping "
							"crypto\n");
						ctx = NULL;
					}
647
					res = ext4_fname_disk_to_usr(ctx, NULL, de,
648
649
650
651
652
653
654
655
656
657
658
659
							&fname_crypto_str);
					if (res < 0) {
						printk(KERN_WARNING "Error "
							"converting filename "
							"from disk to usr"
							"\n");
						name = "??";
						len = 2;
					} else {
						name = fname_crypto_str.name;
						len = fname_crypto_str.len;
					}
660
661
					ext4fs_dirhash(de->name, de->name_len,
						       &h);
662
663
664
665
666
667
668
					printk("%*.s:(E)%x.%u ", len, name,
					       h.hash, (unsigned) ((char *) de
								   - base));
					ext4_fname_crypto_free_buffer(
						&fname_crypto_str);
				}
#else
669
670
				int len = de->name_len;
				char *name = de->name;
671
				ext4fs_dirhash(de->name, de->name_len, &h);
672
				printk("%*.s:%x.%u ", len, name, h.hash,
673
				       (unsigned) ((char *) de - base));
674
#endif
675
			}
676
			space += EXT4_DIR_REC_LEN(de->name_len);
677
678
			names++;
		}
679
		de = ext4_next_entry(de, size);
680
681
682
683
684
685
686
687
688
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
689
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
690
691
692
693
694
	unsigned bcount = 0;
	struct buffer_head *bh;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
695
696
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
697
698
699
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
700
701
702
		bh = ext4_bread(NULL,dir, block, 0);
		if (!bh || IS_ERR(bh))
			continue;
703
704
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
705
706
		   dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
			bh->b_data, blocksize, 0);
707
708
709
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
710
		brelse(bh);
711
712
	}
	if (bcount)
713
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
714
715
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
716
717
718
719
720
721
722
723
724
725
726
727
728
729
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
730
dx_probe(struct ext4_filename *fname, struct inode *dir,
731
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
732
733
734
735
736
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct dx_frame *frame = frame_in;
737
	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
738
739
	u32 hash;

740
741
742
743
744
	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
	if (IS_ERR(frame->bh))
		return (struct dx_frame *) frame->bh;

	root = (struct dx_root *) frame->bh->b_data;
745
746
747
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
748
749
		ext4_warning_inode(dir, "Unrecognised inode hash code %u",
				   root->info.hash_version);
750
751
		goto fail;
	}
752
753
	if (fname)
		hinfo = &fname->hinfo;
754
	hinfo->hash_version = root->info.hash_version;
755
756
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
757
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
758
759
	if (fname && fname_name(fname))
		ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
760
761
762
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
763
764
		ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
				   root->info.unused_flags);
765
766
767
		goto fail;
	}

768
769
770
771
	indirect = root->info.indirect_levels;
	if (indirect > 1) {
		ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
				   root->info.indirect_levels);
772
773
774
		goto fail;
	}

775
776
	entries = (struct dx_entry *)(((char *)&root->info) +
				      root->info.info_length);
777
778
779

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
780
781
782
		ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
				   dx_get_limit(entries),
				   dx_root_limit(dir, root->info.info_length));
783
784
785
		goto fail;
	}

786
	dxtrace(printk("Look up %x", hash));
787
	while (1) {
788
		count = dx_get_count(entries);
789
		if (!count || count > dx_get_limit(entries)) {
790
791
792
			ext4_warning_inode(dir,
					   "dx entry: count %u beyond limit %u",
					   count, dx_get_limit(entries));
793
			goto fail;
794
795
		}

796
797
		p = entries + 1;
		q = entries + count - 1;
798
		while (p <= q) {
799
			m = p + (q - p) / 2;
800
801
802
803
804
805
806
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

807
		if (0) { // linear search cross check
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
823
824
		dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at),
			       dx_get_block(at)));
825
826
		frame->entries = entries;
		frame->at = at;
827
828
829
830
831
832
833
834
		if (!indirect--)
			return frame;
		frame++;
		frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
		if (IS_ERR(frame->bh)) {
			ret_err = (struct dx_frame *) frame->bh;
			frame->bh = NULL;
			goto fail;
835
		}
836
		entries = ((struct dx_node *) frame->bh->b_data)->entries;
837

838
839
840
841
		if (dx_get_limit(entries) != dx_node_limit(dir)) {
			ext4_warning_inode(dir,
				"dx entry: limit %u != node limit %u",
				dx_get_limit(entries), dx_node_limit(dir));
842
			goto fail;
843
		}
844
	}
845
fail:
846
847
848
849
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
850

851
	if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
852
853
		ext4_warning_inode(dir,
			"Corrupt directory, running e2fsck is recommended");
854
	return ret_err;
855
856
}

857
static void dx_release(struct dx_frame *frames)
858
859
860
861
{
	if (frames[0].bh == NULL)
		return;

862
	if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
884
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
885
886
887
888
889
890
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
891
	int num_frames = 0;
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired contiuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
930
931
932
		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
933
		p++;
934
		brelse(p->bh);
935
936
937
938
939
940
941
942
943
944
945
946
947
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
948
				  struct inode *dir, ext4_lblk_t block,
949
950
951
952
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
953
	struct ext4_dir_entry_2 *de, *top;
954
	int err = 0, count = 0;
955
	struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str;
956

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
957
958
	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
959
960
961
	bh = ext4_read_dirblock(dir, block, DIRENT);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
962

963
964
	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
965
					   dir->i_sb->s_blocksize -
966
					   EXT4_DIR_REC_LEN(0));
967
968
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	/* Check if the directory is encrypted */
969
	if (ext4_encrypted_inode(dir)) {
970
971
972
973
974
		err = ext4_get_encryption_info(dir);
		if (err < 0) {
			brelse(bh);
			return err;
		}
975
		err = ext4_fname_crypto_alloc_buffer(dir, EXT4_NAME_LEN,
976
977
978
979
980
981
982
						     &fname_crypto_str);
		if (err < 0) {
			brelse(bh);
			return err;
		}
	}
#endif
983
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
984
		if (ext4_check_dir_entry(dir, NULL, de, bh,
985
				bh->b_data, bh->b_size,
986
987
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
988
989
			/* silently ignore the rest of the block */
			break;
990
		}
991
		ext4fs_dirhash(de->name, de->name_len, hinfo);
992
993
994
995
996
997
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
998
		if (!ext4_encrypted_inode(dir)) {
999
1000
1001
1002
1003
1004
			tmp_str.name = de->name;
			tmp_str.len = de->name_len;
			err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de,
				   &tmp_str);
		} else {
1005
1006
			int save_len = fname_crypto_str.len;

1007
			/* Directory is encrypted */
1008
			err = ext4_fname_disk_to_usr(dir, hinfo, de,
1009
1010
1011
1012
1013
1014
1015
1016
						     &fname_crypto_str);
			if (err < 0) {
				count = err;
				goto errout;
			}
			err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de,
					&fname_crypto_str);
1017
			fname_crypto_str.len = save_len;
1018
		}
1019
		if (err != 0) {
1020
1021
			count = err;
			goto errout;
1022
1023
1024
		}
		count++;
	}
1025
errout:
1026
	brelse(bh);
1027
1028
1029
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	ext4_fname_crypto_free_buffer(&fname_crypto_str);
#endif
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
1042
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1043
1044
1045
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
1046
	struct ext4_dir_entry_2 *de;
1047
1048
	struct dx_frame frames[2], *frame;
	struct inode *dir;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1049
	ext4_lblk_t block;
1050
	int count = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1051
	int ret, err;
1052
	__u32 hashval;
1053
	struct ext4_str tmp_str;
1054

1055
	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
1056
		       start_hash, start_minor_hash));
Al Viro's avatar
Al Viro committed
1057
	dir = file_inode(dir_file);
1058
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
1059
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1060
1061
1062
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
1063
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
		if (ext4_has_inline_data(dir)) {
			int has_inline_data = 1;
			count = htree_inlinedir_to_tree(dir_file, dir, 0,
							&hinfo, start_hash,
							start_minor_hash,
							&has_inline_data);
			if (has_inline_data) {
				*next_hash = ~0;
				return count;
			}
		}
1075
1076
1077
1078
1079
1080
1081
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
1082
1083
1084
	frame = dx_probe(NULL, dir, &hinfo, frames);
	if (IS_ERR(frame))
		return PTR_ERR(frame);
1085
1086
1087

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
1088
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1089
1090
1091
1092
1093
		tmp_str.name = de->name;
		tmp_str.len = de->name_len;
		err = ext4_htree_store_dirent(dir_file, 0, 0,
					      de, &tmp_str);
		if (err != 0)
1094
1095
1096
1097
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1098
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1099
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1100
1101
1102
1103
1104
		tmp_str.name = de->name;
		tmp_str.len = de->name_len;
		err = ext4_htree_store_dirent(dir_file, 2, 0,
					      de, &tmp_str);
		if (err != 0)
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
1119
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
1136
1137
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
1138
1139
1140
1141
1142
1143
	return count;
errout:
	dx_release(frames);
	return (err);
}

1144
1145
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
1146
				  struct ext4_filename *fname,
1147
1148
1149
1150
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
1151
1152
	return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
			       fname, d_name, offset, res_dir);
1153
1154
}

1155
1156
1157
1158
/*
 * Directory block splitting, compacting
 */

1159
1160
1161
1162
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
1163
1164
static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
		       unsigned blocksize, struct dx_hash_info *hinfo,
1165
		       struct dx_map_entry *map_tail)
1166
1167
1168
1169
1170
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

1171
	while ((char *) de < base + blocksize) {
1172
		if (de->name_len && de->inode) {
1173
			ext4fs_dirhash(de->name, de->name_len, &h);
1174
1175
			map_tail--;
			map_tail->hash = h.hash;
1176
			map_tail->offs = ((char *) de - base)>>2;
1177
			map_tail->size = le16_to_cpu(de->rec_len);
1178
1179
1180
1181
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
1182
		de = ext4_next_entry(de, blocksize);
1183
1184
1185
1186
	}
	return count;
}

1187
/* Sort map by hash value */