dcache.c 87.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
21
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
22
23
24
25
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
26
#include <linux/export.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
28
29
30
31
32
33
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
34
#include <linux/fs_struct.h>
35
#include <linux/hardirq.h>
36
37
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
38
#include <linux/prefetch.h>
39
#include <linux/ratelimit.h>
40
#include <linux/list_lru.h>
41
#include "internal.h"
42
#include "mount.h"
Linus Torvalds's avatar
Linus Torvalds committed
43

Nick Piggin's avatar
Nick Piggin committed
44
45
/*
 * Usage:
46
47
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
48
49
50
51
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
52
 * dentry->d_sb->s_dentry_lru_lock protects:
Nick Piggin's avatar
Nick Piggin committed
53
54
55
56
57
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
Nick Piggin's avatar
Nick Piggin committed
58
 *   - d_count
Nick Piggin's avatar
Nick Piggin committed
59
 *   - d_unhashed()
Nick Piggin's avatar
Nick Piggin committed
60
61
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
Nick Piggin's avatar
Nick Piggin committed
62
 *   - d_alias, d_inode
Nick Piggin's avatar
Nick Piggin committed
63
64
 *
 * Ordering:
65
 * dentry->d_inode->i_lock
Nick Piggin's avatar
Nick Piggin committed
66
 *   dentry->d_lock
67
 *     dentry->d_sb->s_dentry_lru_lock
68
69
 *     dcache_hash_bucket lock
 *     s_anon lock
Nick Piggin's avatar
Nick Piggin committed
70
 *
Nick Piggin's avatar
Nick Piggin committed
71
72
73
74
75
76
77
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
Nick Piggin's avatar
Nick Piggin committed
78
79
80
81
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
82
/*
 * Tunable exported to userspace via sysctl (vfs_cache_pressure); read by
 * shrinker code elsewhere — presumably weights dcache/icache reclaim, see
 * Documentation/sysctl/vm.txt. 100 is the default.
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

Al Viro's avatar
Al Viro committed
85
/*
 * Global seqlock used so lockless (rcu-walk) path lookups can detect a
 * concurrent rename and retry; writers take it elsewhere in this file.
 * Cacheline-aligned to avoid false sharing on SMP.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

/* slab cache backing all struct dentry allocations */
static struct kmem_cache *dentry_cache __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
90
91
92
93
94
95
96
97
98
99

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;	/* number of hash bits — set by init code not in this chunk */

static struct hlist_bl_head *dentry_hashtable __read_mostly;

/*
 * Map a (parent, name-hash) pair to its hash chain head.  The parent
 * pointer is folded in (scaled down by L1_CACHE_BYTES, since the low
 * pointer bits carry no information) so that identical names under
 * different directories land in different buckets.
 */
static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	return dentry_hashtable + hash_32(hash, d_hash_shift);
}

Linus Torvalds's avatar
Linus Torvalds committed
112
113
114
115
116
/* Statistics gathering (reported to userspace via proc_nr_dentry() below). */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

/* Per-cpu counters; summed on demand by get_nr_dentry*() below. */
static DEFINE_PER_CPU(long, nr_dentry);
static DEFINE_PER_CPU(long, nr_dentry_unused);
119
120

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
121
122
123
124
125
126
127
128
129
130
131
132
133

/*
 * Here we resort to our own counters instead of using generic per-cpu counters
 * for consistency with what the vfs inode code does. We are expected to harvest
 * better code and performance by having our own specialized counters.
 *
 * Please note that the loop is done over all possible CPUs, not over all online
 * CPUs. The reason for this is that we don't want to play games with CPUs going
 * on and off. If one of them goes off, we will just keep their counters.
 *
 * glommer: See cffbc8a for details, and if you ever intend to change this,
 * please update all vfs counters to match.
 */
134
static long get_nr_dentry(void)
135
136
{
	int i;
137
	long sum = 0;
138
139
140
141
142
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

143
144
145
146
147
148
149
150
151
static long get_nr_dentry_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry_unused, i);
	return sum < 0 ? 0 : sum;
}

152
int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
153
154
		   size_t *lenp, loff_t *ppos)
{
155
	dentry_stat.nr_dentry = get_nr_dentry();
156
	dentry_stat.nr_unused = get_nr_dentry_unused();
157
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
158
159
160
}
#endif

161
162
163
164
/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has an
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a,b,mask;

	for (;;) {
		/* 'cs' is the (aligned) dentry name; 'ct' may be unaligned */
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	/* Final partial word: compare only the low 'tcount' bytes. */
	mask = bytemask_from_count(tcount);
	return unlikely(!!((a ^ b) & mask));
}

198
#else
199

200
/*
 * Byte-at-a-time fallback for architectures without safe word-sized
 * name loads.  Returns 0 when the two 'tcount'-byte strings are equal,
 * non-zero on the first mismatch.  'tcount' must be non-zero.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs++ != *ct++)
			return 1;
	} while (--tcount);
	return 0;
}

212
213
#endif

214
215
/*
 * Compare a dentry's name against a candidate (ct, tcount); returns 0 on
 * match, non-zero otherwise.  Safe to call during RCU-walk — see below.
 */
static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

Christoph Hellwig's avatar
Christoph Hellwig committed
238
/*
 * Final free of a dentry, run directly or as an RCU callback (see
 * dentry_free()): release an externally-allocated name, if any, then
 * return the object to the slab cache.
 */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	/* must already have been removed from its inode's alias list */
	WARN_ON(!hlist_unhashed(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

Al Viro's avatar
Al Viro committed
248
249
250
251
252
253
254
255
256
/*
 * Free a dentry, deferring through RCU when lockless lookups may still
 * hold transient references to it.
 */
static void dentry_free(struct dentry *dentry)
{
	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

Nick Piggin's avatar
Nick Piggin committed
257
258
/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 *
 * Caller must hold dentry->d_lock.
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier: bump d_seq so seqcount readers retry. */
	write_seqcount_barrier(&dentry->d_seq);
}

Linus Torvalds's avatar
Linus Torvalds committed
271
272
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 *
 * Called with dentry->d_lock and (when there is an inode)
 * dentry->d_inode->i_lock held; drops both before calling into
 * the filesystem / iput().
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_alias);
		/* drop locks before potentially sleeping iput()/d_iput() */
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

Nick Piggin's avatar
Nick Piggin committed
297
298
299
300
301
302
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 *
 * Unlike dentry_iput(), the dentry survives as a negative one, so the
 * type flags are cleared and rcu-walkers are forced to retry before the
 * locks are dropped.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	__d_clear_type(dentry);
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
/*
 * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
 * is in use - which includes both the "real" per-superblock
 * LRU list _and_ the DCACHE_SHRINK_LIST use.
 *
 * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
 * on the shrink list (ie not on the superblock LRU list).
 *
 * The per-cpu "nr_dentry_unused" counters are updated with
 * the DCACHE_LRU_LIST bit.
 *
 * These helper functions make sure we always follow the
 * rules. d_lock must be held by the caller.
 */
#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))

/* Put an unreferenced dentry on its superblock's LRU list. */
static void d_lru_add(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, 0);
	dentry->d_flags |= DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Remove a dentry from its superblock's LRU list. */
static void d_lru_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Remove a dentry from a private shrink list (plain list_head, not the LRU). */
static void d_shrink_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	this_cpu_dec(nr_dentry_unused);
}

/* Add a dentry (currently on neither list) to a private shrink list. */
static void d_shrink_add(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, 0);
	list_add(&dentry->d_lru, list);
	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
}

/*
 * These can only be called under the global LRU lock, ie during the
 * callback for freeing the LRU list. "isolate" removes it from the
 * LRU lists entirely, while shrink_move moves it to the indicated
 * private list.
 */
static void d_lru_isolate(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	list_del_init(&dentry->d_lru);
}

static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags |= DCACHE_SHRINK_LIST;
	list_move_tail(&dentry->d_lru, list);
}

388
/*
 * dentry_lru_add() must be called with d_lock held.
 * Idempotent: only adds the dentry if it is not already on an LRU/shrink list.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
		d_lru_add(dentry);
}

Nick Piggin's avatar
Nick Piggin committed
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		/*
		 * Hashed dentries are normally on the dentry hashtable,
		 * with the exception of those newly allocated by
		 * d_obtain_alias, which are always IS_ROOT:
		 */
		if (unlikely(IS_ROOT(dentry)))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		/* NULL pprev is what d_unhashed() tests for */
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
		/* force any in-flight rcu-walk lookups to retry */
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

/* Locked wrapper around __d_drop(). */
void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

Al Viro's avatar
Al Viro committed
443
/*
 * Tear down a dentry whose death has been decided.  Caller holds
 * dentry->d_lock, the parent's d_lock (if any) and the inode's i_lock
 * (if any); all are dropped via dentry_iput() / explicitly below.
 */
static void __dentry_kill(struct dentry *dentry)
{
	struct dentry *parent = NULL;
	bool can_free = true;
	if (!IS_ROOT(dentry))
		parent = dentry->d_parent;

	/*
	 * The dentry is now unrecoverably dead to the world.
	 */
	lockref_mark_dead(&dentry->d_lockref);

	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
		dentry->d_op->d_prune(dentry);

	/* off the LRU, unless a shrinker already owns it via its shrink list */
	if (dentry->d_flags & DCACHE_LRU_LIST) {
		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
			d_lru_del(dentry);
	}
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform d_walk() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	BUG_ON((int)dentry->d_lockref.count > 0);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/*
	 * If a shrinker still has this dentry on its private list, defer
	 * the actual freeing to it (DCACHE_MAY_FREE handshake).
	 */
	spin_lock(&dentry->d_lock);
	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
		dentry->d_flags |= DCACHE_MAY_FREE;
		can_free = false;
	}
	spin_unlock(&dentry->d_lock);
	if (likely(can_free))
		dentry_free(dentry);
}

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * Returns the parent (which now requires a refcount drop), NULL if we're
 * done, or the same dentry again when a trylock failed and the caller
 * should retry from scratch.
 */
static struct dentry *dentry_kill(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct inode *inode = dentry->d_inode;
	struct dentry *parent = NULL;

	/* trylocks only: we already hold d_lock, so honour lock ordering */
	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
		goto failed;

	if (!IS_ROOT(dentry)) {
		parent = dentry->d_parent;
		if (unlikely(!spin_trylock(&parent->d_lock))) {
			if (inode)
				spin_unlock(&inode->i_lock);
			goto failed;
		}
	}

	__dentry_kill(dentry);
	return parent;

failed:
	spin_unlock(&dentry->d_lock);
	cpu_relax();
	return dentry; /* try again with same dentry */
}

529
530
531
532
533
/*
 * Lock the parent of a locked dentry without violating the parent-before-
 * child lock order.  Returns the locked parent, or NULL when there is
 * nothing to lock (root dentry, dentry already being killed, or dentry is
 * its own parent).  Note the slow path drops and retakes dentry->d_lock,
 * so the caller must re-check any state derived from it.
 */
static inline struct dentry *lock_parent(struct dentry *dentry)
{
	struct dentry *parent = dentry->d_parent;
	if (IS_ROOT(dentry))
		return NULL;
	/* negative count: lockref already marked dead, dentry is dying */
	if (unlikely((int)dentry->d_lockref.count < 0))
		return NULL;
	if (likely(spin_trylock(&parent->d_lock)))
		return parent;
	/* slow path: release the child lock so we can take parent first */
	rcu_read_lock();
	spin_unlock(&dentry->d_lock);
again:
	parent = ACCESS_ONCE(dentry->d_parent);
	spin_lock(&parent->d_lock);
	/*
	 * We can't blindly lock dentry until we are sure
	 * that we won't violate the locking order.
	 * Any changes of dentry->d_parent must have
	 * been done with parent->d_lock held, so
	 * spin_lock() above is enough of a barrier
	 * for checking if it's still our child.
	 */
	if (unlikely(parent != dentry->d_parent)) {
		spin_unlock(&parent->d_lock);
		goto again;
	}
	rcu_read_unlock();
	if (parent != dentry)
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	else
		parent = NULL;
	return parent;
}

Linus Torvalds's avatar
Linus Torvalds committed
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/* 
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release 
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (unlikely(!dentry))
		return;

repeat:
	/* fast path: drops the ref; on last ref falls through with d_lock held */
	if (lockref_put_or_lock(&dentry->d_lockref))
		return;

	/* Unreachable? Get rid of it */
	if (unlikely(d_unhashed(dentry)))
		goto kill_it;

	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* keep it cached: mark referenced and park it on the LRU */
	if (!(dentry->d_flags & DCACHE_REFERENCED))
		dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/* dentry_kill() hands back the parent (or the retry candidate) */
	dentry = dentry_kill(dentry);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
Linus Torvalds's avatar
Linus Torvalds committed
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
 
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		/* must drop d_lock: shrink_dcache_parent() takes locks itself */
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);
Linus Torvalds's avatar
Linus Torvalds committed
678

Nick Piggin's avatar
Nick Piggin committed
679
/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

/* Grab a reference without holding d_lock (lockref handles the cmpxchg). */
static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

Nick Piggin's avatar
Nick Piggin committed
690
691
/*
 * dget_parent - return a referenced copy of @dentry's parent.
 *
 * Fast path: speculatively read d_parent under RCU, take a reference
 * with lockref_get_not_zero(), and keep it if d_parent is still the
 * same afterwards.  Otherwise fall back to locking the parent and
 * re-validating under its d_lock.
 */
struct dentry *dget_parent(struct dentry *dentry)
{
	int gotref;
	struct dentry *ret;

	/*
	 * Do optimistic parent lookup without any
	 * locking.
	 */
	rcu_read_lock();
	ret = ACCESS_ONCE(dentry->d_parent);
	gotref = lockref_get_not_zero(&ret->d_lockref);
	rcu_read_unlock();
	if (likely(gotref)) {
		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
			return ret;
		/* raced with a rename: drop the stale ref and fall back */
		dput(ret);
	}

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	/* a live child implies the parent already holds a reference */
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

Linus Torvalds's avatar
Linus Torvalds committed
730
731
732
733
734
735
736
737
738
739
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one.
 */
static struct dentry *__d_find_alias(struct inode *inode)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* remember it, but keep scanning for a better one */
				discon_alias = alias;
			} else {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		/* re-check: the list was unlocked while we scanned */
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			__dget_dlock(alias);
			spin_unlock(&alias->d_lock);
			return alias;
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

Nick Piggin's avatar
Nick Piggin committed
777
/*
 * Locked wrapper: take i_lock and delegate to __d_find_alias().
 * Returns a referenced alias dentry, or NULL if the inode has none.
 */
struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);
Linus Torvalds's avatar
Linus Torvalds committed
789
790
791
792
793
794
795

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			/*
			 * inform the fs via d_prune that this dentry
			 * is about to be unhashed and destroyed.
			 */
			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
			    !d_unhashed(dentry))
				dentry->d_op->d_prune(dentry);

			/*
			 * Take a temporary ref, unhash, then let dput()'s
			 * last-reference path do the actual teardown once
			 * the locks are dropped.
			 */
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			/* list may have changed while unlocked — rescan */
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
Linus Torvalds's avatar
Linus Torvalds committed
822

823
/*
 * Kill every dentry on a private shrink list (built by d_shrink_add()),
 * then walk up and kill any ancestors whose last reference we held.
 * Entries that are busy, already dying, or whose locks can't be taken
 * are skipped or requeued rather than freed.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry, *parent;

	while (!list_empty(list)) {
		struct inode *inode;
		dentry = list_entry(list->prev, struct dentry, d_lru);
		spin_lock(&dentry->d_lock);
		parent = lock_parent(dentry);

		/*
		 * The dispose list is isolated and dentries are not accounted
		 * to the LRU here, so we can simply remove it from the list
		 * here regardless of whether it is referenced or not.
		 */
		d_shrink_del(dentry);

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free it.
		 */
		if ((int)dentry->d_lockref.count > 0) {
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			continue;
		}

		/* already killed elsewhere: free only if __dentry_kill deferred to us */
		if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
			bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			if (can_free)
				dentry_free(dentry);
			continue;
		}

		/* couldn't get i_lock: put it back on the list and retry later */
		inode = dentry->d_inode;
		if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
			d_shrink_add(dentry, list);
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			continue;
		}

		__dentry_kill(dentry);

		/*
		 * We need to prune ancestors too. This is necessary to prevent
		 * quadratic behavior of shrink_dcache_parent(), but is also
		 * expected to be beneficial in reducing dentry cache
		 * fragmentation.
		 */
		dentry = parent;
		while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
			parent = lock_parent(dentry);
			if (dentry->d_lockref.count != 1) {
				/* others still hold it: just drop our ref */
				dentry->d_lockref.count--;
				spin_unlock(&dentry->d_lock);
				if (parent)
					spin_unlock(&parent->d_lock);
				break;
			}
			inode = dentry->d_inode;	/* can't be NULL */
			if (unlikely(!spin_trylock(&inode->i_lock))) {
				spin_unlock(&dentry->d_lock);
				if (parent)
					spin_unlock(&parent->d_lock);
				cpu_relax();
				continue;
			}
			__dentry_kill(dentry);
			dentry = parent;
		}
	}
}

903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
/*
 * list_lru walk callback for prune_dcache_sb(): decide the fate of one
 * LRU entry.  Referenced or recently-used dentries stay (removed or
 * rotated); reclaimable ones are moved to the caller's dispose list.
 */
static enum lru_status
dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);


	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (!spin_trylock(&dentry->d_lock))
		return LRU_SKIP;

	/*
	 * Referenced dentries are still in use. If they have active
	 * counts, just remove them from the LRU. Otherwise give them
	 * another pass through the LRU.
	 */
	if (dentry->d_lockref.count) {
		d_lru_isolate(dentry);
		spin_unlock(&dentry->d_lock);
		return LRU_REMOVED;
	}

	if (dentry->d_flags & DCACHE_REFERENCED) {
		dentry->d_flags &= ~DCACHE_REFERENCED;
		spin_unlock(&dentry->d_lock);

		/*
		 * The list move itself will be made by the common LRU code. At
		 * this point, we've dropped the dentry->d_lock but keep the
		 * lru lock. This is safe to do, since every list movement is
		 * protected by the lru lock even if both locks are held.
		 *
		 * This is guaranteed by the fact that all LRU management
		 * functions are intermediated by the LRU API calls like
		 * list_lru_add and list_lru_del. List movement in this file
		 * only ever occur through this functions or through callbacks
		 * like this one, that are called from the LRU API.
		 *
		 * The only exceptions to this are functions like
		 * shrink_dentry_list, and code that first checks for the
		 * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
		 * operating only with stack provided lists after they are
		 * properly isolated from the main list.  It is thus, always a
		 * local access.
		 */
		return LRU_ROTATE;
	}

	d_lru_shrink_move(dentry, freeable);
	spin_unlock(&dentry->d_lock);

	return LRU_REMOVED;
}

961
/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @nr_to_scan : number of entries to try to free
 * @nid: which node to scan for freeable entities
 *
 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
 * done when we need more memory and called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
		     int nid)
{
	LIST_HEAD(dispose);
	long freed;

	/* isolate reclaimable dentries onto 'dispose', then kill them */
	freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
				       &dispose, &nr_to_scan);
	shrink_dentry_list(&dispose);
	return freed;
}
Nick Piggin's avatar
Nick Piggin committed
985

986
987
static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
						spinlock_t *lru_lock, void *arg)
988
{
989
990
	struct list_head *freeable = arg;
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);
991

992
993
994
995
996
997
998
999
	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (!spin_trylock(&dentry->d_lock))
		return LRU_SKIP;

1000
	d_lru_shrink_move(dentry, freeable);