/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>

#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_ag.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trace.h"

static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct shrinker xfs_buf_shake = {
	.shrink = xfsbufd_wakeup,
	.seeks = DEFAULT_SEEKS,
};

static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
struct workqueue_struct *xfsconvertd_workqueue;

#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
#else
# define XB_SET_OWNER(bp)	do { } while (0)
# define XB_CLEAR_OWNER(bp)	do { } while (0)
# define XB_GET_OWNER(bp)	do { } while (0)
#endif

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
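
/*
 * Summary (added note, not in the original file): XBF_READ_AHEAD
 * allocations map to __GFP_NORETRY | __GFP_NOWARN so readahead gives
 * up instead of retrying under memory pressure, XBF_DONT_BLOCK maps
 * to GFP_NOFS | __GFP_NOWARN to avoid recursing into the filesystem,
 * and everything else uses plain GFP_KERNEL | __GFP_NOWARN.
 */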

#define xb_to_km(flags) \
	 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)

#define xfs_buf_allocate(flags) \
	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
#define xfs_buf_deallocate(bp) \
	kmem_zone_free(xfs_buf_zone, (bp));

/*
 *	Page Region interfaces.
 *
 *	For pages in filesystems where the blocksize is smaller than the
 *	pagesize, we use the page->private field (long) to hold a bitmap
 * 	of uptodate regions within the page.
 *
 *	Each such region is "bytes per page / bits per long" bytes long.
 *
 *	NBPPR == number-of-bytes-per-page-region
 *	BTOPR == bytes-to-page-region (rounded up)
 *	BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
#if (BITS_PER_LONG == 32)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
#elif (BITS_PER_LONG == 64)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))

STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	mask;
	int		first, final;

	first = BTOPR(offset);
	final = BTOPRT(offset + length - 1);
	first = min(first, final);

	mask = ~0UL;
	mask <<= BITS_PER_LONG - (final - first);
	mask >>= BITS_PER_LONG - (final);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return mask;
}
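
/*
 * Worked example (illustrative, not part of the original file): with
 * 4 KiB pages and 64-bit longs, PRSHIFT is 6 and each region covers
 * NBPPR == 64 bytes.  For offset == 0 and length == 512, first ==
 * BTOPR(0) == 0 and final == BTOPRT(511) == 7; the two shifts then
 * leave (final - first) == 7 contiguous bits set starting at bit
 * "first", i.e. the mask is 0x7f.
 */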

STATIC void
set_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	set_page_private(page,
		page_private(page) | page_region_mask(offset, length));
	if (page_private(page) == ~0UL)
		SetPageUptodate(page);
}

STATIC int
test_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	unsigned long	mask = page_region_mask(offset, length);

	return (mask && (page_private(page) & mask) == mask);
}
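
/*
 * Pairing note (added, not in the original file): set_page_region()
 * ORs the mask into page->private, while test_page_region() demands
 * that every bit of the mask is already set, so a byte range only
 * counts as uptodate once all of its regions have been marked.
 */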

/*
 *	Mapping of multi-page buffers into contiguous virtual space
 */

typedef struct a_list {
	void		*vm_addr;
	struct a_list	*next;
} a_list_t;

static a_list_t		*as_free_head;
static int		as_list_len;
static DEFINE_SPINLOCK(as_lock);

/*
 *	Try to batch vunmaps because they are costly.
 */
STATIC void
free_address(
	void		*addr)
{
	a_list_t	*aentry;

#ifdef CONFIG_XEN
	/*
	 * Xen needs to be able to make sure it can get an exclusive
	 * RO mapping of pages it wants to turn into a pagetable.  If
	 * a newly allocated page is also still being vmap()ed by xfs,
	 * it will cause pagetable construction to fail.  This is a
	 * quick workaround to always eagerly unmap pages so that Xen
	 * is happy.
	 */
	vunmap(addr);
	return;
#endif

	aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
	if (likely(aentry)) {
		spin_lock(&as_lock);
		aentry->next = as_free_head;
		aentry->vm_addr = addr;
		as_free_head = aentry;
		as_list_len++;
		spin_unlock(&as_lock);
	} else {
		vunmap(addr);
	}
}

STATIC void
purge_addresses(void)
{
	a_list_t	*aentry, *old;

	if (as_free_head == NULL)
		return;

	spin_lock(&as_lock);
	aentry = as_free_head;
	as_free_head = NULL;
	as_list_len = 0;
	spin_unlock(&as_lock);

	while ((old = aentry) != NULL) {
		vunmap(aentry->vm_addr);
		aentry = aentry->next;
		kfree(old);
	}
}
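
/*
 * Usage note (added, not in the original file): free_address() queues
 * addresses on the list above and only falls back to an immediate
 * vunmap() when no list entry can be allocated (or under CONFIG_XEN);
 * _xfs_buf_map_pages() below calls purge_addresses() once the list
 * grows past 64 entries, so the costly vunmap() calls are batched.
 */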

/*
 *	Internal xfs_buf_t object manipulation
 */

STATIC void
_xfs_buf_initialize(
	xfs_buf_t		*bp,
	xfs_buftarg_t		*target,
	xfs_off_t		range_base,
	size_t			range_length,
	xfs_buf_flags_t		flags)
{
	/*
	 * We don't want certain flags to appear in b_flags.
	 */
	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);

	memset(bp, 0, sizeof(xfs_buf_t));
	atomic_set(&bp->b_hold, 1);
	init_completion(&bp->b_iowait);
	INIT_LIST_HEAD(&bp->b_list);
	INIT_LIST_HEAD(&bp->b_hash_list);
	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
	XB_SET_OWNER(bp);
	bp->b_target = target;
	bp->b_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	bp->b_buffer_length = bp->b_count_desired = range_length;
	bp->b_flags = flags;
	bp->b_bn = XFS_BUF_DADDR_NULL;
	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(xb_create);

	trace_xfs_buf_init(bp, _RET_IP_);
}

/*
 *	Allocate a page array capable of holding a specified number
 *	of pages, and point the page buf at it.
 */
STATIC int
_xfs_buf_get_pages(
	xfs_buf_t		*bp,
	int			page_count,
	xfs_buf_flags_t		flags)
{
	/* Make sure that we have a page list */
	if (bp->b_pages == NULL) {
		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
		bp->b_page_count = page_count;
		if (page_count <= XB_PAGES) {
			bp->b_pages = bp->b_page_array;
		} else {
			bp->b_pages = kmem_alloc(sizeof(struct page *) *
					page_count, xb_to_km(flags));
			if (bp->b_pages == NULL)
				return -ENOMEM;
		}
		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}
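
/*
 * Sizing note (added, not in the original file): buffers spanning at
 * most XB_PAGES pages use the b_page_array embedded in the xfs_buf
 * itself; only larger buffers allocate a separate pointer array,
 * which _xfs_buf_free_pages() below hands back to the allocator.
 */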

/*
 *	Frees b_pages if it was allocated.
 */
STATIC void
_xfs_buf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->b_pages != bp->b_page_array) {
		kmem_free(bp->b_pages);
		bp->b_pages = NULL;
	}
}

/*
 *	Releases the specified buffer.
 *
 * 	The modification state of any associated pages is left unchanged.
 * 	The buffer must not be on any hash - use xfs_buf_rele instead for
 * 	hashed and refcounted buffers
 */
void
xfs_buf_free(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_hash_list));

	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
		uint		i;

		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
			free_address(bp->b_addr - bp->b_offset);

		for (i = 0; i < bp->b_page_count; i++) {
			struct page	*page = bp->b_pages[i];

			if (bp->b_flags & _XBF_PAGE_CACHE)
				ASSERT(!PagePrivate(page));
			page_cache_release(page);
		}
	}
	_xfs_buf_free_pages(bp);
	xfs_buf_deallocate(bp);
}

/*
 *	Finds all pages for the buffer in question and builds its page list.
 */
STATIC int
_xfs_buf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->b_target->bt_mapping;
	size_t			blocksize = bp->b_target->bt_bsize;
	size_t			size = bp->b_count_desired;
	size_t			nbytes, offset;
	gfp_t			gfp_mask = xb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	xfs_off_t		end;
	int			error;

	end = bp->b_file_offset + bp->b_buffer_length;
	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);

	error = _xfs_buf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->b_flags |= _XBF_PAGE_CACHE;

	offset = bp->b_offset;
	first = bp->b_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->b_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & XBF_READ_AHEAD) {
				bp->b_page_count = i;
				for (i = 0; i < bp->b_page_count; i++)
					unlock_page(bp->b_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"XFS: possible memory allocation "
					"deadlock in %s (mode:0x%x)\n",
					__func__, gfp_mask);

			XFS_STATS_INC(xb_page_retries);
			xfsbufd_wakeup(0, gfp_mask);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(xb_page_found);

		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
		size -= nbytes;

		ASSERT(!PagePrivate(page));
		if (!PageUptodate(page)) {
			page_count--;
			if (blocksize >= PAGE_CACHE_SIZE) {
				if (flags & XBF_READ)
					bp->b_flags |= _XBF_PAGE_LOCKED;
			} else if (!PagePrivate(page)) {
				if (test_page_region(page, offset, nbytes))
					page_count++;
			}
		}

		bp->b_pages[i] = page;
		offset = 0;
	}

	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
		for (i = 0; i < bp->b_page_count; i++)
			unlock_page(bp->b_pages[i]);
	}

	if (page_count == bp->b_page_count)
		bp->b_flags |= XBF_DONE;

	return error;
}

/*
 *	Map buffer into kernel address-space if necessary.
 */
STATIC int
_xfs_buf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	/* A single page buffer is always mappable */
	if (bp->b_page_count == 1) {
		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
		bp->b_flags |= XBF_MAPPED;
	} else if (flags & XBF_MAPPED) {
		if (as_list_len > 64)
			purge_addresses();
		bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
					VM_MAP, PAGE_KERNEL);
		if (unlikely(bp->b_addr == NULL))
			return -ENOMEM;
		bp->b_addr += bp->b_offset;
		bp->b_flags |= XBF_MAPPED;
	}

	return 0;
}

/*
 *	Finding and Reading Buffers
 */

/*
 *	Looks up, and creates if absent, a lockable buffer for
 *	a given range of an inode.  The buffer is returned
 *	locked.	 If other overlapping buffers exist, they are
 *	released before the new buffer is created and locked,
 *	which may imply that this call will block until those buffers
 *	are unlocked.  No I/O is implied by this call.
 */
xfs_buf_t *
_xfs_buf_find(
	xfs_buftarg_t		*btp,	/* block device target		*/
	xfs_off_t		ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	xfs_buf_flags_t		flags,
	xfs_buf_t		*new_bp)
{
	xfs_off_t		range_base;
	size_t			range_length;
	xfs_bufhash_t		*hash;
	xfs_buf_t		*bp, *n;

	range_base = (ioff << BBSHIFT);
	range_length = (isize << BBSHIFT);

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(range_length < (1 << btp->bt_sshift)));
	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));

	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];

	spin_lock(&hash->bh_lock);

	list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
		ASSERT(btp == bp->b_target);
		if (bp->b_file_offset == range_base &&
		    bp->b_buffer_length == range_length) {
			/*
			 * If we look at something, bring it to the
			 * front of the list for next time.
			 */
			atomic_inc(&bp->b_hold);
			list_move(&bp->b_hash_list, &hash->bh_list);
			goto found;
		}
	}

	/* No match found */
	if (new_bp) {
		_xfs_buf_initialize(new_bp, btp, range_base,
				range_length, flags);
		new_bp->b_hash = hash;
		list_add(&new_bp->b_hash_list, &hash->bh_list);
	} else {
		XFS_STATS_INC(xb_miss_locked);
	}

	spin_unlock(&hash->bh_lock);
	return new_bp;

found:
	spin_unlock(&hash->bh_lock);

	/* Attempt to get the semaphore without sleeping,
	 * if this does not work then we need to drop the
	 * spinlock and do a hard attempt on the semaphore.
	 */
	if (down_trylock(&bp->b_sema)) {
		if (!(flags & XBF_TRYLOCK)) {
			/* wait for buffer ownership */
			xfs_buf_lock(bp);
			XFS_STATS_INC(xb_get_locked_waited);
		} else {
			/* We asked for a trylock and failed, no need
			 * to look at file offset and length here, we
			 * know that this buffer at least overlaps our
			 * buffer and is locked, therefore our buffer
			 * either does not exist, or is this buffer.
			 */
			xfs_buf_rele(bp);
			XFS_STATS_INC(xb_busy_locked);
			return NULL;
		}
	} else {
		/* trylock worked */
		XB_SET_OWNER(bp);
	}

	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		bp->b_flags &= XBF_MAPPED;
	}

	trace_xfs_buf_find(bp, flags, _RET_IP_);
	XFS_STATS_INC(xb_get_locked);
	return bp;
}

/*
 *	Assembles a buffer covering the specified range.
 *	Storage in memory for all portions of the buffer will be allocated,
 *	although backing storage may not be.
 */
xfs_buf_t *
xfs_buf_get(
	xfs_buftarg_t		*target,/* target for buffer		*/
	xfs_off_t		ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	xfs_buf_flags_t		flags)
{
	xfs_buf_t		*bp, *new_bp;
	int			error = 0, i;

	new_bp = xfs_buf_allocate(flags);
	if (unlikely(!new_bp))
		return NULL;

	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
	if (bp == new_bp) {
		error = _xfs_buf_lookup_pages(bp, flags);
		if (error)
			goto no_buffer;
	} else {
		xfs_buf_deallocate(new_bp);
		if (unlikely(bp == NULL))
			return NULL;
	}

	for (i = 0; i < bp->b_page_count; i++)
		mark_page_accessed(bp->b_pages[i]);

	if (!(bp->b_flags & XBF_MAPPED)) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			printk(KERN_WARNING "%s: failed to map pages\n",
					__func__);
			goto no_buffer;
		}
	}

	XFS_STATS_INC(xb_get);

	/*
	 * Always fill in the block number now, the mapped cases can do
	 * their own overlay of this later.
	 */
	bp->b_bn = ioff;
	bp->b_count_desired = bp->b_buffer_length;

	trace_xfs_buf_get(bp, flags, _RET_IP_);
	return bp;

 no_buffer:
	if (flags & (XBF_LOCK | XBF_TRYLOCK))
		xfs_buf_unlock(bp);
	xfs_buf_rele(bp);
	return NULL;
}

STATIC int
_xfs_buf_read(
	xfs_buf_t		*bp,
	xfs_buf_flags_t		flags)
{
	int			status;

	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
			XBF_READ_AHEAD | _XBF_RUN_QUEUES);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
			XBF_READ_AHEAD | _XBF_RUN_QUEUES);

	status = xfs_buf_iorequest(bp);
	if (!status && !(flags & XBF_ASYNC))
		status = xfs_buf_iowait(bp);
	return status;
}

xfs_buf_t *
xfs_buf_read(
	xfs_buftarg_t		*target,
	xfs_off_t		ioff,
	size_t			isize,
	xfs_buf_flags_t		flags)
{
	xfs_buf_t		*bp;

	flags |= XBF_READ;

	bp = xfs_buf_get(target, ioff, isize, flags);
	if (bp) {
		trace_xfs_buf_read(bp, flags, _RET_IP_);

		if (!XFS_BUF_ISDONE(bp)) {
			XFS_STATS_INC(xb_get_read);
			_xfs_buf_read(bp, flags);
		} else if (flags & XBF_ASYNC) {
			/*
			 * Read ahead call which is already satisfied,
			 * drop the buffer
			 */
			goto no_buffer;
		} else {
			/* We do not want read in the flags */
			bp->b_flags &= ~XBF_READ;
		}
	}

	return bp;

 no_buffer:
	if (flags & (XBF_LOCK | XBF_TRYLOCK))
		xfs_buf_unlock(bp);
	xfs_buf_rele(bp);
	return NULL;
}
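
/*
 * Usage sketch (illustrative, not from the original file): ioff and
 * isize are expressed in BBSHIFT-sized basic blocks, and callers
 * typically drop the result with xfs_buf_relse() from xfs_buf.h:
 *
 *	xfs_buf_t *bp = xfs_buf_read(btp, blkno, numblks,
 *				     XBF_LOCK | XBF_MAPPED);
 *	if (bp) {
 *		... inspect bp->b_addr ...
 *		xfs_buf_relse(bp);
 *	}
 */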

/*
 *	If we are not low on memory then do the readahead in a deadlock
 *	safe manner.
 */
void
xfs_buf_readahead(
	xfs_buftarg_t		*target,
	xfs_off_t		ioff,
	size_t			isize,
	xfs_buf_flags_t		flags)
{
	struct backing_dev_info *bdi;

	bdi = target->bt_mapping->backing_dev_info;
	if (bdi_read_congested(bdi))
		return;

	flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
	xfs_buf_read(target, ioff, isize, flags);
}

xfs_buf_t *
xfs_buf_get_empty(
	size_t			len,
	xfs_buftarg_t		*target)
{
	xfs_buf_t		*bp;

	bp = xfs_buf_allocate(0);
	if (bp)
		_xfs_buf_initialize(bp, target, 0, len, 0);
	return bp;
}

static inline struct page *
mem_to_page(
	void			*addr)
{
	if ((!is_vmalloc_addr(addr))) {
		return virt_to_page(addr);
	} else {
		return vmalloc_to_page(addr);
	}
}

int
xfs_buf_associate_memory(
	xfs_buf_t		*bp,
	void			*mem,
	size_t			len)
{
	int			rval;
	int			i = 0;
	unsigned long		pageaddr;
	unsigned long		offset;
	size_t			buflen;
	int			page_count;

	pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
	offset = (unsigned long)mem - pageaddr;
	buflen = PAGE_CACHE_ALIGN(len + offset);
	page_count = buflen >> PAGE_CACHE_SHIFT;

	/* Free any previous set of page pointers */
	if (bp->b_pages)
		_xfs_buf_free_pages(bp);

	bp->b_pages = NULL;
	bp->b_addr = mem;

	rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
	if (rval)
		return rval;

	bp->b_offset = offset;

	for (i = 0; i < bp->b_page_count; i++) {
		bp->b_pages[i] = mem_to_page((void *)pageaddr);
		pageaddr += PAGE_CACHE_SIZE;
	}

	bp->b_count_desired = len;
	bp->b_buffer_length = buflen;
	bp->b_flags |= XBF_MAPPED;
	bp->b_flags &= ~_XBF_PAGE_LOCKED;

	return 0;
}
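
/*
 * Worked example (illustrative, not from the original file): wrapping
 * a region that starts 256 bytes into a page with len == 1024 on a
 * 4 KiB page gives offset == 256, buflen == PAGE_CACHE_ALIGN(1280) ==
 * 4096 and page_count == 1, so the buffer reuses the single existing
 * page and allocates nothing new.
 */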

xfs_buf_t *
xfs_buf_get_noaddr(
	size_t			len,
	xfs_buftarg_t		*target)
{
	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
	int			error, i;
	xfs_buf_t		*bp;

	bp = xfs_buf_allocate(0);
	if (unlikely(bp == NULL))
		goto fail;
	_xfs_buf_initialize(bp, target, 0, len, 0);

	error = _xfs_buf_get_pages(bp, page_count, 0);
	if (error)
		goto fail_free_buf;

	for (i = 0; i < page_count; i++) {
		bp->b_pages[i] = alloc_page(GFP_KERNEL);
		if (!bp->b_pages[i])
			goto fail_free_mem;
	}
	bp->b_flags |= _XBF_PAGES;

	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
	if (unlikely(error)) {
		printk(KERN_WARNING "%s: failed to map pages\n",
				__func__);
		goto fail_free_mem;
	}

	xfs_buf_unlock(bp);

	trace_xfs_buf_get_noaddr(bp, _RET_IP_);
	return bp;

 fail_free_mem:
	while (--i >= 0)
		__free_page(bp->b_pages[i]);
	_xfs_buf_free_pages(bp);
 fail_free_buf:
	xfs_buf_deallocate(bp);
 fail:
	return NULL;
}

/*
 *	Increment reference count on buffer, to hold the buffer concurrently
 *	with another thread which may release (free) the buffer asynchronously.
 *	Must hold the buffer already to call this function.
 */
void
xfs_buf_hold(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_hold(bp, _RET_IP_);
	atomic_inc(&bp->b_hold);
}

/*
 *	Releases a hold on the specified buffer.  If the
 *	hold count is 1, calls xfs_buf_free.
 */
void
xfs_buf_rele(
	xfs_buf_t		*bp)
{
	xfs_bufhash_t		*hash = bp->b_hash;

	trace_xfs_buf_rele(bp, _RET_IP_);

	if (unlikely(!hash)) {
		ASSERT(!bp->b_relse);
		if (atomic_dec_and_test(&bp->b_hold))
			xfs_buf_free(bp);
		return;
	}

	ASSERT(atomic_read(&bp->b_hold) > 0);
	if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
		if (bp->b_relse) {
			atomic_inc(&bp->b_hold);
			spin_unlock(&hash->bh_lock);
			(*(bp->b_relse)) (bp);
		} else if (bp->b_flags & XBF_FS_MANAGED) {
			spin_unlock(&hash->bh_lock);
		} else {
			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
			list_del_init(&bp->b_hash_list);
			spin_unlock(&hash->bh_lock);
			xfs_buf_free(bp);
		}
	}
}


/*
 *	Mutual exclusion on buffers.  Locking model:
 *
 *	Buffers associated with inodes for which buffer locking
 *	is not enabled are not protected by semaphores, and are
 *	assumed to be exclusively owned by the caller.  There is a
 *	spinlock in the buffer, used by the caller when concurrent
 *	access is possible.
 */

/*
 *	Locks a buffer object, if it is not already locked.
 *	Note that this in no way locks the underlying pages, so it is only
 *	useful for synchronizing concurrent use of buffer objects, not for
 *	synchronizing independent access to the underlying pages.
 */
int
xfs_buf_cond_lock(
	xfs_buf_t		*bp)
{
	int			locked;

	locked = down_trylock(&bp->b_sema) == 0;
	if (locked)
		XB_SET_OWNER(bp);

	trace_xfs_buf_cond_lock(bp, _RET_IP_);
	return locked ? 0 : -EBUSY;
}
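
/*
 * Caller pattern (illustrative, not from the original file):
 *
 *	if (xfs_buf_cond_lock(bp) == 0) {
 *		... the buffer is exclusively ours ...
 *		xfs_buf_unlock(bp);
 *	}
 */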

int
xfs_buf_lock_value(
	xfs_buf_t		*bp)
{
	return bp->b_sema.count;
}

/*
 *	Locks a buffer object.
 *	Note that this in no way locks the underlying pages, so it is only
 *	useful for synchronizing concurrent use of buffer objects, not for
 *	synchronizing independent access to the underlying pages.
 */
void
xfs_buf_lock(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_lock(bp, _RET_IP_);

	if (atomic_read(&bp->b_io_remaining))
		blk_run_address_space(bp->b_target->bt_mapping);
	down(&bp->b_sema);
	XB_SET_OWNER(bp);

	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

/*
 *	Releases the lock on the buffer object.
 *	If the buffer is marked delwri but is not queued, do so before we
 *	unlock the buffer as we need to set flags correctly.  We also need to
 *	take a reference for the delwri queue because the unlocker is going to
 *	drop theirs and they don't know we just queued it.
 */
void
xfs_buf_unlock(
	xfs_buf_t		*bp)
{
	if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
		atomic_inc(&bp->b_hold);
		bp->b_flags |= XBF_ASYNC;
		xfs_buf_delwri_queue(bp, 0);
	}

	XB_CLEAR_OWNER(bp);
	up(&bp->b_sema);

	trace_xfs_buf_unlock(bp, _RET_IP_);
}


/*
 *	Pinning Buffer Storage in Memory
 *	Ensure that no attempt to force a buffer to disk will succeed.
 */
void
xfs_buf_pin(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_pin(bp, _RET_IP_);
	atomic_inc(&bp->b_pin_count);
}

void
xfs_buf_unpin(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_unpin(bp, _RET_IP_);

	if (atomic_dec_and_test(&bp->b_pin_count))
		wake_up_all(&bp->b_waiters);
}

int
xfs_buf_ispin(
	xfs_buf_t		*bp)
{
	return atomic_read(&bp->b_pin_count);
}

STATIC void
xfs_buf_wait_unpin(
	xfs_buf_t		*bp)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&bp->b_pin_count) == 0)
			break;
		if (atomic_read(&bp->b_io_remaining))
			blk_run_address_space(bp->b_target->bt_mapping);
		schedule();
	}
	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}
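
/*
 * Note (added, not in the original file): this is the classic
 * open-coded wait-queue idiom - add_wait_queue(), then a loop that
 * sets TASK_UNINTERRUPTIBLE, re-checks the condition and calls
 * schedule(), then remove_wait_queue() - plus a blk_run_address_space()
 * nudge so the I/O that will eventually unpin the buffer keeps moving.
 */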

/*
 *	Buffer Utility Routines
 */

STATIC void
xfs_buf_iodone_work(
	struct work_struct	*work)
{
	xfs_buf_t		*bp =
		container_of(work, xfs_buf_t, b_iodone_work);