#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

#include <linux/mmzone.h>
#include <linux/stddef.h>
#include <linux/linkage.h>
#include <linux/topology.h>
#include <linux/mmdebug.h>

struct vm_area_struct;

/* Plain integer GFP bitmasks. Do not use this directly. */
#define ___GFP_DMA		0x01u
#define ___GFP_HIGHMEM		0x02u
#define ___GFP_DMA32		0x04u
#define ___GFP_MOVABLE		0x08u
#define ___GFP_WAIT		0x10u
#define ___GFP_HIGH		0x20u
#define ___GFP_IO		0x40u
#define ___GFP_FS		0x80u
#define ___GFP_COLD		0x100u
#define ___GFP_NOWARN		0x200u
#define ___GFP_REPEAT		0x400u
#define ___GFP_NOFAIL		0x800u
#define ___GFP_NORETRY		0x1000u
#define ___GFP_COMP		0x4000u
#define ___GFP_ZERO		0x8000u
#define ___GFP_NOMEMALLOC	0x10000u
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_RECLAIMABLE	0x80000u
#ifdef CONFIG_KMEMCHECK
#define ___GFP_NOTRACK		0x200000u
#else
#define ___GFP_NOTRACK		0
#endif
#define ___GFP_NO_KSWAPD	0x400000u
#define ___GFP_OTHER_NODE	0x800000u

/*
 * GFP bitmasks..
 *
 * Zone modifiers (see linux/mmzone.h - low four bits)
 *
 * Do not put any conditional on these. If necessary modify the definitions
 * without the underscores and use them consistently. The definitions here may
 * be used in bit comparisons.
 */
#define __GFP_DMA	((__force gfp_t)___GFP_DMA)
#define __GFP_HIGHMEM	((__force gfp_t)___GFP_HIGHMEM)
#define __GFP_DMA32	((__force gfp_t)___GFP_DMA32)
#define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* Page is movable */
#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
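
/*
 * Example (for illustration): a zone modifier is OR-ed into an ordinary
 * allocation type to restrict which zones may satisfy the request, e.g.
 *
 *	buf = kmalloc(size, GFP_KERNEL | GFP_DMA);
 *
 * asks for memory from ZONE_DMA while otherwise behaving like a plain
 * GFP_KERNEL allocation.
 */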
/*
 * Action modifiers - don't change the zoning
 *
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
 * _might_ fail.  This depends upon the particular VM implementation.
 *
 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
 * cannot handle allocation failures.  This modifier is deprecated and no new
 * users should be added.
 *
 * __GFP_NORETRY: The VM implementation must not retry indefinitely.
 *
 * __GFP_MOVABLE: Flag that this page will be movable by the page migration
 * mechanism or reclaimed
 */
#define __GFP_WAIT	((__force gfp_t)___GFP_WAIT)	/* Can wait and reschedule? */
#define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)	/* Should access emergency pools? */
#define __GFP_IO	((__force gfp_t)___GFP_IO)	/* Can start physical IO? */
#define __GFP_FS	((__force gfp_t)___GFP_FS)	/* Can call down to low-level FS? */
#define __GFP_COLD	((__force gfp_t)___GFP_COLD)	/* Cache-cold page required */
#define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)	/* Suppress page allocation failure warning */
#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)	/* See above */
#define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)	/* See above */
#define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY) /* See above */
#define __GFP_COMP	((__force gfp_t)___GFP_COMP)	/* Add compound page metadata */
#define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)	/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves */
#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
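
/*
 * Example (for illustration): action modifiers are OR-ed into a base
 * allocation type; a caller that wants a zeroed page and no failure
 * warning could use
 *
 *	page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
 */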

#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */

/*
 * This may seem redundant, but it's a way of annotating false positives vs.
 * allocations that simply cannot be supported (e.g. page tables).
 */
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)

#define __GFP_BITS_SHIFT 24	/* Room for N __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))

/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC	(__GFP_HIGH)
#define GFP_NOIO	(__GFP_WAIT)
#define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
			 __GFP_RECLAIMABLE)
#define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
			 __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
				 __GFP_HARDWALL | __GFP_HIGHMEM | \
				 __GFP_MOVABLE)
#define GFP_IOFS	(__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
			 __GFP_NO_KSWAPD)
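
/*
 * Example (for illustration): process context that may sleep typically
 * passes GFP_KERNEL, while interrupt context or code holding a spinlock
 * must pass GFP_ATOMIC, which lacks __GFP_WAIT and sets __GFP_HIGH so the
 * allocator never sleeps but may dip into the emergency pools:
 *
 *	skb = alloc_skb(len, GFP_ATOMIC);	(IRQ handler)
 *	buf = kmalloc(len, GFP_KERNEL);		(process context)
 */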

#ifdef CONFIG_NUMA
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
#else
#define GFP_THISNODE	((__force gfp_t)0)
#endif

/* This mask makes up all the page movable related flags */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

/* Control page allocator reclaim behavior */
#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_NOMEMALLOC)

/* Control slab gfp mask during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))

/* Control allocation constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)

/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   platforms, used as appropriate on others */

#define GFP_DMA		__GFP_DMA

/* 4GB DMA on some platforms */
#define GFP_DMA32	__GFP_DMA32

/* Convert GFP flags to their corresponding migrate type */
static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
		((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
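
/*
 * Worked example (for illustration): GFP_HIGHUSER_MOVABLE sets
 * __GFP_MOVABLE but not __GFP_RECLAIMABLE, so the expression above
 * evaluates to (1 << 1) | 0 == 2, i.e. MIGRATE_MOVABLE; plain GFP_KERNEL
 * sets neither flag and maps to MIGRATE_UNMOVABLE.
 */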

#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA
#define OPT_ZONE_DMA ZONE_DMA
#else
#define OPT_ZONE_DMA ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT long
 * and there are 16 of them to cover all possible combinations of
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
 * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or DMA or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
 *       0xc    => DMA32 (MOVABLE+DMA32)
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

#if 16 * ZONES_SHIFT > BITS_PER_LONG
#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

#define GFP_ZONE_TABLE ( \
	(ZONE_NORMAL << 0 * ZONES_SHIFT)				      \
	| (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT)			      \
	| (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT)		      \
	| (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT)		      \
	| (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT)			      \
	| (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT)	      \
	| (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT)   \
	| (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT)   \
)

/*
 * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
 * entry starting with bit 0. Bit is set if the combination is not
 * allowed.
 */
#define GFP_ZONE_BAD ( \
	1 << (___GFP_DMA | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32)				      \
	| 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
)

static inline enum zone_type gfp_zone(gfp_t flags)
{
	enum zone_type z;
	int bit = (__force int) (flags & GFP_ZONEMASK);

	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
					 ((1 << ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
	return z;
}
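
/*
 * Worked example (for illustration): __GFP_HIGHMEM | __GFP_MOVABLE leaves
 * 0xa in the low bits, so gfp_zone() reads entry 0xa of GFP_ZONE_TABLE
 * and returns ZONE_MOVABLE; plain GFP_KERNEL has no zone bits set,
 * indexes entry 0x0 and returns ZONE_NORMAL.
 */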

/*
 * There is only one page-allocator function, and two main namespaces to
 * it. The alloc_page*() variants return 'struct page *' and as such
 * can allocate highmem pages, the *get*page*() variants return
 * virtual kernel addresses to the allocated page(s).
 */

static inline int gfp_zonelist(gfp_t flags)
{
	if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
		return 1;

	return 0;
}

/*
 * We get the zone list from the current node and the gfp_mask.
 * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
 * There are two zonelists per node, one for all zones with memory and
 * one containing just zones from the node the zonelist belongs to.
 *
 * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
 * optimized to &contig_page_data at compile-time.
 */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
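
/*
 * Example (for illustration): on a NUMA build, node_zonelist(nid, GFP_KERNEL)
 * returns node nid's general-purpose zonelist (index 0, with fallback to
 * other nodes), whereas node_zonelist(nid, GFP_THISNODE) returns index 1,
 * which contains only that node's own zones.
 */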

#ifndef HAVE_ARCH_FREE_PAGE
static inline void arch_free_page(struct page *page, int order) { }
#endif
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
		       struct zonelist *zonelist, nodemask_t *nodemask);

static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}

static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);

static inline struct page *
alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_current(gfp_mask, order);
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
			struct vm_area_struct *vma, unsigned long addr,
			int node);
#else
#define alloc_pages(gfp_mask, order) \
		alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_pages_vma(gfp_mask, order, vma, addr, node)	\
	alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr)			\
	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
#define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
	alloc_pages_vma(gfp_mask, 0, vma, addr, node)

extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);

void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
/* This is different from alloc_pages_exact_node !!! */
void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);

#define __get_free_page(gfp_mask) \
		__get_free_pages((gfp_mask), 0)

#define __get_dma_pages(gfp_mask, order) \
		__get_free_pages((gfp_mask) | GFP_DMA, (order))
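
/*
 * Usage sketch (for illustration): the two namespaces differ in what they
 * return, e.g.
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, 2);    (4 contiguous pages)
 *	unsigned long addr = __get_free_page(GFP_KERNEL);  (kernel virtual address)
 *
 * The *get*page* variants must not be passed __GFP_HIGHMEM, since highmem
 * pages may have no permanent kernel mapping to return.
 */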

extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

void page_alloc_init(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(void);
void drain_local_pages(void *dummy);

/*
 * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
 * GFP flags are used before interrupts are enabled. Once interrupts are
 * enabled, it is set to __GFP_BITS_MASK while the system is running. During
 * hibernation, it is used by PM to avoid I/O during memory allocation while
 * devices are suspended.
 */
extern gfp_t gfp_allowed_mask;

extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);

#ifdef CONFIG_PM_SLEEP
extern bool pm_suspended_storage(void);
#else
static inline bool pm_suspended_storage(void)
{
	return false;
}
#endif /* CONFIG_PM_SLEEP */

#endif /* __LINUX_GFP_H */