gfp.h 20.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3
#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

4
#include <linux/mmdebug.h>
Linus Torvalds's avatar
Linus Torvalds committed
5 6 7
#include <linux/mmzone.h>
#include <linux/stddef.h>
#include <linux/linkage.h>
8
#include <linux/topology.h>
Linus Torvalds's avatar
Linus Torvalds committed
9 10 11

struct vm_area_struct;

12 13 14 15 16
/*
 * Plain integer GFP bitmasks, one distinct bit per flag (bits 0..25).
 * Do not use these directly; the __GFP_* definitions wrap them as gfp_t.
 */
#define ___GFP_DMA		0x01u
#define ___GFP_HIGHMEM		0x02u
#define ___GFP_DMA32		0x04u
#define ___GFP_MOVABLE		0x08u
#define ___GFP_RECLAIMABLE	0x10u
#define ___GFP_HIGH		0x20u
#define ___GFP_IO		0x40u
#define ___GFP_FS		0x80u
#define ___GFP_COLD		0x100u
#define ___GFP_NOWARN		0x200u
#define ___GFP_REPEAT		0x400u
#define ___GFP_NOFAIL		0x800u
#define ___GFP_NORETRY		0x1000u
#define ___GFP_MEMALLOC		0x2000u
#define ___GFP_COMP		0x4000u
#define ___GFP_ZERO		0x8000u
#define ___GFP_NOMEMALLOC	0x10000u
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_ATOMIC		0x80000u
#define ___GFP_NOACCOUNT	0x100000u
#define ___GFP_NOTRACK		0x200000u
#define ___GFP_DIRECT_RECLAIM	0x400000u
#define ___GFP_OTHER_NODE	0x800000u
#define ___GFP_WRITE		0x1000000u
#define ___GFP_KSWAPD_RECLAIM	0x2000000u
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
40

Linus Torvalds's avatar
Linus Torvalds committed
41
/*
42
 * Physical address zone modifiers (see linux/mmzone.h - low four bits)
43 44
 *
 * Do not put any conditional on these. If necessary modify the definitions
45
 * without the underscores and use them consistently. The definitions here may
46
 * be used in bit comparisons.
Linus Torvalds's avatar
Linus Torvalds committed
47
 */
48 49 50 51
/* Typed (gfp_t) zone modifiers; GFP_ZONEMASK is the union of all four. */
#define __GFP_DMA	((__force gfp_t)___GFP_DMA)
#define __GFP_HIGHMEM	((__force gfp_t)___GFP_HIGHMEM)
#define __GFP_DMA32	((__force gfp_t)___GFP_DMA32)
#define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* Page is movable; ZONE_MOVABLE allowed */
#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
54

Linus Torvalds's avatar
Linus Torvalds committed
55
/*
56
 * Page mobility and placement hints
Linus Torvalds's avatar
Linus Torvalds committed
57
 *
58 59 60
 * These flags provide hints about how mobile the page is. Pages with similar
 * mobility are placed within the same pageblocks to minimise problems due
 * to external fragmentation.
Linus Torvalds's avatar
Linus Torvalds committed
61
 *
62 63
 * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
 *   moved by page migration during memory compaction or can be reclaimed.
Linus Torvalds's avatar
Linus Torvalds committed
64
 *
65 66 67 68 69 70
 * __GFP_RECLAIMABLE is used for slab allocations that specify
 *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
 *
 * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
 *   these pages will be spread between local zones to avoid all the dirty
 *   pages being in one zone (fair zone allocation policy).
71
 *
72 73 74 75
 * __GFP_HARDWALL enforces the cpuset memory allocation policy.
 *
 * __GFP_THISNODE forces the allocation to be satisfied from the requested
 *   node with no fallbacks or placement policy enforcements.
Linus Torvalds's avatar
Linus Torvalds committed
76
 */
77 78 79 80
/* Typed (gfp_t) forms of the mobility and placement bitmasks. */
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL)
#define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)
Andrea Arcangeli's avatar
Andrea Arcangeli committed
81

82
/*
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
 * Watermark modifiers -- controls access to emergency reserves
 *
 * __GFP_HIGH indicates that the caller is high-priority and that granting
 *   the request is necessary before the system can make forward progress.
 *   For example, creating an IO context to clean pages.
 *
 * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
 *   high priority. Users are typically interrupt handlers. This may be
 *   used in conjunction with __GFP_HIGH
 *
 * __GFP_MEMALLOC allows access to all memory. This should only be used when
 *   the caller guarantees the allocation will allow more memory to be freed
 *   very shortly e.g. process exiting or swapping. Users either should
 *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
 *
 * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
 *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
 *
 * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
102
 */
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/* Typed (gfp_t) forms of the watermark modifier bitmasks. */
#define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)
#define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)
#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)
#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
#define __GFP_NOACCOUNT	((__force gfp_t)___GFP_NOACCOUNT)

/*
 * Reclaim modifiers
 *
 * __GFP_IO can start physical IO.
 *
 * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
 *   allocator recursing into the filesystem which might already be holding
 *   locks.
 *
 * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
 *   This flag can be cleared to avoid unnecessary delays when a fallback
 *   option is available.
 *
 * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
 *   the low watermark is reached and have it reclaim pages until the high
 *   watermark is reached. A caller may wish to clear this flag when fallback
 *   options are available and the reclaim is likely to disrupt the system. The
 *   canonical example is THP allocation where a fallback is cheap but
 *   reclaim/compaction may cause indirect stalls.
 *
 * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
 *
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
 *   _might_ fail.  This depends upon the particular VM implementation.
 *
 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
 *   cannot handle allocation failures. New users should be evaluated carefully
 *   (and the flag should be used only when there is no reasonable failure
 *   policy) but it is definitely preferable to use the flag rather than
 *   opencode endless loop around allocator.
 *
 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
 *   return NULL when direct reclaim and memory compaction have failed to allow
 *   the allocation to succeed.  The OOM killer is not called with the current
 *   implementation.
 */
/* Typed (gfp_t) forms of the reclaim modifier bitmasks. */
#define __GFP_IO	((__force gfp_t)___GFP_IO)
#define __GFP_FS	((__force gfp_t)___GFP_FS)
#define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
#define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
/* Both reclaim bits combined: direct reclaim and kswapd wakeup. */
#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)
#define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
#define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
153

Vegard Nossum's avatar
Vegard Nossum committed
154
/*
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
 * Action modifiers
 *
 * __GFP_COLD indicates that the caller does not expect the page to be used in
 *   the near future. Where possible, a cache-cold page will be returned.
 *
 * __GFP_NOWARN suppresses allocation failure reports.
 *
 * __GFP_COMP addresses compound page metadata.
 *
 * __GFP_ZERO returns a zeroed page on success.
 *
 * __GFP_NOTRACK avoids tracking with kmemcheck.
 *
 * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
 *   distinguishing in the source between false positives and allocations that
 *   cannot be supported (e.g. page tables).
 *
 * __GFP_OTHER_NODE is for allocations that are on a remote node but that
 *   should not be accounted for as a remote allocation in vmstat. A
 *   typical user would be khugepaged collapsing a huge page on a remote
 *   node.
Vegard Nossum's avatar
Vegard Nossum committed
176
 */
177 178 179 180 181
/* Typed (gfp_t) forms of the action modifier bitmasks. */
#define __GFP_COLD	((__force gfp_t)___GFP_COLD)
#define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP	((__force gfp_t)___GFP_COMP)
#define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)
/* Alias of __GFP_NOTRACK, kept distinct only for readability at call sites. */
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
Vegard Nossum's avatar
Vegard Nossum committed
184

185 186
/*
 * Room for N __GFP_FOO bits: __GFP_BITS_SHIFT is the number of flag bits and
 * __GFP_BITS_MASK is the gfp_t mask with exactly those low bits set.
 */
#define __GFP_BITS_SHIFT 26
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
Linus Torvalds's avatar
Linus Torvalds committed
188

189
/*
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
 * Useful GFP flag combinations that are commonly used. It is recommended
 * that subsystems start with one of these combinations and then set/clear
 * __GFP_FOO flags as necessary.
 *
 * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
 *   watermark is applied to allow access to "atomic reserves"
 *
 * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
 *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
 *
 * GFP_NOWAIT is for kernel allocations that should not stall for direct
 *   reclaim, start physical IO or use any filesystem callback.
 *
 * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
 *   that do not require the starting of any physical IO.
 *
 * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
 *
 * GFP_USER is for userspace allocations that also need to be directly
 *   accessible by the kernel or hardware. It is typically used by hardware
 *   for buffers that are mapped to userspace (e.g. graphics) that hardware
 *   still must DMA to. cpuset limits are enforced for these allocations.
 *
 * GFP_DMA exists for historical reasons and should be avoided where possible.
 *   The flag indicates that the caller requires that the lowest zone be
 *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
 *   it would require careful auditing as some users really require it and
 *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
 *   lowest zone as a type of emergency reserve.
 *
 * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
 *   address.
 *
 * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
 *   do not need to be directly accessible by the kernel but that cannot
 *   move once in use. An example may be a hardware allocation that maps
 *   data directly into userspace but has no addressing limitations.
 *
 * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
 *   need direct access to but can use kmap() when access is required. They
 *   are expected to be movable via page reclaim or page migration. Typically,
 *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
 *
 * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
 *   that will fail quickly if memory is not available and will not wake
 *   kswapd on failure.
236 237
 */
/* Common flag combinations, each built only from the __GFP_* bits shown. */
#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
#define GFP_NOIO	(__GFP_RECLAIM)
#define GFP_NOFS	(__GFP_RECLAIM | __GFP_IO)
#define GFP_TEMPORARY	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
			 __GFP_RECLAIMABLE)
#define GFP_USER	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_DMA		__GFP_DMA
#define GFP_DMA32	__GFP_DMA32
#define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
/* GFP_HIGHUSER_MOVABLE plus THP-specific bits, with kswapd wakeup masked out. */
#define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
			 ~__GFP_KSWAPD_RECLAIM)
Linus Torvalds's avatar
Linus Torvalds committed
252

253
/* Convert GFP flags to their corresponding migrate type */
254
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
255
#define GFP_MOVABLE_SHIFT 3
Christoph Lameter's avatar
Christoph Lameter committed
256

257
static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
258
{
259 260 261
	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
262 263 264 265 266

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
267
	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
268
}
269 270
#undef GFP_MOVABLE_MASK
#undef GFP_MOVABLE_SHIFT
271

272 273 274 275 276
/* Report whether @gfp_flags has the __GFP_DIRECT_RECLAIM bit set. */
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
	const gfp_t direct_reclaim = gfp_flags & __GFP_DIRECT_RECLAIM;

	return direct_reclaim;
}

277 278 279 280 281 282
/*
 * OPT_ZONE_* name an optional zone when its config option is enabled and
 * fall back to ZONE_NORMAL when it is not.
 */
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA
#define OPT_ZONE_DMA ZONE_DMA
#else
#define OPT_ZONE_DMA ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif
294 295 296 297 298

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT long
 * and there are 16 of them to cover all possible combinations of
299
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
300 301 302 303
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
304
 * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
320
 *       0xc    => DMA32 or NORMAL (MOVABLE+DMA32)
321 322 323 324 325 326 327 328 329 330 331 332
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

#if 16 * ZONES_SHIFT > BITS_PER_LONG
#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

/*
 * One ZONES_SHIFT-wide entry per valid zone-bit combination, stored at bit
 * offset (combination * ZONES_SHIFT). gfp_zone() indexes it the same way.
 */
#define GFP_ZONE_TABLE ( \
	(ZONE_NORMAL << 0 * ZONES_SHIFT)				      \
	| (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT)			      \
	| (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT)		      \
	| (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT)		      \
	| (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT)			      \
	| (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT)	      \
	| (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT)   \
	| (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT)   \
)

/*
344
 * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32,
345 346 347 348 349
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
 * entry starting with bit 0. Bit is set if the combination is not
 * allowed.
 */
/* Bit N is set when zone-bit combination N is not a permitted request. */
#define GFP_ZONE_BAD ( \
	1 << (___GFP_DMA | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32)				      \
	| 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
)

/*
 * gfp_zone - look up the zone selected by the zone modifier bits of @flags.
 *
 * Masks @flags with GFP_ZONEMASK, uses the result as an index into
 * GFP_ZONE_TABLE (entries are ZONES_SHIFT bits wide) and returns that entry.
 * A combination flagged in GFP_ZONE_BAD triggers VM_BUG_ON.
 */
static inline enum zone_type gfp_zone(gfp_t flags)
{
	enum zone_type z;
	int bit = (__force int) (flags & GFP_ZONEMASK);

	/* Shift the wanted entry down to bit 0, then mask off its neighbours. */
	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
					 ((1 << ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
	return z;
}

Linus Torvalds's avatar
Linus Torvalds committed
371 372 373 374 375 376 377
/*
 * There is only one page-allocator function, and two main namespaces to
 * it. The alloc_page*() variants return 'struct page *' and as such
 * can allocate highmem pages, the *get*page*() variants return
 * virtual kernel addresses to the allocated page(s).
 */

378 379
/*
 * gfp_zonelist - index of the per-node zonelist to use for @flags.
 *
 * Returns 1 when CONFIG_NUMA is enabled and __GFP_THISNODE is set in @flags,
 * and 0 otherwise. node_zonelist() adds the result to node_zonelists.
 */
static inline int gfp_zonelist(gfp_t flags)
{
	if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE))
		return 1;

	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
386 387 388
/*
 * We get the zone list from the current node and the gfp_mask.
 * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones.
389 390
 * There are two zonelists per node, one for all zones with memory and
 * one containing just zones from the node the zonelist belongs to.
Linus Torvalds's avatar
Linus Torvalds committed
391 392 393 394
 *
 * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
 * optimized to &contig_page_data at compile-time.
 */
395 396
/*
 * node_zonelist - zonelist of node @nid selected by @flags.
 *
 * Returns NODE_DATA(@nid)->node_zonelists offset by gfp_zonelist(@flags).
 */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
Linus Torvalds's avatar
Linus Torvalds committed
399 400 401 402

/* Empty default used unless the architecture defines HAVE_ARCH_FREE_PAGE. */
#ifndef HAVE_ARCH_FREE_PAGE
static inline void arch_free_page(struct page *page, int order) { }
#endif
/* Empty default used unless the architecture defines HAVE_ARCH_ALLOC_PAGE. */
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
Linus Torvalds's avatar
Linus Torvalds committed
406

407
struct page *
408
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
409 410 411 412 413 414
		       struct zonelist *zonelist, nodemask_t *nodemask);

static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
415
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
416 417
}

418 419 420 421 422 423
/*
 * Allocate pages, preferring the node given as nid. The node must be valid and
 * online. For more general interface, see alloc_pages_node().
 *
 * An out-of-range @nid triggers VM_BUG_ON and an offline node triggers
 * VM_WARN_ON. The allocation itself is __alloc_pages() on the zonelist that
 * node_zonelist() returns for @nid and @gfp_mask.
 */
static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
	VM_WARN_ON(!node_online(nid));

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

431 432
/*
 * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
 * prefer the current CPU's closest node. Otherwise node must be valid and
 * online.
 *
 * NUMA_NO_NODE is replaced by numa_mem_id() before the request is handed to
 * __alloc_pages_node().
 */
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	return __alloc_pages_node(nid, gfp_mask, order);
}

Linus Torvalds's avatar
Linus Torvalds committed
445
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);

/* NUMA build: alloc_pages() forwards to alloc_pages_current(). */
static inline struct page *
alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_current(gfp_mask, order);
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
			struct vm_area_struct *vma, unsigned long addr,
			int node, bool hugepage);
#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
#else
/* !NUMA build: allocate through alloc_pages_node() on numa_node_id(). */
#define alloc_pages(gfp_mask, order) \
		alloc_pages_node(numa_node_id(), gfp_mask, order)
/* vma, addr, node and hugepage are accepted but ignored in this variant. */
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, hugepage)\
	alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
	alloc_pages(gfp_mask, order)
#endif
/* Order-0 convenience wrappers around the allocators above. */
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr)			\
	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
#define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
Linus Torvalds's avatar
Linus Torvalds committed
471

Vladimir Davydov's avatar
Vladimir Davydov committed
472 473 474 475
extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
					  unsigned int order);

476 477
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
Linus Torvalds's avatar
Linus Torvalds committed
478

479 480
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
481
void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
482

Linus Torvalds's avatar
Linus Torvalds committed
483
#define __get_free_page(gfp_mask) \
484
		__get_free_pages((gfp_mask), 0)
Linus Torvalds's avatar
Linus Torvalds committed
485 486

#define __get_dma_pages(gfp_mask, order) \
487
		__get_free_pages((gfp_mask) | GFP_DMA, (order))
Linus Torvalds's avatar
Linus Torvalds committed
488

489 490
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
491 492
extern void free_hot_cold_page(struct page *page, bool cold);
extern void free_hot_cold_page_list(struct list_head *list, bool cold);
Linus Torvalds's avatar
Linus Torvalds committed
493

494 495 496 497 498
struct page_frag_cache;
extern void *__alloc_page_frag(struct page_frag_cache *nc,
			       unsigned int fragsz, gfp_t gfp_mask);
extern void __free_page_frag(void *addr);

Vladimir Davydov's avatar
Vladimir Davydov committed
499 500
extern void __free_kmem_pages(struct page *page, unsigned int order);
extern void free_kmem_pages(unsigned long addr, unsigned int order);
501

Linus Torvalds's avatar
Linus Torvalds committed
502
#define __free_page(page) __free_pages((page), 0)
503
#define free_page(addr) free_pages((addr), 0)
Linus Torvalds's avatar
Linus Torvalds committed
504 505

void page_alloc_init(void);
506
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
507 508
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
Linus Torvalds's avatar
Linus Torvalds committed
509

510 511 512 513 514 515 516 517
#ifndef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Without CONFIG_DEFERRED_STRUCT_PAGE_INIT this is an empty inline stub. */
static inline void page_alloc_init_late(void) { }
#else
void page_alloc_init_late(void);
#endif

518 519 520 521 522 523 524
/*
 * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
 * GFP flags are used before interrupts are enabled. Once interrupts are
 * enabled, it is set to __GFP_BITS_MASK while the system is running. During
 * hibernation, it is used by PM to avoid I/O during memory allocation while
 * devices are suspended.
 */
525 526
extern gfp_t gfp_allowed_mask;

527 528 529
/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);

530 531
extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);
532

533 534 535 536 537 538 539 540 541
#ifndef CONFIG_PM_SLEEP
/* Without CONFIG_PM_SLEEP the inline stub always returns false. */
static inline bool pm_suspended_storage(void)
{
	return false;
}
#else
extern bool pm_suspended_storage(void);
#endif /* CONFIG_PM_SLEEP */

542 543 544
#ifdef CONFIG_CMA

/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range(unsigned long start, unsigned long end,
			      unsigned migratetype);
extern void free_contig_range(unsigned long pfn, unsigned nr_pages);

/* CMA stuff */
extern void init_cma_reserved_pageblock(struct page *page);

#endif

Linus Torvalds's avatar
Linus Torvalds committed
554
#endif /* __LINUX_GFP_H */