Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
* linux/mm/vmscan.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95, Stephen Tweedie.
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to freepages.high: 2.4.97, Rik van Riel.
* Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>
/* possible outcome of pageout() */
typedef enum {
/* failed to write page out, page is locked */
PAGE_KEEP,
/* move page to the active list, page is locked */
PAGE_ACTIVATE,
/* page has been sent to the disk successfully, page is unlocked */
PAGE_SUCCESS,
/* page is clean and locked */
PAGE_CLEAN,
} pageout_t;
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
unsigned long nr_mapped; /* From page_state */
/* This context's GFP mask */
/* Can pages be swapped as part of reclaim? */
int may_swap;
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/* This context's SWAP_CLUSTER_MAX. If freeing memory for
* suspend, we effectively ignore SWAP_CLUSTER_MAX.
* In this context, it doesn't matter that we scan the
* whole list at once. */
int swap_cluster_max;
};
/*
* The list of shrinker callbacks used by to apply pressure to
* ageable caches.
*/
struct shrinker {
shrinker_t shrinker;
struct list_head list;
int seeks; /* seeks to recreate an obj */
long nr; /* objs pending delete */
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
do { \
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
prev = lru_to_page(&(_page->lru)); \
prefetch(&prev->_field); \
} \
} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field) \
do { \
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
prev = lru_to_page(&(_page->lru)); \
prefetchw(&prev->_field); \
} \
} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
static long total_memory;
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
/*
* Add a shrinker callback to be called from the vm
*/
struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
{
struct shrinker *shrinker;
shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
if (shrinker) {
shrinker->shrinker = theshrinker;
shrinker->seeks = seeks;
shrinker->nr = 0;
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
}
return shrinker;
}
EXPORT_SYMBOL(set_shrinker);
/*
* Remove one
*/
void remove_shrinker(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
up_write(&shrinker_rwsem);
kfree(shrinker);
}
EXPORT_SYMBOL(remove_shrinker);
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* If the vm encounted mapped pages on the LRU it increase the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*
* `lru_pages' represents the number of on-LRU pages in all the zones which
* are eligible for the caller's allocation attempt. It is used for balancing
* slab reclaim versus page reclaim.
*
* Returns the number of slab objects which we shrunk.
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages)
if (scanned == 0)
scanned = SWAP_CLUSTER_MAX;
if (!down_read_trylock(&shrinker_rwsem))
return 1; /* Assume we'll be able to shrink next time */
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
unsigned long total_scan;
unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask);
delta *= max_pass;
do_div(delta, lru_pages + 1);
shrinker->nr += delta;
if (shrinker->nr < 0) {
printk(KERN_ERR "%s: nr=%ld\n",
__FUNCTION__, shrinker->nr);
shrinker->nr = max_pass;
}
/*
* Avoid risking looping forever due to too large nr value:
* never try to free more than twice the estimate number of
* freeable entries.
*/
if (shrinker->nr > max_pass * 2)
shrinker->nr = max_pass * 2;
total_scan = shrinker->nr;
shrinker->nr = 0;
while (total_scan >= SHRINK_BATCH) {
long this_scan = SHRINK_BATCH;
int shrink_ret;
nr_before = (*shrinker->shrinker)(0, gfp_mask);
shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
if (shrink_ret == -1)
break;
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
mod_page_state(slabs_scanned, this_scan);
total_scan -= this_scan;
cond_resched();
}
shrinker->nr += total_scan;
}
up_read(&shrinker_rwsem);
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
}
/* Called without lock on whether page is mapped, so answer is unstable */
static inline int page_mapping_inuse(struct page *page)
{
struct address_space *mapping;
/* Page is in somebody's page tables. */
if (page_mapped(page))
return 1;
/* Be more reluctant to reclaim swapcache than pagecache */
if (PageSwapCache(page))
return 1;
mapping = page_mapping(page);
if (!mapping)
return 0;
/* File is mmap'd by somebody? */
return mapping_mapped(mapping);
}
static inline int is_page_cache_freeable(struct page *page)
{
return page_count(page) - !!PagePrivate(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi)
{
if (current->flags & PF_SWAPWRITE)
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
return 1;
if (!bdi_write_congested(bdi))
return 1;
if (bdi == current->backing_dev_info)
return 1;
return 0;
}
/*
* We detected a synchronous write error writing a page out. Probably
* -ENOSPC. We need to propagate that into the address_space for a subsequent
* fsync(), msync() or close().
*
* The tricky part is that after writepage we cannot touch the mapping: nothing
* prevents it from being freed up. But we have a ref on the page and once
* that page is locked, the mapping is pinned.
*
* We're allowed to run sleeping lock_page() here because we know the caller has
* __GFP_FS.
*/
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
lock_page(page);
if (page_mapping(page) == mapping) {
if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else
set_bit(AS_EIO, &mapping->flags);
}
unlock_page(page);
}
/*
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
*/
static pageout_t pageout(struct page *page, struct address_space *mapping)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in generic_file_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
* See swapfile.c:page_queue_congested().
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
if (!mapping) {
/*
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
if (PagePrivate(page)) {
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
printk("%s: orphaned page\n", __FUNCTION__);
return PAGE_CLEAN;
}
}
return PAGE_KEEP;
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
if (!may_write_to_queue(mapping->backing_dev_info))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
.nonblocking = 1,
.for_reclaim = 1,
};
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
if (res < 0)
handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
return PAGE_ACTIVATE;
}
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
return PAGE_SUCCESS;
}
return PAGE_CLEAN;
}
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
static int remove_mapping(struct address_space *mapping, struct page *page)
{
if (!mapping)
return 0; /* truncate got there first */
write_lock_irq(&mapping->tree_lock);
/*
* The non-racy check for busy page. It is critical to check
* PageDirty _after_ making sure that the page is freeable and
* not in use by anybody. (pagecache + us == 2)
*/
if (unlikely(page_count(page) != 2))
goto cannot_free;
smp_rmb();
if (unlikely(PageDirty(page)))
goto cannot_free;
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
write_unlock_irq(&mapping->tree_lock);
swap_free(swap);
__put_page(page); /* The pagecache ref */
return 1;
}
__remove_from_page_cache(page);
write_unlock_irq(&mapping->tree_lock);
__put_page(page);
return 1;
cannot_free:
write_unlock_irq(&mapping->tree_lock);
return 0;
}
* shrink_page_list() returns the number of reclaimed pages
static unsigned long shrink_page_list(struct list_head *page_list,
struct scan_control *sc)
{
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
cond_resched();
pagevec_init(&freed_pvec, 1);
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
int may_enter_fs;
int referenced;
cond_resched();
page = lru_to_page(page_list);
list_del(&page->lru);
if (TestSetPageLocked(page))
goto keep;
BUG_ON(PageActive(page));
sc->nr_scanned++;
Christoph Lameter
committed
if (!sc->may_swap && page_mapped(page))
goto keep_locked;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
sc->nr_scanned++;
if (PageWriteback(page))
goto keep_locked;
referenced = page_referenced(page, 1);
/* In active use or really unfreeable? Activate it. */
if (referenced && page_mapping_inuse(page))
goto activate_locked;
#ifdef CONFIG_SWAP
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
if (!sc->may_swap)
goto keep_locked;
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
}
#endif /* CONFIG_SWAP */
mapping = page_mapping(page);
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
/*
* No unmapping if we do not swap
*/
if (!sc->may_swap)
goto keep_locked;
switch (try_to_unmap(page, 0)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
if (PageDirty(page)) {
if (referenced)
goto keep_locked;
if (!may_enter_fs)
goto keep_locked;
if (!sc->may_writepage)
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
goto keep_locked;
/* Page is dirty, try to write it out here */
switch(pageout(page, mapping)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:
if (PageWriteback(page) || PageDirty(page))
goto keep;
/*
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
if (TestSetPageLocked(page))
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
mapping = page_mapping(page);
case PAGE_CLEAN:
; /* try to free the page below */
}
}
/*
* If the page has buffers, try to free the buffer mappings
* associated with this page. If we succeed we try to free
* the page as well.
*
* We do this even if the page is PageDirty().
* try_to_release_page() does not perform I/O, but it is
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
* will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
* Rarely, pages can have buffers and no ->mapping. These are
* the pages which were not successfully invalidated in
* truncate_complete_page(). We try to drop those buffers here
* and if that worked, and the page is no longer mapped into
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
if (PagePrivate(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1)
goto free_it;
}
if (!remove_mapping(mapping, page))
goto keep_locked;
if (!pagevec_add(&freed_pvec, page))
__pagevec_release_nonlru(&freed_pvec);
continue;
activate_locked:
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
BUG_ON(PageLRU(page));
}
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
__pagevec_release_nonlru(&freed_pvec);
mod_page_state(pgactivate, pgactivate);
Christoph Lameter
committed
#ifdef CONFIG_MIGRATION
static inline void move_to_lru(struct page *page)
{
list_del(&page->lru);
if (PageActive(page)) {
/*
* lru_cache_add_active checks that
* the PG_active bit is off.
*/
ClearPageActive(page);
lru_cache_add_active(page);
} else {
lru_cache_add(page);
}
put_page(page);
}
/*
* Add isolated pages on the list back to the LRU.
*
* returns the number of pages put back.
*/
unsigned long putback_lru_pages(struct list_head *l)
{
struct page *page;
struct page *page2;
list_for_each_entry_safe(page, page2, l, lru) {
move_to_lru(page);
count++;
}
return count;
}
/*
* Non migratable page
*/
int fail_migrate_page(struct page *newpage, struct page *page)
{
return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);
/*
* swapout a single page
* page is locked upon entry, unlocked on exit
*/
static int swap_page(struct page *page)
{
struct address_space *mapping = page_mapping(page);
if (page_mapped(page) && mapping)
if (try_to_unmap(page, 1) != SWAP_SUCCESS)
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
goto unlock_retry;
if (PageDirty(page)) {
/* Page is dirty, try to write it out here */
switch(pageout(page, mapping)) {
case PAGE_KEEP:
case PAGE_ACTIVATE:
goto unlock_retry;
case PAGE_SUCCESS:
goto retry;
case PAGE_CLEAN:
; /* try to free the page below */
}
}
if (PagePrivate(page)) {
if (!try_to_release_page(page, GFP_KERNEL) ||
(!mapping && page_count(page) == 1))
goto unlock_retry;
}
if (remove_mapping(mapping, page)) {
/* Success */
unlock_page(page);
return 0;
}
unlock_retry:
unlock_page(page);
retry:
return -EAGAIN;
EXPORT_SYMBOL(swap_page);
/*
* Page migration was first developed in the context of the memory hotplug
* project. The main authors of the migration code are:
*
* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
* Hirokazu Takahashi <taka@valinux.co.jp>
* Dave Hansen <haveblue@us.ibm.com>
* Christoph Lameter <clameter@sgi.com>
*/
/*
* Remove references for a page and establish the new page with the correct
* basic settings to be able to stop accesses to the page.
*/
int migrate_page_remove_references(struct page *newpage,
struct page *page, int nr_refs)
{
struct address_space *mapping = page_mapping(page);
struct page **radix_pointer;
/*
* Avoid doing any of the following work if the page count
* indicates that the page is in use or truncate has removed
* the page.
*/
if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
return -EAGAIN;
/*
* Establish swap ptes for anonymous pages or destroy pte
* maps for files.
*
* In order to reestablish file backed mappings the fault handlers
* will take the radix tree_lock which may then be used to stop
* processses from accessing this page until the new page is ready.
*
* A process accessing via a swap pte (an anonymous page) will take a
* page_lock on the old page which will block the process until the
* migration attempt is complete. At that time the PageSwapCache bit
* will be examined. If the page was migrated then the PageSwapCache
* bit will be clear and the operation to retrieve the page will be
* retried which will find the new page in the radix tree. Then a new
* direct mapping may be generated based on the radix tree contents.
*
* If the page was not migrated then the PageSwapCache bit
* is still set and the operation may continue.
*/
if (try_to_unmap(page, 1) == SWAP_FAIL)
/* A vma has VM_LOCKED set -> Permanent failure */
return -EPERM;
/*
* Give up if we were unable to remove all mappings.
*/
if (page_mapcount(page))
return -EAGAIN;
write_lock_irq(&mapping->tree_lock);
radix_pointer = (struct page **)radix_tree_lookup_slot(
&mapping->page_tree,
page_index(page));
if (!page_mapping(page) || page_count(page) != nr_refs ||
*radix_pointer != page) {
write_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
/*
* Now we know that no one else is looking at the page.
*
* Certain minimal information about a page must be available
* in order for other subsystems to properly handle the page if they
* find it through the radix tree update before we are finished
* copying the page.
*/
get_page(newpage);
newpage->index = page->index;
newpage->mapping = page->mapping;
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
*radix_pointer = newpage;
__put_page(page);
write_unlock_irq(&mapping->tree_lock);
return 0;
}
EXPORT_SYMBOL(migrate_page_remove_references);
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
/*
* Copy the page to its new location
*/
void migrate_page_copy(struct page *newpage, struct page *page)
{
copy_highpage(newpage, page);
if (PageError(page))
SetPageError(newpage);
if (PageReferenced(page))
SetPageReferenced(newpage);
if (PageUptodate(page))
SetPageUptodate(newpage);
if (PageActive(page))
SetPageActive(newpage);
if (PageChecked(page))
SetPageChecked(newpage);
if (PageMappedToDisk(page))
SetPageMappedToDisk(newpage);
if (PageDirty(page)) {
clear_page_dirty_for_io(page);
set_page_dirty(newpage);
}
ClearPageSwapCache(page);
ClearPageActive(page);
ClearPagePrivate(page);
set_page_private(page, 0);
page->mapping = NULL;
/*
* If any waiters have accumulated on the new page then
* wake them up.
*/
if (PageWriteback(newpage))
end_page_writeback(newpage);
}
EXPORT_SYMBOL(migrate_page_copy);
/*
* Common logic to directly migrate a single page suitable for
* pages that do not use PagePrivate.
*
* Pages are locked upon entry and exit.
*/
int migrate_page(struct page *newpage, struct page *page)
{
int rc;
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
rc = migrate_page_remove_references(newpage, page, 2);
if (rc)
return rc;
migrate_page_copy(newpage, page);
/*
* Remove auxiliary swap entries and replace
* them with real ptes.
*
* Note that a real pte entry will allow processes that are not
* waiting on the page lock to use the new page via the page tables
* before the new page is unlocked.
*/
remove_from_swap(newpage);
return 0;
}
EXPORT_SYMBOL(migrate_page);
/*
* migrate_pages
*
* Two lists are passed to this function. The first list
* contains the pages isolated from the LRU to be migrated.
* The second list contains new pages that the pages isolated
* can be moved to. If the second list is NULL then all
* pages are swapped out.
*
* The function returns after 10 attempts or if no pages
* are movable anymore because to has become empty
* or no retryable pages exist anymore.
*
* Return: Number of pages not migrated when "to" ran empty.
unsigned long migrate_pages(struct list_head *from, struct list_head *to,
struct list_head *moved, struct list_head *failed)
unsigned long retry;
unsigned long nr_failed = 0;
int pass = 0;
struct page *page;
struct page *page2;
int swapwrite = current->flags & PF_SWAPWRITE;
int rc;
if (!swapwrite)
current->flags |= PF_SWAPWRITE;
redo:
retry = 0;
list_for_each_entry_safe(page, page2, from, lru) {
struct page *newpage = NULL;
struct address_space *mapping;
cond_resched();
rc = 0;
if (page_count(page) == 1)
/* page was freed from under us. So we are done. */
goto next;
if (to && list_empty(to))
break;
/*
* Skip locked pages during the first two passes to give the
Christoph Lameter
committed
* functions holding the lock time to release the page. Later we
* use lock_page() to have a higher chance of acquiring the
* lock.
rc = -EAGAIN;
if (pass > 2)
lock_page(page);
else
if (TestSetPageLocked(page))
goto next;
/*
* Only wait on writeback if we have already done a pass where
* we we may have triggered writeouts for lots of pages.
*/
Christoph Lameter
committed
if (pass > 0) {
wait_on_page_writeback(page);
Christoph Lameter
committed
} else {
if (PageWriteback(page))
goto unlock_page;
Christoph Lameter
committed
}
/*
* Anonymous pages must have swap cache references otherwise
* the information contained in the page maps cannot be
* preserved.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
if (!add_to_swap(page, GFP_KERNEL)) {
rc = -ENOMEM;
goto unlock_page;
if (!to) {
rc = swap_page(page);
goto next;
}
newpage = lru_to_page(to);
lock_page(newpage);
* Pages are properly locked and writeback is complete.
* Try to migrate the page.
*/
mapping = page_mapping(page);
if (!mapping)
goto unlock_both;
if (mapping->a_ops->migratepage) {
/*
* Most pages have a mapping and most filesystems
* should provide a migration function. Anonymous
* pages are part of swap space which also has its
* own migration function. This is the most common
* path for page migration.
*/
rc = mapping->a_ops->migratepage(newpage, page);
goto unlock_both;
}
* Default handling if a filesystem does not provide
* a migration function. We can only migrate clean
* pages so try to write out any dirty pages first.
*/
if (PageDirty(page)) {
switch (pageout(page, mapping)) {
case PAGE_KEEP:
case PAGE_ACTIVATE:
goto unlock_both;
case PAGE_SUCCESS:
unlock_page(newpage);
goto next;
case PAGE_CLEAN:
; /* try to migrate the page below */
}
}
* Buffers are managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
if (!page_has_buffers(page) ||
try_to_release_page(page, GFP_KERNEL)) {
rc = migrate_page(newpage, page);
goto unlock_both;
}
/*
* On early passes with mapped pages simply
* retry. There may be a lock held for some
* buffers that may go away. Later
* swap them out.
*/
if (pass > 4) {
/*
* Persistently unable to drop buffers..... As a
* measure of last resort we fall back to
* swap_page().
*/
unlock_page(newpage);
newpage = NULL;
rc = swap_page(page);
goto next;
}
unlock_both:
unlock_page(newpage);
unlock_page:
unlock_page(page);