Commit 8e4b9a60, authored by Hugh Dickins, committed by Linus Torvalds

mm: FOLL_DUMP replace FOLL_ANON

The "FOLL_ANON optimization" and its use_zero_page() test have caused
confusion and bugs: why does it test VM_SHARED? for the very good but
unsatisfying reason that VMware crashed without.  As we look to maybe
reinstating anonymous use of the ZERO_PAGE, we need to sort this out.

Easily done: it's silly for __get_user_pages() and follow_page() to
be guessing whether it's safe to assume that they're being used for
a coredump (which can take a shortcut snapshot where other uses must
handle a fault) - just tell them with GUP_FLAGS_DUMP and FOLL_DUMP.

get_dump_page() doesn't even want a ZERO_PAGE: an error suits fine.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f3e8fccd
...@@ -1231,7 +1231,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, ...@@ -1231,7 +1231,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_TOUCH 0x02 /* mark page accessed */
#define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data); void *data);
......
...@@ -252,6 +252,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, ...@@ -252,6 +252,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
#define GUP_FLAGS_WRITE 0x01 #define GUP_FLAGS_WRITE 0x01
#define GUP_FLAGS_FORCE 0x02 #define GUP_FLAGS_FORCE 0x02
#define GUP_FLAGS_DUMP 0x04
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int flags, unsigned long start, int len, int flags,
......
...@@ -1174,41 +1174,22 @@ no_page: ...@@ -1174,41 +1174,22 @@ no_page:
pte_unmap_unlock(ptep, ptl); pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte)) if (!pte_none(pte))
return page; return page;
/* Fall through to ZERO_PAGE handling */
no_page_table: no_page_table:
/* /*
* When core dumping an enormous anonymous area that nobody * When core dumping an enormous anonymous area that nobody
* has touched so far, we don't want to allocate page tables. * has touched so far, we don't want to allocate unnecessary pages or
* page tables. Return error instead of NULL to skip handle_mm_fault,
* then get_dump_page() will return NULL to leave a hole in the dump.
* But we can only make this optimization where a hole would surely
* be zero-filled if handle_mm_fault() actually did handle it.
*/ */
if (flags & FOLL_ANON) { if ((flags & FOLL_DUMP) &&
page = ZERO_PAGE(0); (!vma->vm_ops || !vma->vm_ops->fault))
if (flags & FOLL_GET) return ERR_PTR(-EFAULT);
get_page(page);
BUG_ON(flags & FOLL_WRITE);
}
return page; return page;
} }
/* Can we do the FOLL_ANON optimization? */
static inline int use_zero_page(struct vm_area_struct *vma)
{
/*
* We don't want to optimize FOLL_ANON for make_pages_present()
* when it tries to page in a VM_LOCKED region. As to VM_SHARED,
* we want to get the page from the page tables to make sure
* that we serialize and update with any other user of that
* mapping.
*/
if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
return 0;
/*
* And if we have a fault routine, it's not an anonymous region.
*/
return !vma->vm_ops || !vma->vm_ops->fault;
}
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int nr_pages, int flags, unsigned long start, int nr_pages, int flags,
struct page **pages, struct vm_area_struct **vmas) struct page **pages, struct vm_area_struct **vmas)
...@@ -1288,8 +1269,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -1288,8 +1269,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
foll_flags = FOLL_TOUCH; foll_flags = FOLL_TOUCH;
if (pages) if (pages)
foll_flags |= FOLL_GET; foll_flags |= FOLL_GET;
if (!write && use_zero_page(vma)) if (flags & GUP_FLAGS_DUMP)
foll_flags |= FOLL_ANON; foll_flags |= FOLL_DUMP;
do { do {
struct page *page; struct page *page;
...@@ -1446,7 +1427,7 @@ struct page *get_dump_page(unsigned long addr) ...@@ -1446,7 +1427,7 @@ struct page *get_dump_page(unsigned long addr)
struct page *page; struct page *page;
if (__get_user_pages(current, current->mm, addr, 1, if (__get_user_pages(current, current->mm, addr, 1,
GUP_FLAGS_FORCE, &page, &vma) < 1) GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1)
return NULL; return NULL;
if (page == ZERO_PAGE(0)) { if (page == ZERO_PAGE(0)) {
page_cache_release(page); page_cache_release(page);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment