Commit 5180da41 authored by Suresh Siddha, committed by Linus Torvalds
Browse files

x86, pat: separate the pfn attribute tracking for remap_pfn_range and vm_insert_pfn



With PAT enabled, vm_insert_pfn() looks up the existing pfn memory
attribute and uses it.  Expectation is that the driver reserves the
memory attributes for the pfn before calling vm_insert_pfn().

remap_pfn_range() (when called for the whole vma) will set up a new
attribute (based on the prot argument) for the specified pfn range.
This addresses the legacy usage which typically calls remap_pfn_range()
with a desired memory attribute.  For ranges smaller than the vma size
(which is typically not the case), remap_pfn_range() will use the
existing memory attribute for the pfn range.

Expose two different APIs for these different behaviors:
track_pfn_insert() for tracking the pfn attribute set by vm_insert_pfn(),
and track_pfn_remap() for remap_pfn_range().

This cleanup also prepares the ground for the track/untrack pfn vma
routines to take over the ownership of setting PAT specific vm_flag in
the 'vma'.

[khlebnikov@openvz.org: Clear checks in track_pfn_remap()]
[akpm@linux-foundation.org: tweak a few comments]
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b1a86e15
...@@ -664,13 +664,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) ...@@ -664,13 +664,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
} }
/* /*
* track_pfn_vma_copy is called when vma that is covering the pfnmap gets * track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range(). * copied through copy_page_range().
* *
* If the vma has a linear pfn mapping for the entire range, we get the prot * If the vma has a linear pfn mapping for the entire range, we get the prot
* from pte and reserve the entire vma range with single reserve_pfn_range call. * from pte and reserve the entire vma range with single reserve_pfn_range call.
*/ */
int track_pfn_vma_copy(struct vm_area_struct *vma) int track_pfn_copy(struct vm_area_struct *vma)
{ {
resource_size_t paddr; resource_size_t paddr;
unsigned long prot; unsigned long prot;
...@@ -694,15 +694,12 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) ...@@ -694,15 +694,12 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
} }
/* /*
* track_pfn_vma_new is called when a _new_ pfn mapping is being established
* for physical range indicated by pfn and size.
*
* prot is passed in as a parameter for the new mapping. If the vma has a * prot is passed in as a parameter for the new mapping. If the vma has a
* linear pfn mapping for the entire range reserve the entire vma range with * linear pfn mapping for the entire range reserve the entire vma range with
* single reserve_pfn_range call. * single reserve_pfn_range call.
*/ */
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size) unsigned long pfn, unsigned long size)
{ {
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
unsigned long flags; unsigned long flags;
...@@ -714,8 +711,36 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, ...@@ -714,8 +711,36 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
if (!pat_enabled) if (!pat_enabled)
return 0; return 0;
/* for vm_insert_pfn and friends, we set prot based on lookup */ /*
* For anything smaller than the vma size we set prot based on the
* lookup.
*/
flags = lookup_memtype(paddr); flags = lookup_memtype(paddr);
/* Check memtype for the remaining pages */
while (size > PAGE_SIZE) {
size -= PAGE_SIZE;
paddr += PAGE_SIZE;
if (flags != lookup_memtype(paddr))
return -EINVAL;
}
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags);
return 0;
}
int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn)
{
unsigned long flags;
if (!pat_enabled)
return 0;
/* Set prot based on lookup */
flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags); flags);
...@@ -723,12 +748,12 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, ...@@ -723,12 +748,12 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
} }
/* /*
* untrack_pfn_vma is called while unmapping a pfnmap for a region. * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or * untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero). * can be for the entire vma (in which case pfn, size are zero).
*/ */
void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size) unsigned long size)
{ {
resource_size_t paddr; resource_size_t paddr;
unsigned long prot; unsigned long prot;
......
...@@ -381,48 +381,57 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, ...@@ -381,48 +381,57 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
#ifndef __HAVE_PFNMAP_TRACKING #ifndef __HAVE_PFNMAP_TRACKING
/* /*
* Interface that can be used by architecture code to keep track of * Interfaces that can be used by architecture code to keep track of
* memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) * memory type of pfn mappings specified by the remap_pfn_range,
* * vm_insert_pfn.
* track_pfn_vma_new is called when a _new_ pfn mapping is being established */
* for physical range indicated by pfn and size.
/*
* track_pfn_remap is called when a _new_ pfn mapping is being established
* by remap_pfn_range() for physical range indicated by pfn and size.
*/ */
static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size) unsigned long pfn, unsigned long size)
{ {
return 0; return 0;
} }
/* /*
* Interface that can be used by architecture code to keep track of * track_pfn_insert is called when a _new_ single pfn is established
* memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) * by vm_insert_pfn().
* */
* track_pfn_vma_copy is called when vma that is covering the pfnmap gets static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn)
{
return 0;
}
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range(). * copied through copy_page_range().
*/ */
static inline int track_pfn_vma_copy(struct vm_area_struct *vma) static inline int track_pfn_copy(struct vm_area_struct *vma)
{ {
return 0; return 0;
} }
/* /*
* Interface that can be used by architecture code to keep track of
* memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
*
* untrack_pfn_vma is called while unmapping a pfnmap for a region. * untrack_pfn_vma is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or * untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case size can be zero). * can be for the entire vma (in which case pfn, size are zero).
*/ */
static inline void untrack_pfn_vma(struct vm_area_struct *vma, static inline void untrack_pfn(struct vm_area_struct *vma,
unsigned long pfn, unsigned long size) unsigned long pfn, unsigned long size)
{ {
} }
#else #else
extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size); unsigned long pfn, unsigned long size);
extern int track_pfn_vma_copy(struct vm_area_struct *vma); extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, unsigned long pfn);
unsigned long size); extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
#endif #endif
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
......
...@@ -1060,7 +1060,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -1060,7 +1060,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* We do not free on error cases below as remove_vma * We do not free on error cases below as remove_vma
* gets called on error from higher level routine * gets called on error from higher level routine
*/ */
ret = track_pfn_vma_copy(vma); ret = track_pfn_copy(vma);
if (ret) if (ret)
return ret; return ret;
} }
...@@ -1328,7 +1328,7 @@ static void unmap_single_vma(struct mmu_gather *tlb, ...@@ -1328,7 +1328,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
uprobe_munmap(vma, start, end); uprobe_munmap(vma, start, end);
if (unlikely(is_pfn_mapping(vma))) if (unlikely(is_pfn_mapping(vma)))
untrack_pfn_vma(vma, 0, 0); untrack_pfn(vma, 0, 0);
if (start != end) { if (start != end) {
if (unlikely(is_vm_hugetlb_page(vma))) { if (unlikely(is_vm_hugetlb_page(vma))) {
...@@ -2162,14 +2162,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, ...@@ -2162,14 +2162,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end) if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT; return -EFAULT;
if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE)) if (track_pfn_insert(vma, &pgprot, pfn))
return -EINVAL; return -EINVAL;
ret = insert_pfn(vma, addr, pfn, pgprot); ret = insert_pfn(vma, addr, pfn, pgprot);
if (ret)
untrack_pfn_vma(vma, pfn, PAGE_SIZE);
return ret; return ret;
} }
EXPORT_SYMBOL(vm_insert_pfn); EXPORT_SYMBOL(vm_insert_pfn);
...@@ -2311,7 +2308,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, ...@@ -2311,7 +2308,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size)); err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
if (err) { if (err) {
/* /*
* To indicate that track_pfn related cleanup is not * To indicate that track_pfn related cleanup is not
...@@ -2335,7 +2332,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, ...@@ -2335,7 +2332,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
if (err) if (err)
untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size)); untrack_pfn(vma, pfn, PAGE_ALIGN(size));
return err; return err;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment