Commit 9da3da66 authored by Chris Wilson, committed by Daniel Vetter

drm/i915: Replace the array of pages with a scatterlist



Rather than have multiple data structures for describing our page layout
in conjunction with the array of pages, we can migrate all users over to
a scatterlist.

One major advantage this offers, beyond unifying the page tracking
structures, is that we replace the vmalloc'ed array (which can be up to
a megabyte in size) with a chain of individual pages, which helps
reduce memory pressure.

The disadvantage is that we no longer have a simple array to iterate
over, or to access randomly. The common case for random access is
relocation processing, which will typically fit within a single
scatterlist page and so cost almost the same as the simple array. When
iterating over the pages, the extra function call could be optimised
away, but in reality it is an insignificant cost compared with either
binding the pages or performing the pwrite/pread.

v2: Fix drm_clflush_sg() to not invoke wbinvd as well! And fix the
trivial compile error from rebasing.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent f60d7f0c
@@ -84,40 +84,33 @@ static struct _intel_private {
#define IS_IRONLAKE intel_private.driver->is_ironlake
#define HAS_PGTBL_EN intel_private.driver->has_pgtbl_enable
int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
struct scatterlist **sg_list, int *num_sg)
static int intel_gtt_map_memory(struct page **pages,
unsigned int num_entries,
struct sg_table *st)
{
struct sg_table st;
struct scatterlist *sg;
int i;
if (*sg_list)
return 0; /* already mapped (for e.g. resume) */
DBG("try mapping %lu pages\n", (unsigned long)num_entries);
if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
if (sg_alloc_table(st, num_entries, GFP_KERNEL))
goto err;
*sg_list = sg = st.sgl;
for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
for_each_sg(st->sgl, sg, num_entries, i)
sg_set_page(sg, pages[i], PAGE_SIZE, 0);
*num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
num_entries, PCI_DMA_BIDIRECTIONAL);
if (unlikely(!*num_sg))
if (!pci_map_sg(intel_private.pcidev,
st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL))
goto err;
return 0;
err:
sg_free_table(&st);
sg_free_table(st);
return -ENOMEM;
}
EXPORT_SYMBOL(intel_gtt_map_memory);
void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
{
struct sg_table st;
DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
@@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
sg_free_table(&st);
}
EXPORT_SYMBOL(intel_gtt_unmap_memory);
static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
{
@@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags)
return false;
}
void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
unsigned int sg_len,
void intel_gtt_insert_sg_entries(struct sg_table *st,
unsigned int pg_start,
unsigned int flags)
{
@@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
/* sg may merge pages, but we have to separate
* per-page addr for GTT */
for_each_sg(sg_list, sg, sg_len, i) {
for_each_sg(st->sgl, sg, st->nents, i) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (m = 0; m < len; m++) {
dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
intel_private.driver->write_entry(addr,
j, flags);
intel_private.driver->write_entry(addr, j, flags);
j++;
}
}
@@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
}
EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
struct page **pages, unsigned int flags)
static void intel_gtt_insert_pages(unsigned int first_entry,
unsigned int num_entries,
struct page **pages,
unsigned int flags)
{
int i, j;
@@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
}
readl(intel_private.gtt+j-1);
}
EXPORT_SYMBOL(intel_gtt_insert_pages);
static int intel_fake_agp_insert_entries(struct agp_memory *mem,
off_t pg_start, int type)
@@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
global_cache_flush();
if (intel_private.base.needs_dmar) {
ret = intel_gtt_map_memory(mem->pages, mem->page_count,
&mem->sg_list, &mem->num_sg);
struct sg_table st;
ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
if (ret != 0)
return ret;
intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
pg_start, type);
intel_gtt_insert_sg_entries(&st, pg_start, type);
mem->sg_list = st.sgl;
mem->num_sg = st.nents;
} else
intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
type);
...
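
The nested loop in intel_gtt_insert_sg_entries above is the central
idiom of the new code: pci_map_sg() may coalesce adjacent pages into a
single DMA segment, so each segment must be expanded back into
PAGE_SIZE steps when writing GTT entries. A standalone sketch, with
write_entry() standing in for the intel_private.driver->write_entry
hook:

	static void write_per_page_entries(struct sg_table *st,
					   unsigned int pg_start,
					   unsigned int flags)
	{
		struct scatterlist *sg;
		unsigned int len, m, j = pg_start;
		int i;

		for_each_sg(st->sgl, sg, st->nents, i) {
			/* number of pages merged into this DMA segment */
			len = sg_dma_len(sg) >> PAGE_SHIFT;
			for (m = 0; m < len; m++) {
				dma_addr_t addr =
					sg_dma_address(sg) + (m << PAGE_SHIFT);
				write_entry(addr, j++, flags); /* hypothetical hook */
			}
		}
	}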
@@ -100,6 +100,31 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
}
EXPORT_SYMBOL(drm_clflush_pages);
void
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
if (cpu_has_clflush) {
struct scatterlist *sg;
int i;
mb();
for_each_sg(st->sgl, sg, st->nents, i)
drm_clflush_page(sg_page(sg));
mb();
return;
}
if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
printk(KERN_ERR "Timed out waiting for cache flush.\n");
#else
printk(KERN_ERR "Architecture has no drm_cache.c support\n");
WARN_ON_ONCE(1);
#endif
}
EXPORT_SYMBOL(drm_clflush_sg);
void
drm_clflush_virt_range(char *addr, unsigned long length)
{
...
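
Note, per the v2 fix, that the wbinvd IPI is only the fallback for CPUs
without clflush; the common path flushes cache lines page by page. A
caller needs nothing more than the table itself, mirroring
i915_gem_clflush_object later in this commit:

	/* obj->pages is now a struct sg_table *, so the whole object is
	 * flushed with one call instead of drm_clflush_pages() plus a
	 * separate page count.
	 */
	drm_clflush_sg(obj->pages);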
@@ -1006,16 +1006,11 @@ struct drm_i915_gem_object {
unsigned int has_aliasing_ppgtt_mapping:1;
unsigned int has_global_gtt_mapping:1;
unsigned int has_dma_mapping:1;
struct page **pages;
struct sg_table *pages;
int pages_pin_count;
/**
* DMAR support
*/
struct scatterlist *sg_list;
int num_sg;
/* prime dma-buf support */
struct sg_table *sg_table;
void *dma_buf_vmapping;
@@ -1342,6 +1337,15 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
void i915_gem_lastclose(struct drm_device *dev);
int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
{
struct scatterlist *sg = obj->pages->sgl;
while (n >= SG_MAX_SINGLE_ALLOC) {
sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
n -= SG_MAX_SINGLE_ALLOC - 1;
}
return sg_page(sg+n);
}
static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
{
BUG_ON(obj->pages == NULL);
...
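
The i915_gem_object_get_page() helper above encodes how
sg_alloc_table() chains its backing pages: each full chunk holds
SG_MAX_SINGLE_ALLOC entries whose last slot is a link to the next
chunk rather than a data entry, hence the "- 1" in both the pointer
hop and the index adjustment. A commented restatement (assuming, as
with the tables built in this commit, one page per entry and that n
never lands on a link slot):

	static inline struct page *lookup_page(struct sg_table *st, int n)
	{
		struct scatterlist *sg = st->sgl;

		/* Each full chunk carries SG_MAX_SINGLE_ALLOC - 1 data
		 * entries; its final slot is the chain pointer to the
		 * next chunk.
		 */
		while (n >= SG_MAX_SINGLE_ALLOC) {
			sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
			n -= SG_MAX_SINGLE_ALLOC - 1;
		}

		return sg_page(sg + n); /* direct index in the final chunk */
	}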
@@ -411,6 +411,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
int hit_slowpath = 0;
int prefaulted = 0;
int needs_clflush = 0;
struct scatterlist *sg;
int i;
user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size;
@@ -439,9 +441,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
offset = args->offset;
while (remain > 0) {
for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
struct page *page;
if (i < offset >> PAGE_SHIFT)
continue;
if (remain <= 0)
break;
/* Operation in this page
*
* shmem_page_offset = offset within page in shmem file
@@ -452,7 +460,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
page = obj->pages[offset >> PAGE_SHIFT];
page = sg_page(sg);
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
@@ -731,6 +739,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
int hit_slowpath = 0;
int needs_clflush_after = 0;
int needs_clflush_before = 0;
int i;
struct scatterlist *sg;
user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size;
@@ -765,10 +775,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
offset = args->offset;
obj->dirty = 1;
while (remain > 0) {
for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
struct page *page;
int partial_cacheline_write;
if (i < offset >> PAGE_SHIFT)
continue;
if (remain <= 0)
break;
/* Operation in this page
*
* shmem_page_offset = offset within page in shmem file
@@ -787,7 +803,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
((shmem_page_offset | page_length)
& (boot_cpu_data.x86_clflush_size - 1));
page = obj->pages[offset >> PAGE_SHIFT];
page = sg_page(sg);
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
@@ -1633,6 +1649,7 @@ static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
int page_count = obj->base.size / PAGE_SIZE;
struct scatterlist *sg;
int ret, i;
BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -1653,19 +1670,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
if (obj->madv == I915_MADV_DONTNEED)
obj->dirty = 0;
for (i = 0; i < page_count; i++) {
for_each_sg(obj->pages->sgl, sg, page_count, i) {
struct page *page = sg_page(sg);
if (obj->dirty)
set_page_dirty(obj->pages[i]);
set_page_dirty(page);
if (obj->madv == I915_MADV_WILLNEED)
mark_page_accessed(obj->pages[i]);
mark_page_accessed(page);
page_cache_release(obj->pages[i]);
page_cache_release(page);
}
obj->dirty = 0;
drm_free_large(obj->pages);
obj->pages = NULL;
sg_free_table(obj->pages);
kfree(obj->pages);
}
static int
@@ -1682,6 +1701,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
return -EBUSY;
ops->put_pages(obj);
obj->pages = NULL;
list_del(&obj->gtt_list);
if (i915_gem_object_is_purgeable(obj))
@@ -1739,6 +1759,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int page_count, i;
struct address_space *mapping;
struct sg_table *st;
struct scatterlist *sg;
struct page *page;
gfp_t gfp;
@@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
/* Get the list of pages out of our struct file. They'll be pinned
* at this point until we release them.
*/
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
return -ENOMEM;
page_count = obj->base.size / PAGE_SIZE;
obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
if (obj->pages == NULL)
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
sg_free_table(st);
kfree(st);
return -ENOMEM;
}
/* Fail silently without starting the shrinker */
/* Get the list of pages out of our struct file. They'll be pinned
* at this point until we release them.
*
* Fail silently without starting the shrinker
*/
mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
gfp = mapping_gfp_mask(mapping);
gfp |= __GFP_NORETRY | __GFP_NOWARN;
gfp &= ~(__GFP_IO | __GFP_WAIT);
for (i = 0; i < page_count; i++) {
for_each_sg(st->sgl, sg, page_count, i) {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
if (IS_ERR(page)) {
i915_gem_purge(dev_priv, page_count);
@@ -1785,20 +1814,20 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
gfp &= ~(__GFP_IO | __GFP_WAIT);
}
obj->pages[i] = page;
sg_set_page(sg, page, PAGE_SIZE, 0);
}
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj);
obj->pages = st;
return 0;
err_pages:
while (i--)
page_cache_release(obj->pages[i]);
drm_free_large(obj->pages);
obj->pages = NULL;
for_each_sg(st->sgl, sg, i, page_count)
page_cache_release(sg_page(sg));
sg_free_table(st);
kfree(st);
return PTR_ERR(page);
}
@@ -2981,7 +3010,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
trace_i915_gem_object_clflush(obj);
drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
drm_clflush_sg(obj->pages);
}
/** Flushes the GTT write domain for the object if it's dirty. */
@@ -3731,6 +3760,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
i915_gem_object_put_pages(obj);
i915_gem_object_free_mmap_offset(obj);
BUG_ON(obj->pages);
drm_gem_object_release(&obj->base);
i915_gem_info_remove_obj(dev_priv, obj->base.size);
...
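
One subtle idiom in the err_pages path above:
for_each_sg(st->sgl, sg, i, page_count) passes i, the count of entries
populated so far, as the nents bound and reuses page_count as the
scratch loop counter, so only the pages actually acquired are
released. With conventional variable roles it reads:

	static void release_partial(struct sg_table *st, int populated)
	{
		struct scatterlist *sg;
		int i;

		/* walk only the entries filled before the failure */
		for_each_sg(st->sgl, sg, populated, i)
			page_cache_release(sg_page(sg));

		sg_free_table(st);
		kfree(st);
	}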
@@ -28,33 +28,57 @@
#include <linux/dma-buf.h>
static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
enum dma_data_direction dir)
{
struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
struct drm_device *dev = obj->base.dev;
int npages = obj->base.size / PAGE_SIZE;
struct sg_table *sg;
int ret;
int nents;
struct sg_table *st;
struct scatterlist *src, *dst;
int ret, i;
ret = i915_mutex_lock_interruptible(dev);
ret = i915_mutex_lock_interruptible(obj->base.dev);
if (ret)
return ERR_PTR(ret);
ret = i915_gem_object_get_pages(obj);
if (ret) {
sg = ERR_PTR(ret);
st = ERR_PTR(ret);
goto out;
}
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
st = ERR_PTR(-ENOMEM);
goto out;
}
ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
if (ret) {
kfree(st);
st = ERR_PTR(ret);
goto out;
}
src = obj->pages->sgl;
dst = st->sgl;
for (i = 0; i < obj->pages->nents; i++) {
sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
dst = sg_next(dst);
src = sg_next(src);
}
if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
sg_free_table(st);
kfree(st);
st = ERR_PTR(-ENOMEM);
goto out;
}
/* link the pages into an SG then map the sg */
sg = drm_prime_pages_to_sg(obj->pages, npages);
nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
i915_gem_object_pin_pages(obj);
out:
mutex_unlock(&dev->struct_mutex);
return sg;
mutex_unlock(&obj->base.dev->struct_mutex);
return st;
}
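
The copy loop above exists because each dma-buf importer needs its own
sg_table to map: the exporter's table stays owned by i915, while the
clone is what gets dma_map_sg()'d for the attaching device. A minimal
sketch of such a clone, mapping step omitted (single-page entries, as
in the code above):

	static struct sg_table *clone_sg_table(struct sg_table *src_st)
	{
		struct sg_table *st;
		struct scatterlist *src, *dst;
		int i;

		st = kmalloc(sizeof(*st), GFP_KERNEL);
		if (st == NULL)
			return NULL;
		if (sg_alloc_table(st, src_st->nents, GFP_KERNEL)) {
			kfree(st);
			return NULL;
		}

		src = src_st->sgl;
		dst = st->sgl;
		for (i = 0; i < src_st->nents; i++) {
			sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
			dst = sg_next(dst);
			src = sg_next(src);
		}
		return st;
	}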
static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
@@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
{
struct drm_i915_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->base.dev;
int ret;
struct scatterlist *sg;
struct page **pages;
int ret, i;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
@@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
}
ret = i915_gem_object_get_pages(obj);
if (ret) {
mutex_unlock(&dev->struct_mutex);
return ERR_PTR(ret);
}
if (ret)
goto error;
obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
if (!obj->dma_buf_vmapping) {
DRM_ERROR("failed to vmap object\n");
goto out_unlock;
}
ret = -ENOMEM;
pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
if (pages == NULL)
goto error;
for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
pages[i] = sg_page(sg);
obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
drm_free_large(pages);
if (!obj->dma_buf_vmapping)
goto error;
obj->vmapping_count = 1;
i915_gem_object_pin_pages(obj);
out_unlock:
mutex_unlock(&dev->struct_mutex);
return obj->dma_buf_vmapping;
error:
mutex_unlock(&dev->struct_mutex);
return ERR_PTR(ret);
}
static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
@@ -184,22 +221,19 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
};
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags)
struct drm_gem_object *gem_obj, int flags)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
return dma_buf_export(obj, &i915_dmabuf_ops,
obj->base.size, 0600);
return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
}
struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
struct dma_buf *dma_buf)
{
struct dma_buf_attachment *attach;
struct sg_table *sg;
struct drm_i915_gem_object *obj;
int npages;
int size;
int ret;
/* is this one of own objects? */
@@ -223,21 +257,19 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
goto fail_detach;
}
size = dma_buf->size;
npages = size / PAGE_SIZE;
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (obj == NULL) {
ret = -ENOMEM;
goto fail_unmap;
}
ret = drm_gem_private_object_init(dev, &obj->base, size);
ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
if (ret) {
kfree(obj);
goto fail_unmap;
}
obj->has_dma_mapping = true;
obj->sg_table = sg;
obj->base.import_attach = attach;
@@ -249,3 +281,4 @@ fail_detach:
dma_buf_detach(dma_buf, attach);
return ERR_PTR(ret);
}
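
The vmap path above shows the one place a flat page array is still
needed: vmap() takes a struct page *[], so a temporary array is built
from the table and discarded once the mapping exists. As a condensed
sketch of that pattern:

	static void *vmap_sg_table(struct sg_table *st)
	{
		struct scatterlist *sg;
		struct page **pages;
		void *addr;
		int i;

		pages = drm_malloc_ab(st->nents, sizeof(*pages));
		if (pages == NULL)
			return NULL;

		for_each_sg(st->sgl, sg, st->nents, i)
			pages[i] = sg_page(sg);

		/* the array is only needed while vmap() builds the mapping */
		addr = vmap(pages, st->nents, 0, PAGE_KERNEL);
		drm_free_large(pages);
		return addr;
	}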
@@ -210,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (ret)
return ret;
vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
} else {
...
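
This is the common case the commit message calls out: a relocation
offset usually lands within the first scatterlist chunk, so the helper
walk costs no more than the old array index. Spelled out, with
page_offset and reloc as in the surrounding function:

	char *vaddr;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
						     reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
	kunmap_atomic(vaddr);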
@@ -167,8 +167,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
}
static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
struct scatterlist *sg_list,
unsigned sg_len,
const struct sg_table *pages,
unsigned first_entry,
uint32_t pte_flags)
{
@@ -180,12 +179,12 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
struct scatterlist *sg;
/* init sg walking */
sg = sg_list;
sg = pages->sgl;
i = 0;
segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
m = 0;
while (i < sg_len) {
while (i < pages->nents) {
pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
@@ -194,13 +193,11 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
pt_vaddr[j] = pte | pte_flags;
/* grab the next page */
m++;
if (m == segment_len) {
sg = sg_next(sg);
i++;
if (i == sg_len)
if (++m == segment_len) {
if (++i == pages->nents)