Commit 20abd163 authored by Ben Skeggs's avatar Ben Skeggs
Browse files

drm/nouveau: create real execution engine for software object class



Just a cleanup more or less, and to remove the need for special handling of
software objects.

This removes a heap of documentation on dma/graph object formats.  The info
is very out of date with our current understanding, and is far better
documented in rnndb in envytools git.
Signed-off-by: default avatarBen Skeggs <bskeggs@redhat.com>
parent 2cda7f4c
......@@ -18,6 +18,7 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
nv50_fb.o nvc0_fb.o \
nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o nvc0_fifo.o \
nve0_fifo.o \
nv04_software.o nv50_software.o nvc0_software.o \
nv04_graph.o nv10_graph.o nv20_graph.o \
nv40_graph.o nv50_graph.o nvc0_graph.o nve0_graph.o \
nv40_grctx.o nv50_grctx.o nvc0_grctx.o nve0_grctx.o \
......
......@@ -28,6 +28,7 @@
#include "nouveau_drm.h"
#include "nouveau_dma.h"
#include "nouveau_ramht.h"
#include "nouveau_software.h"
static int
nouveau_channel_pushbuf_init(struct nouveau_channel *chan)
......@@ -155,8 +156,6 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
}
NV_DEBUG(dev, "initialising channel %d\n", chan->id);
INIT_LIST_HEAD(&chan->nvsw.vbl_wait);
INIT_LIST_HEAD(&chan->nvsw.flip);
INIT_LIST_HEAD(&chan->fence.pending);
spin_lock_init(&chan->fence.lock);
......@@ -213,6 +212,12 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
OUT_RING (chan, 0x00000000);
FIRE_RING(chan);
ret = nouveau_gpuobj_gr_new(chan, NvSw, nouveau_software_class(dev));
if (ret) {
nouveau_channel_put(&chan);
return ret;
}
ret = nouveau_fence_channel_init(chan);
if (ret) {
nouveau_channel_put(&chan);
......
......@@ -33,6 +33,7 @@
#include "nouveau_crtc.h"
#include "nouveau_dma.h"
#include "nouveau_connector.h"
#include "nouveau_software.h"
#include "nouveau_gpio.h"
#include "nv50_display.h"
......@@ -432,6 +433,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
struct nouveau_page_flip_state *s,
struct nouveau_fence **pfence)
{
struct nouveau_software_chan *swch = chan->engctx[NVOBJ_ENGINE_SW];
struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
struct drm_device *dev = chan->dev;
unsigned long flags;
......@@ -439,7 +441,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
/* Queue it to the pending list */
spin_lock_irqsave(&dev->event_lock, flags);
list_add_tail(&s->head, &chan->nvsw.flip);
list_add_tail(&s->head, &swch->flip);
spin_unlock_irqrestore(&dev->event_lock, flags);
/* Synchronize with the old framebuffer */
......@@ -547,20 +549,20 @@ int
nouveau_finish_page_flip(struct nouveau_channel *chan,
struct nouveau_page_flip_state *ps)
{
struct nouveau_software_chan *swch = chan->engctx[NVOBJ_ENGINE_SW];
struct drm_device *dev = chan->dev;
struct nouveau_page_flip_state *s;
unsigned long flags;
spin_lock_irqsave(&dev->event_lock, flags);
if (list_empty(&chan->nvsw.flip)) {
if (list_empty(&swch->flip)) {
NV_ERROR(dev, "Unexpected pageflip in channel %d.\n", chan->id);
spin_unlock_irqrestore(&dev->event_lock, flags);
return -EINVAL;
}
s = list_first_entry(&chan->nvsw.flip,
struct nouveau_page_flip_state, head);
s = list_first_entry(&swch->flip, struct nouveau_page_flip_state, head);
if (s->event) {
struct drm_pending_vblank_event *e = s->event;
struct timeval now;
......
......@@ -296,14 +296,6 @@ struct nouveau_channel {
uint32_t sw_subchannel[8];
struct nouveau_vma dispc_vma[4];
struct {
struct nouveau_gpuobj *vblsem;
uint32_t vblsem_head;
uint32_t vblsem_offset;
uint32_t vblsem_rval;
struct list_head vbl_wait;
struct list_head flip;
} nvsw;
struct {
bool active;
......
......@@ -32,6 +32,7 @@
#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_software.h"
#include "nouveau_dma.h"
#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
......@@ -503,11 +504,6 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
int ret;
if (dev_priv->card_type < NV_C0) {
/* Create an NV_SW object for various sync purposes */
ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
if (ret)
return ret;
ret = RING_SPACE(chan, 2);
if (ret)
return ret;
......
......@@ -35,6 +35,7 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
#include "nouveau_ramht.h"
#include "nouveau_software.h"
#include "nouveau_vm.h"
#include "nv50_display.h"
......@@ -133,37 +134,6 @@ nouveau_gpuobj_mthd_call2(struct drm_device *dev, int chid,
return ret;
}
/* NVidia uses context objects to drive drawing operations.
Context objects can be selected into 8 subchannels in the FIFO,
and then used via DMA command buffers.
A context object is referenced by a user defined handle (CARD32). The HW
looks up graphics objects in a hash table in the instance RAM.
An entry in the hash table consists of 2 CARD32. The first CARD32 contains
the handle, the second one a bitfield, that contains the address of the
object in instance RAM.
The format of the second CARD32 seems to be:
NV4 to NV30:
15: 0 instance_addr >> 4
17:16 engine (here uses 1 = graphics)
28:24 channel id (here uses 0)
31 valid (use 1)
NV40:
15: 0 instance_addr >> 4 (maybe 19-0)
21:20 engine (here uses 1 = graphics)
I'm unsure about the other bits, but using 0 seems to work.
The key into the hash table depends on the object handle and channel id and
is given as:
*/
int
nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
uint32_t size, int align, uint32_t flags,
......@@ -361,34 +331,6 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst,
return 0;
}
/*
DMA objects are used to reference a piece of memory in the
framebuffer, PCI or AGP address space. Each object is 16 bytes big
and looks as follows:
entry[0]
11:0 class (seems like I can always use 0 here)
12 page table present?
13 page entry linear?
15:14 access: 0 rw, 1 ro, 2 wo
17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
31:20 dma adjust (bits 0-11 of the address)
entry[1]
dma limit (size of transfer)
entry[X]
1 0 readonly, 1 readwrite
31:12 dma frame address of the page (bits 12-31 of the address)
entry[N]
page table terminator, same value as the first pte, as does nvidia
rivatv uses 0xffffffff
Non linear page tables need a list of frame addresses afterwards,
the rivatv project has some info on this.
The method below creates a DMA object in instance RAM and returns a handle
to it that can be used to set up context objects.
*/
void
nv50_gpuobj_dma_init(struct nouveau_gpuobj *obj, u32 offset, int class,
u64 base, u64 size, int target, int access,
......@@ -540,82 +482,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class, u64 base,
return 0;
}
/* Context objects in the instance RAM have the following structure.
* On NV40 they are 32 byte long, on NV30 and smaller 16 bytes.
NV4 - NV30:
entry[0]
11:0 class
12 chroma key enable
13 user clip enable
14 swizzle enable
17:15 patch config:
scrcopy_and, rop_and, blend_and, scrcopy, srccopy_pre, blend_pre
18 synchronize enable
19 endian: 1 big, 0 little
21:20 dither mode
23 single step enable
24 patch status: 0 invalid, 1 valid
25 context_surface 0: 1 valid
26 context surface 1: 1 valid
27 context pattern: 1 valid
28 context rop: 1 valid
29,30 context beta, beta4
entry[1]
7:0 mono format
15:8 color format
31:16 notify instance address
entry[2]
15:0 dma 0 instance address
31:16 dma 1 instance address
entry[3]
dma method traps
NV40:
No idea what the exact format is. Here's what can be deducted:
entry[0]:
11:0 class (maybe uses more bits here?)
17 user clip enable
21:19 patch config
25 patch status valid ?
entry[1]:
15:0 DMA notifier (maybe 20:0)
entry[2]:
15:0 DMA 0 instance (maybe 20:0)
24 big endian
entry[3]:
15:0 DMA 1 instance (maybe 20:0)
entry[4]:
entry[5]:
set to 0?
*/
static int
nouveau_gpuobj_sw_new(struct nouveau_channel *chan, u32 handle, u16 class)
{
struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
struct nouveau_gpuobj *gpuobj;
int ret;
gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
if (!gpuobj)
return -ENOMEM;
gpuobj->dev = chan->dev;
gpuobj->engine = NVOBJ_ENGINE_SW;
gpuobj->class = class;
kref_init(&gpuobj->refcount);
gpuobj->cinst = 0x40;
spin_lock(&dev_priv->ramin_lock);
list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
spin_unlock(&dev_priv->ramin_lock);
ret = nouveau_ramht_insert(chan, handle, gpuobj);
nouveau_gpuobj_ref(NULL, &gpuobj);
return ret;
}
int
nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
{
......@@ -632,9 +498,6 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
if (oc->id != class)
continue;
if (oc->engine == NVOBJ_ENGINE_SW)
return nouveau_gpuobj_sw_new(chan, handle, class);
if (!chan->engctx[oc->engine]) {
ret = eng->context_new(chan, oc->engine);
if (ret)
......@@ -956,6 +819,17 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
if (init->handle == ~0)
return -EINVAL;
/* compatibility with userspace that assumes 506e for all chipsets */
if (init->class == 0x506e) {
init->class = nouveau_software_class(dev);
if (init->class == 0x906e)
return 0;
} else
if (init->class == 0x906e) {
NV_ERROR(dev, "906e not supported yet\n");
return -EINVAL;
}
chan = nouveau_channel_get(file_priv, init->channel);
if (IS_ERR(chan))
return PTR_ERR(chan);
......
#ifndef __NOUVEAU_SOFTWARE_H__
#define __NOUVEAU_SOFTWARE_H__
struct nouveau_software_priv {
struct nouveau_exec_engine base;
struct list_head vblank;
};
struct nouveau_software_chan {
struct list_head flip;
struct {
struct list_head list;
struct nouveau_bo *bo;
u32 offset;
u32 value;
u32 head;
} vblank;
};
static inline void
nouveau_software_vblank(struct drm_device *dev, int crtc)
{
struct nouveau_software_priv *psw = nv_engine(dev, NVOBJ_ENGINE_SW);
struct nouveau_software_chan *pch, *tmp;
list_for_each_entry_safe(pch, tmp, &psw->vblank, vblank.list) {
if (pch->vblank.head != crtc)
continue;
nouveau_bo_wr32(pch->vblank.bo, pch->vblank.offset,
pch->vblank.value);
list_del(&pch->vblank.list);
drm_vblank_put(dev, crtc);
}
}
static inline void
nouveau_software_context_new(struct nouveau_software_chan *pch)
{
INIT_LIST_HEAD(&pch->flip);
}
static inline void
nouveau_software_create(struct nouveau_software_priv *psw)
{
INIT_LIST_HEAD(&psw->vblank);
}
static inline u16
nouveau_software_class(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
if (dev_priv->card_type <= NV_04)
return 0x006e;
if (dev_priv->card_type <= NV_40)
return 0x016e;
if (dev_priv->card_type <= NV_50)
return 0x506e;
if (dev_priv->card_type <= NV_E0)
return 0x906e;
return 0x0000;
}
int nv04_software_create(struct drm_device *);
int nv50_software_create(struct drm_device *);
int nvc0_software_create(struct drm_device *);
#endif
......@@ -39,6 +39,7 @@
#include "nouveau_gpio.h"
#include "nouveau_pm.h"
#include "nv50_display.h"
#include "nouveau_software.h"
static void nouveau_stub_takedown(struct drm_device *dev) {}
static int nouveau_stub_init(struct drm_device *dev) { return 0; }
......@@ -765,6 +766,26 @@ nouveau_card_init(struct drm_device *dev)
goto out_ttmvram;
if (!dev_priv->noaccel) {
switch (dev_priv->card_type) {
case NV_04:
case NV_10:
case NV_20:
case NV_30:
case NV_40:
nv04_software_create(dev);
break;
case NV_50:
nv50_software_create(dev);
break;
case NV_C0:
case NV_D0:
case NV_E0:
nvc0_software_create(dev);
break;
default:
break;
}
switch (dev_priv->card_type) {
case NV_04:
nv04_graph_create(dev);
......
......@@ -550,28 +550,6 @@ nv04_graph_fini(struct drm_device *dev, int engine, bool suspend)
return 0;
}
static int
nv04_graph_mthd_set_ref(struct nouveau_channel *chan,
u32 class, u32 mthd, u32 data)
{
atomic_set(&chan->fence.last_sequence_irq, data);
return 0;
}
int
nv04_graph_mthd_page_flip(struct nouveau_channel *chan,
u32 class, u32 mthd, u32 data)
{
struct drm_device *dev = chan->dev;
struct nouveau_page_flip_state s;
if (!nouveau_finish_page_flip(chan, &s))
nv_set_crtc_base(dev, s.crtc,
s.offset + s.y * s.pitch + s.x * s.bpp / 8);
return 0;
}
/*
* Software methods, why they are needed, and how they all work:
*
......@@ -1345,9 +1323,5 @@ nv04_graph_create(struct drm_device *dev)
NVOBJ_MTHD (dev, 0x005e, 0x0198, nv04_graph_mthd_bind_surf2d);
NVOBJ_MTHD (dev, 0x005e, 0x02fc, nv04_graph_mthd_set_operation);
/* nvsw */
NVOBJ_CLASS(dev, 0x506e, SW);
NVOBJ_MTHD (dev, 0x506e, 0x0150, nv04_graph_mthd_set_ref);
NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
return 0;
}
/*
* Copyright 2012 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#include "drmP.h"
#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_software.h"
#include "nouveau_hw.h"
struct nv04_software_priv {
struct nouveau_software_priv base;
};
struct nv04_software_chan {
struct nouveau_software_chan base;
};
static int
mthd_fence(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
{
atomic_set(&chan->fence.last_sequence_irq, data);
return 0;
}
static int
mthd_flip(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
{
struct nouveau_page_flip_state state;
if (!nouveau_finish_page_flip(chan, &state)) {
nv_set_crtc_base(chan->dev, state.crtc, state.offset +
state.y * state.pitch +
state.x * state.bpp / 8);
}
return 0;
}
static int
nv04_software_context_new(struct nouveau_channel *chan, int engine)
{
struct nv04_software_chan *pch;
pch = kzalloc(sizeof(*pch), GFP_KERNEL);
if (!pch)
return -ENOMEM;
nouveau_software_context_new(&pch->base);
atomic_set(&chan->fence.last_sequence_irq, 0);
chan->engctx[engine] = pch;
return 0;
}
static void
nv04_software_context_del(struct nouveau_channel *chan, int engine)
{
struct nv04_software_chan *pch = chan->engctx[engine];
chan->engctx[engine] = NULL;
kfree(pch);
}
static int
nv04_software_object_new(struct nouveau_channel *chan, int engine,
u32 handle, u16 class)
{
struct drm_device *dev = chan->dev;
struct nouveau_gpuobj *obj = NULL;
int ret;
ret = nouveau_gpuobj_new(dev, chan, 16, 16, 0, &obj);
if (ret)
return ret;
obj->engine = 0;
obj->class = class;
ret = nouveau_ramht_insert(chan, handle, obj);
nouveau_gpuobj_ref(NULL, &obj);
return ret;
}
static int
nv04_software_init(struct drm_device *dev, int engine)
{
return 0;
}
static int
nv04_software_fini(struct drm_device *dev, int engine, bool suspend)
{
return 0;
}
static void
nv04_software_destroy(struct drm_device *dev, int engine)
{
struct nv04_software_priv *psw = nv_engine(dev, engine);
NVOBJ_ENGINE_DEL(dev, SW);
kfree(psw);
}
int
nv04_software_create(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nv04_software_priv *psw;
psw = kzalloc(sizeof(*psw), GFP_KERNEL);
if (!psw)
return -ENOMEM;
psw->base.base.destroy = nv04_software_destroy;
psw->base.base.init = nv04_software_init;
psw->base.base.fini = nv04_software_fini;
psw->base.base.context_new = nv04_software_context_new;
psw->base.base.context_del = nv04_software_context_del;
psw->base.base.object_new = nv04_software_object_new;
nouveau_software_create(&psw->base);
NVOBJ_ENGINE_ADD(dev, SW, &psw->base.base);
if (dev_priv->card_type <= NV_04) {
NVOBJ_CLASS(dev, 0x006e, SW);
NVOBJ_MTHD (dev, 0x006e, 0x0150, mthd_fence);
NVOBJ_MTHD (dev, 0x006e, 0x0500, mthd_flip);
} else {
NVOBJ_CLASS(dev, 0x016e, SW);
NVOBJ_MTHD (dev, 0x016e, 0x0500, mthd_flip);
}
return 0;
}
......@@ -1153,10 +1153,6 @@ nv10_graph_create(struct drm_device *dev)
NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
nouveau_irq_register(dev, 12, nv10_graph_isr);