intel_ringbuffer.c 38.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drv.h"
33
#include "i915_drm.h"
34
#include "i915_trace.h"
35
#include "intel_drv.h"
36

37
38
39
40
41
42
43
44
45
46
/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	/* GEM object backing the 4096-byte page set up by init_pipe_control(). */
	struct drm_i915_gem_object *obj;
	/* CPU mapping of the object's first page, as returned by kmap(). */
	volatile u32 *cpu_page;
	/* Copy of obj->gtt_offset; used as the PIPE_CONTROL write address. */
	u32 gtt_offset;
};

47
48
49
50
51
52
53
54
/*
 * Compute the number of bytes between the software tail and the cached
 * head of @ring, keeping 8 bytes in reserve.  A negative difference
 * means the head is behind the tail, so the ring size is added to
 * account for wrap-around.
 */
static inline int ring_space(struct intel_ring_buffer *ring)
{
	int free_bytes = (ring->head & HEAD_ADDR) - (ring->tail + 8);

	if (free_bytes >= 0)
		return free_bytes;

	free_bytes += ring->size;
	return free_bytes;
}

55
static int
56
render_ring_flush(struct intel_ring_buffer *ring,
57
58
		  u32	invalidate_domains,
		  u32	flush_domains)
59
{
60
	struct drm_device *dev = ring->dev;
61
	u32 cmd;
62
	int ret;
63

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) &
	    I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (INTEL_INFO(dev)->gen < 4) {
97
		/*
98
99
		 * On the 965, the sampler cache always gets flushed
		 * and this bit is reserved.
100
		 */
101
102
103
104
105
		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
			cmd |= MI_READ_FLUSH;
	}
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;
106

107
108
109
	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;
110

111
112
113
	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;
114

115
116
117
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
118
119

	return 0;
120
121
}

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	/* Both packets target the pipe-control page at offset 128. */
	const u32 target = (pc->gtt_offset + 128) | PIPE_CONTROL_GLOBAL_GTT;
	int err;

	/* First packet: CS stall combined with stall-at-scoreboard. */
	err = intel_ring_begin(ring, 6);
	if (err)
		return err;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring,
			PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, target); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* Second packet: the non-zero post-sync operation (qword write). */
	err = intel_ring_begin(ring, 6);
	if (err)
		return err;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, target); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/*
 * Emit a full PIPE_CONTROL flush/invalidate on @ring, preceded by the
 * post-sync-nonzero workaround sequence.
 *
 * @invalidate_domains and @flush_domains are accepted for interface
 * compatibility but are not consulted: every cache is flushed or
 * invalidated unconditionally.
 *
 * Returns 0 on success, or the error from the workaround emission or
 * from intel_ring_begin().
 */
static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
                         u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/*
	 * Force SNB workarounds for PIPE_CONTROL flushes.  If the workaround
	 * could not be emitted, do not emit the flush that depends on it.
	 */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

234
static void ring_write_tail(struct intel_ring_buffer *ring,
235
			    u32 value)
236
{
237
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
238
	I915_WRITE_TAIL(ring, value);
239
240
}

241
u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
242
{
243
244
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
Daniel Vetter's avatar
Daniel Vetter committed
245
			RING_ACTHD(ring->mmio_base) : ACTHD;
246
247
248
249

	return I915_READ(acthd_reg);
}

250
static int init_ring_common(struct intel_ring_buffer *ring)
251
{
252
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
253
	struct drm_i915_gem_object *obj = ring->obj;
254
255
256
	u32 head;

	/* Stop the ring if it's running. */
257
	I915_WRITE_CTL(ring, 0);
258
	I915_WRITE_HEAD(ring, 0);
259
	ring->write_tail(ring, 0);
260
261

	/* Initialize the ring. */
262
	I915_WRITE_START(ring, obj->gtt_offset);
263
	head = I915_READ_HEAD(ring) & HEAD_ADDR;
264
265
266

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
267
268
269
270
271
272
273
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));
274

275
		I915_WRITE_HEAD(ring, 0);
276

277
278
279
280
281
282
283
284
285
		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
286
287
	}

288
	I915_WRITE_CTL(ring,
289
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
290
			| RING_VALID);
291
292

	/* If the head is still not zero, the ring is dead */
293
	if ((I915_READ_CTL(ring) & RING_VALID) == 0 ||
294
	    I915_READ_START(ring) != obj->gtt_offset ||
295
	    (I915_READ_HEAD(ring) & HEAD_ADDR) != 0) {
296
297
298
299
300
301
302
303
		DRM_ERROR("%s initialization failed "
				"ctl %08x head %08x tail %08x start %08x\n",
				ring->name,
				I915_READ_CTL(ring),
				I915_READ_HEAD(ring),
				I915_READ_TAIL(ring),
				I915_READ_START(ring));
		return -EIO;
304
305
	}

306
307
	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
308
	else {
309
		ring->head = I915_READ_HEAD(ring);
310
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
311
		ring->space = ring_space(ring);
312
	}
313

314
315
316
	return 0;
}

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), GFP_KERNEL);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}
337
338

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page =  kmap(obj->pages[0]);
	if (pc->cpu_page == NULL)
		goto err_unpin;

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	kfree(pc);
	return ret;
}

/*
 * Undo init_pipe_control(): unmap, unpin and drop the reference on the
 * backing object, then free the struct pipe_control and clear
 * ring->private.  A ring without private data is left untouched.
 */
static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *backing;

	if (pc == NULL)
		return;

	backing = pc->obj;

	kunmap(backing->pages[0]);
	i915_gem_object_unpin(backing);
	drm_gem_object_unreference(&backing->base);

	kfree(pc);
	ring->private = NULL;
}

380
static int init_render_ring(struct intel_ring_buffer *ring)
381
{
382
	struct drm_device *dev = ring->dev;
383
	struct drm_i915_private *dev_priv = dev->dev_private;
384
	int ret = init_ring_common(ring);
385

386
	if (INTEL_INFO(dev)->gen > 3) {
387
		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
388
		I915_WRITE(MI_MODE, mode);
389
390
391
392
		if (IS_GEN7(dev))
			I915_WRITE(GFX_MODE_GEN7,
				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
393
	}
394

395
	if (INTEL_INFO(dev)->gen >= 5) {
396
397
398
399
400
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

401
402
403
	if (INTEL_INFO(dev)->gen >= 6) {
		I915_WRITE(INSTPM,
			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
404
405
406
407
408
409
410
411

		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
412
413
	}

414
415
416
	return ret;
}

417
418
419
420
421
422
423
424
/* Release the pipe-control state of @ring, if any was set up. */
static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (ring->private)
		cleanup_pipe_control(ring);
}

425
static void
426
427
428
update_mboxes(struct intel_ring_buffer *ring,
	    u32 seqno,
	    u32 mmio_offset)
429
{
430
431
432
433
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
434
	intel_ring_emit(ring, seqno);
435
	intel_ring_emit(ring, mmio_offset);
436
437
}

438
439
440
441
442
443
444
445
446
/**
 * gen6_add_request - Update the semaphore mailbox registers
 * 
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
447
448
static int
gen6_add_request(struct intel_ring_buffer *ring,
449
		 u32 *seqno)
450
{
451
452
	u32 mbox1_reg;
	u32 mbox2_reg;
453
454
455
456
457
458
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

459
460
	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];
461

462
	*seqno = i915_gem_next_request_seqno(ring);
463
464
465

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
466
467
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
468
	intel_ring_emit(ring, *seqno);
469
470
471
472
473
474
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

475
476
477
478
479
480
481
482
483
484
485
/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
		struct intel_ring_buffer *signaller,
		int ring,
486
487
488
		u32 seqno)
{
	int ret;
489
490
491
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
492

493
	ret = intel_ring_begin(waiter, 4);
494
495
496
	if (ret)
		return ret;

497
498
499
500
501
	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);
502
503
504
505

	return 0;
}

506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
		    struct intel_ring_buffer *signaller,
		    u32 seqno)
{
	/* Warn if the signaller has no valid semaphore register for RCS. */
	WARN_ON(MI_SEMAPHORE_SYNC_INVALID == signaller->semaphore_register[RCS]);

	return intel_ring_sync(waiter, signaller, RCS, seqno);
}

/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	/* Warn if the signaller has no valid semaphore register for VCS. */
	WARN_ON(MI_SEMAPHORE_SYNC_INVALID == signaller->semaphore_register[VCS]);

	return intel_ring_sync(waiter, signaller, VCS, seqno);
}

/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	/* Warn if the signaller has no valid semaphore register for BCS. */
	WARN_ON(MI_SEMAPHORE_SYNC_INVALID == signaller->semaphore_register[BCS]);

	return intel_ring_sync(waiter, signaller, BCS, seqno);
}



547
548
/*
 * Emit a four-dword PIPE_CONTROL (qword write with depth stall) that
 * writes zero to the global-GTT address addr__.  The caller must have
 * reserved the ring space beforehand.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
	intel_ring_emit(ring__, 0);							\
	intel_ring_emit(ring__, 0);							\
} while (0)

/*
 * Emit a request on @ring using PIPE_CONTROL writes: store the next
 * seqno to the pipe-control page, flush six separate cachelines, then
 * store the seqno again with PIPE_CONTROL_NOTIFY set.
 *
 * @result: receives the seqno that was emitted.
 *
 * Returns 0 on success, or the error from intel_ring_begin().
 */
static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      u32 *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;
	int i;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	/* 32 dwords: 4 (seqno write) + 6 * 4 (flushes) + 4 (notify write). */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);

	/* Six flushes, 128 bytes apart so each writes a separate cacheline. */
	for (i = 0; i < 6; i++) {
		PIPE_CONTROL_FLUSH(ring, scratch_addr);
		scratch_addr += 128;
	}

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

608
609
610
611
/*
 * Emit a request on @ring: store the next seqno into the status page
 * at I915_GEM_HWS_INDEX and raise MI_USER_INTERRUPT.
 *
 * @result: receives the seqno that was emitted.
 *
 * Returns 0 on success, or the error from intel_ring_begin().
 */
static int
render_ring_add_request(struct intel_ring_buffer *ring,
			u32 *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

629
630
631
632
633
634
635
636
/* Read the seqno for @ring from its status page (gen6/gen7 variant). */
static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (IS_GEN6(dev) || IS_GEN7(dev))
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

642
static u32
643
ring_get_seqno(struct intel_ring_buffer *ring)
644
{
645
646
647
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

648
649
650
651
652
653
654
/* Read the seqno from the first dword of the pipe-control CPU page. */
static u32
pc_render_get_seqno(struct intel_ring_buffer *ring)
{
	struct pipe_control *pipe = ring->private;

	return *pipe->cpu_page;
}

655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
/*
 * Clear the @mask bits in the cached gt_irq_mask, write the result to
 * GTIMR and issue a posting read.
 */
static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
	dev_priv->gt_irq_mask = dev_priv->gt_irq_mask & ~mask;

	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

/*
 * Set the @mask bits in the cached gt_irq_mask, write the result to
 * GTIMR and issue a posting read.
 */
static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
	dev_priv->gt_irq_mask = dev_priv->gt_irq_mask | mask;

	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

/*
 * Clear the @mask bits in the cached irq_mask, write the result to IMR
 * and issue a posting read.
 */
static void
i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
	dev_priv->irq_mask = dev_priv->irq_mask & ~mask;

	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

/*
 * Set the @mask bits in the cached irq_mask, write the result to IMR
 * and issue a posting read.
 */
static void
i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
	dev_priv->irq_mask = dev_priv->irq_mask | mask;

	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

687
static bool
688
render_ring_get_irq(struct intel_ring_buffer *ring)
689
{
690
	struct drm_device *dev = ring->dev;
691
	drm_i915_private_t *dev_priv = dev->dev_private;
692

693
694
695
	if (!dev->irq_enabled)
		return false;

696
	spin_lock(&ring->irq_lock);
697
	if (ring->irq_refcount++ == 0) {
698
		if (HAS_PCH_SPLIT(dev))
699
700
			ironlake_enable_irq(dev_priv,
					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
701
702
703
		else
			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
	}
704
	spin_unlock(&ring->irq_lock);
705
706

	return true;
707
708
}

709
static void
710
render_ring_put_irq(struct intel_ring_buffer *ring)
711
{
712
	struct drm_device *dev = ring->dev;
713
	drm_i915_private_t *dev_priv = dev->dev_private;
714

715
	spin_lock(&ring->irq_lock);
716
	if (--ring->irq_refcount == 0) {
717
		if (HAS_PCH_SPLIT(dev))
718
719
720
			ironlake_disable_irq(dev_priv,
					     GT_USER_INTERRUPT |
					     GT_PIPE_NOTIFY);
721
722
723
		else
			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
	}
724
	spin_unlock(&ring->irq_lock);
725
726
}

727
void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
728
{
729
	struct drm_device *dev = ring->dev;
730
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
731
732
733
734
735
736
737
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
738
		case RCS:
739
740
			mmio = RENDER_HWS_PGA_GEN7;
			break;
741
		case BCS:
742
743
			mmio = BLT_HWS_PGA_GEN7;
			break;
744
		case VCS:
745
746
747
748
749
750
751
752
753
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

754
755
	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
756
757
}

758
static int
759
760
761
bsd_ring_flush(struct intel_ring_buffer *ring,
	       u32     invalidate_domains,
	       u32     flush_domains)
762
{
763
764
765
766
767
768
769
770
771
772
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
773
774
}

775
static int
776
ring_add_request(struct intel_ring_buffer *ring,
777
		 u32 *result)
778
779
{
	u32 seqno;
780
781
782
783
784
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;
785

786
	seqno = i915_gem_next_request_seqno(ring);
787

788
789
790
791
792
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);
793

794
795
	*result = seqno;
	return 0;
796
797
}

798
799
800
801
/*
 * Take a reference on a gen6+ ring interrupt.  On the first reference
 * @rflag is cleared in the ring's own IMR and @gflag is enabled in the
 * GT interrupt mask.
 *
 * Returns false if device interrupts are not enabled (no force-wake
 * reference is taken in that case), true otherwise.
 */
static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
	       return false;

	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
	gen6_gt_force_wake_get(dev_priv);

	spin_lock(&ring->irq_lock);
	if (ring->irq_refcount++ == 0) {
		ring->irq_mask &= ~rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_enable_irq(dev_priv, gflag);
	}
	spin_unlock(&ring->irq_lock);

	return true;
}

/*
 * Drop a reference on a gen6+ ring interrupt.  On the last reference
 * @rflag is set again in the ring's IMR and @gflag is disabled in the
 * GT interrupt mask.  The force-wake reference taken by
 * gen6_ring_get_irq() is released on every call.
 */
static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock(&ring->irq_lock);
	if (--ring->irq_refcount == 0) {
		ring->irq_mask |= rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_disable_irq(dev_priv, gflag);
	}
	spin_unlock(&ring->irq_lock);

	gen6_gt_force_wake_put(dev_priv);
}

840
static bool
841
bsd_ring_get_irq(struct intel_ring_buffer *ring)
842
{
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	spin_lock(&ring->irq_lock);
	if (ring->irq_refcount++ == 0) {
		if (IS_G4X(dev))
			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
	spin_unlock(&ring->irq_lock);

	return true;
859
860
861
862
}
static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
863
864
865
866
867
868
869
870
871
872
873
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock(&ring->irq_lock);
	if (--ring->irq_refcount == 0) {
		if (IS_G4X(dev))
			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
	spin_unlock(&ring->irq_lock);
874
875
876
}

static int
877
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
878
{
879
	int ret;
880

881
882
883
884
	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

885
	intel_ring_emit(ring,
886
			MI_BATCH_BUFFER_START | (2 << 6) |
887
			MI_BATCH_NON_SECURE_I965);
888
	intel_ring_emit(ring, offset);
889
890
	intel_ring_advance(ring);

891
892
893
	return 0;
}

894
static int
895
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
896
				u32 offset, u32 len)
897
{
898
	struct drm_device *dev = ring->dev;
899
	int ret;
900

901
902
903
904
	if (IS_I830(dev) || IS_845G(dev)) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;
905

906
907
908
909
910
911
912
913
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, 0);
	} else {
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;
914

915
916
917
918
919
		if (INTEL_INFO(dev)->gen >= 4) {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6) |
					MI_BATCH_NON_SECURE_I965);
			intel_ring_emit(ring, offset);
920
		} else {
921
922
923
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6));
			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
924
925
		}
	}
926
	intel_ring_advance(ring);
927
928
929
930

	return 0;
}

931
static void cleanup_status_page(struct intel_ring_buffer *ring)
932
{
933
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
934
	struct drm_i915_gem_object *obj;
935

936
937
	obj = ring->status_page.obj;
	if (obj == NULL)
938
939
		return;

940
	kunmap(obj->pages[0]);
941
	i915_gem_object_unpin(obj);
942
	drm_gem_object_unreference(&obj->base);
943
	ring->status_page.obj = NULL;
944
945
946
947

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}

948
static int init_status_page(struct intel_ring_buffer *ring)
949
{
950
	struct drm_device *dev = ring->dev;
951
	drm_i915_private_t *dev_priv = dev->dev_private;
952
	struct drm_i915_gem_object *obj;
953
954
955
956
957
958
959
960
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}
961
962

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
963

964
	ret = i915_gem_object_pin(obj, 4096, true);
965
966
967
968
	if (ret != 0) {
		goto err_unref;
	}

969
970
	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = kmap(obj->pages[0]);
971
	if (ring->status_page.page_addr == NULL) {
972
973
974
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		goto err_unpin;
	}
975
976
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
977

978
	intel_ring_setup_status_page(ring);
979
980
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			ring->name, ring->status_page.gfx_addr);
981
982
983
984
985
986

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
987
	drm_gem_object_unreference(&obj->base);
988
err:
989
	return ret;
990
991
}

992
int intel_init_ring_buffer(struct drm_device *dev,
993
			   struct intel_ring_buffer *ring)
994
{
995
	struct drm_i915_gem_object *obj;
996
997
	int ret;

998
	ring->dev = dev;
999
1000
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
1001
	INIT_LIST_HEAD(&ring->gpu_write_list);
1002

1003
	init_waitqueue_head(&ring->irq_queue);
1004
	spin_lock_init(&ring->irq_lock);
1005
	ring->irq_mask = ~0;
1006

1007
	if (I915_NEED_GFX_HWS(dev)) {
1008
		ret = init_status_page(ring);
1009
1010
1011
		if (ret)
			return ret;
	}
1012

1013
	obj = i915_gem_alloc_object(dev, ring->size);
1014
1015
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
1016
		ret = -ENOMEM;
1017
		goto err_hws;
1018
1019
	}

1020
	ring->obj = obj;
1021

1022
	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1023
1024
	if (ret)
		goto err_unref;
1025

1026
	ring->map.size = ring->size;
1027
	ring->map.offset = dev->agp->base + obj->gtt_offset;
1028
1029
1030
1031
1032
1033
1034
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.handle == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
1035
		ret = -EINVAL;
1036
		goto err_unpin;
1037
1038
	}

1039
	ring->virtual_start = ring->map.handle;
1040
	ret = ring->init(ring);
1041
1042
	if (ret)
		goto err_unmap;
1043

1044
1045
1046
1047
1048
	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
1049
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1050
1051
		ring->effective_size -= 128;

1052
	return 0;
1053
1054
1055
1056
1057
1058

err_unmap:
	drm_core_ioremapfree(&ring->map, dev);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
1059
1060
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
1061
err_hws:
1062
	cleanup_status_page(ring);
1063
	return ret;
1064
1065
}

1066
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1067
{
1068
1069
1070
	struct drm_i915_private *dev_priv;
	int ret;

1071
	if (ring->obj == NULL)
1072
1073
		return;

1074
1075
	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
1076
	ret = intel_wait_ring_idle(ring);
1077
1078
1079
1080
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

1081
1082
	I915_WRITE_CTL(ring, 0);

1083
	drm_core_ioremapfree(&ring->map, ring->dev);
1084

1085
1086
1087
	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;
1088

Zou Nan hai's avatar
Zou Nan hai committed
1089
1090
1091
	if (ring->cleanup)
		ring->cleanup(ring);

1092
	cleanup_status_page(ring);
1093
1094
}

1095
static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1096
{
1097
	unsigned int *virt;
1098
	int rem = ring->size - ring->tail;
1099

1100
	if (ring->space < rem) {
1101
		int ret = intel_wait_ring_buffer(ring, rem);
1102
1103
1104
1105
		if (ret)
			return ret;
	}

1106
	virt = (unsigned int *)(ring->virtual_start + ring->tail);
1107
1108
	rem /= 8;
	while (rem--) {
1109
		*virt++ = MI_NOOP;
1110
1111
		*virt++ = MI_NOOP;
	}
1112

1113
	ring->tail = 0;
1114
	ring->space = ring_space(ring);
1115
1116
1117
1118

	return 0;
}

1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
/*
 * Wait for @seqno on @ring with dev_priv->mm.interruptible forced to
 * false for the duration of the wait; the previous setting is restored
 * before returning.
 *
 * Returns the result of i915_wait_request().
 */
static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	/* XXX As we have not yet audited all the paths to check that
	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
	 * allow us to be interruptible by a signal.
	 */
	const bool saved_interruptible = dev_priv->mm.interruptible;
	int err;

	dev_priv->mm.interruptible = false;
	err = i915_wait_request(ring, seqno, true);
	dev_priv->mm.interruptible = saved_interruptible;

	return err;
}

/*
 * Try to obtain at least @n bytes of ring space by retiring requests and,
 * if that is not enough, waiting for an outstanding request whose
 * completion frees enough space.
 *
 * Returns 0 once ring->space >= @n, -ENOSPC if no request on the list
 * would free enough space (or the post-wait sanity checks fail), or the
 * error returned by intel_ring_wait_seqno().
 */
static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	/* -1 marks "no retired head recorded"; otherwise consume the value. */
	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	/* Look for the first request whose tail position would leave >= n bytes. */
	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		/* Requests already consumed by an earlier pass are skipped. */
		if (request->tail == -1)
			continue;

		/* Same calculation as ring_space(), with request->tail as the head. */
		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	/* seqno is still 0 when no suitable request was found. */
	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	/* NOTE(review): presumably request retirement during the wait sets last_retired_head — confirm. */
	if (WARN_ON(ring->last_retired_head == -1))
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (WARN_ON(ring->space < n))
		return -ENOSPC;

	return 0;
}

1196
int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1197
{
1198
	struct drm_device *dev = ring->dev;
1199
	struct drm_i915_private *dev_priv = dev->dev_private;
1200
	unsigned long end;
1201
	int ret;
1202