r600_cs.c 73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
28
#include <linux/kernel.h>
29
30
31
#include "drmP.h"
#include "radeon.h"
#include "r600d.h"
32
#include "r600_reg_safe.h"
33
34
35
36
37
38
39

static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc);
static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc);
typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
40
41
extern void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size);

42

43
struct r600_cs_track {
44
45
46
47
48
	/* configuration we miror so that we use same code btw kms/ums */
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	/* value we track */
49
	u32			sq_config;
50
	u32			log_nsamples;
51
52
53
	u32			nsamples;
	u32			cb_color_base_last[8];
	struct radeon_bo	*cb_color_bo[8];
54
	u64			cb_color_bo_mc[8];
55
56
57
58
59
60
	u64			cb_color_bo_offset[8];
	struct radeon_bo	*cb_color_frag_bo[8];
	u64			cb_color_frag_offset[8];
	struct radeon_bo	*cb_color_tile_bo[8];
	u64			cb_color_tile_offset[8];
	u32			cb_color_mask[8];
61
	u32			cb_color_info[8];
62
	u32			cb_color_view[8];
63
	u32			cb_color_size_idx[8]; /* unused */
64
	u32			cb_target_mask;
65
	u32			cb_shader_mask;  /* unused */
66
	bool			is_resolve;
67
68
69
	u32			cb_color_size[8];
	u32			vgt_strmout_en;
	u32			vgt_strmout_buffer_en;
70
	struct radeon_bo	*vgt_strmout_bo[4];
71
	u64			vgt_strmout_bo_mc[4]; /* unused */
72
73
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
74
75
76
77
78
79
80
	u32			db_depth_control;
	u32			db_depth_info;
	u32			db_depth_size_idx;
	u32			db_depth_view;
	u32			db_depth_size;
	u32			db_offset;
	struct radeon_bo	*db_bo;
81
	u64			db_bo_mc;
82
	bool			sx_misc_kill_all_prims;
83
84
85
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
86
87
88
	struct radeon_bo	*htile_bo;
	u64			htile_offset;
	u32			htile_surface;
89
90
};

91
92
#define FMT_8_BIT(fmt, vc)   [fmt] = { 1, 1, 1, vc, CHIP_R600 }
#define FMT_16_BIT(fmt, vc)  [fmt] = { 1, 1, 2, vc, CHIP_R600 }
93
#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 4,  0, CHIP_R600 }
94
#define FMT_32_BIT(fmt, vc)  [fmt] = { 1, 1, 4, vc, CHIP_R600 }
95
#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 8,  0, CHIP_R600 }
96
97
98
#define FMT_64_BIT(fmt, vc)  [fmt] = { 1, 1, 8, vc, CHIP_R600 }
#define FMT_96_BIT(fmt)      [fmt] = { 1, 1, 12, 0, CHIP_R600 }
#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16,vc, CHIP_R600 }
99
100
101
102
103
104

struct gpu_formats {
	unsigned blockwidth;
	unsigned blockheight;
	unsigned blocksize;
	unsigned valid_color;
105
	enum radeon_family min_family;
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
};

static const struct gpu_formats color_formats_table[] = {
	/* 8 bit */
	FMT_8_BIT(V_038004_COLOR_8, 1),
	FMT_8_BIT(V_038004_COLOR_4_4, 1),
	FMT_8_BIT(V_038004_COLOR_3_3_2, 1),
	FMT_8_BIT(V_038004_FMT_1, 0),

	/* 16-bit */
	FMT_16_BIT(V_038004_COLOR_16, 1),
	FMT_16_BIT(V_038004_COLOR_16_FLOAT, 1),
	FMT_16_BIT(V_038004_COLOR_8_8, 1),
	FMT_16_BIT(V_038004_COLOR_5_6_5, 1),
	FMT_16_BIT(V_038004_COLOR_6_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_1_5_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_4_4_4_4, 1),
	FMT_16_BIT(V_038004_COLOR_5_5_5_1, 1),

	/* 24-bit */
	FMT_24_BIT(V_038004_FMT_8_8_8),
127

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
	/* 32-bit */
	FMT_32_BIT(V_038004_COLOR_32, 1),
	FMT_32_BIT(V_038004_COLOR_32_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_16_16, 1),
	FMT_32_BIT(V_038004_COLOR_16_16_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_8_24, 1),
	FMT_32_BIT(V_038004_COLOR_8_24_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_24_8, 1),
	FMT_32_BIT(V_038004_COLOR_24_8_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_2_10_10_10, 1),
	FMT_32_BIT(V_038004_COLOR_8_8_8_8, 1),
	FMT_32_BIT(V_038004_COLOR_10_10_10_2, 1),
	FMT_32_BIT(V_038004_FMT_5_9_9_9_SHAREDEXP, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8_8, 0),

	/* 48-bit */
	FMT_48_BIT(V_038004_FMT_16_16_16),
	FMT_48_BIT(V_038004_FMT_16_16_16_FLOAT),

	/* 64-bit */
	FMT_64_BIT(V_038004_COLOR_X24_8_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_32_32, 1),
	FMT_64_BIT(V_038004_COLOR_32_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16_FLOAT, 1),

	FMT_96_BIT(V_038004_FMT_32_32_32),
	FMT_96_BIT(V_038004_FMT_32_32_32_FLOAT),

	/* 128-bit */
	FMT_128_BIT(V_038004_COLOR_32_32_32_32, 1),
	FMT_128_BIT(V_038004_COLOR_32_32_32_32_FLOAT, 1),

	[V_038004_FMT_GB_GR] = { 2, 1, 4, 0 },
	[V_038004_FMT_BG_RG] = { 2, 1, 4, 0 },

	/* block compressed formats */
	[V_038004_FMT_BC1] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC2] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC3] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC4] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC5] = { 4, 4, 16, 0},
175
176
	[V_038004_FMT_BC6] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
	[V_038004_FMT_BC7] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
177

178
179
	/* The other Evergreen formats */
	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
180
181
};

182
bool r600_fmt_is_valid_color(u32 format)
183
{
184
	if (format >= ARRAY_SIZE(color_formats_table))
185
		return false;
186

187
188
189
190
191
192
	if (color_formats_table[format].valid_color)
		return true;

	return false;
}

193
bool r600_fmt_is_valid_texture(u32 format, enum radeon_family family)
194
{
195
	if (format >= ARRAY_SIZE(color_formats_table))
196
		return false;
197

198
199
200
	if (family < color_formats_table[format].min_family)
		return false;

201
202
203
204
205
206
	if (color_formats_table[format].blockwidth > 0)
		return true;

	return false;
}

207
int r600_fmt_get_blocksize(u32 format)
208
{
209
	if (format >= ARRAY_SIZE(color_formats_table))
210
211
212
213
214
		return 0;

	return color_formats_table[format].blocksize;
}

215
int r600_fmt_get_nblocksx(u32 format, u32 w)
216
217
{
	unsigned bw;
218
219

	if (format >= ARRAY_SIZE(color_formats_table))
220
221
222
223
224
225
226
227
228
		return 0;

	bw = color_formats_table[format].blockwidth;
	if (bw == 0)
		return 0;

	return (w + bw - 1) / bw;
}

229
int r600_fmt_get_nblocksy(u32 format, u32 h)
230
231
{
	unsigned bh;
232
233

	if (format >= ARRAY_SIZE(color_formats_table))
234
235
236
237
238
239
240
241
242
		return 0;

	bh = color_formats_table[format].blockheight;
	if (bh == 0)
		return 0;

	return (h + bh - 1) / bh;
}

243
244
245
246
247
248
struct array_mode_checker {
	int array_mode;
	u32 group_size;
	u32 nbanks;
	u32 npipes;
	u32 nsamples;
249
	u32 blocksize;
250
251
252
};

/* returns alignment in pixels for pitch/height/depth and bytes for base */
253
static int r600_get_array_mode_alignment(struct array_mode_checker *values,
254
255
256
257
258
259
260
261
262
						u32 *pitch_align,
						u32 *height_align,
						u32 *depth_align,
						u64 *base_align)
{
	u32 tile_width = 8;
	u32 tile_height = 8;
	u32 macro_tile_width = values->nbanks;
	u32 macro_tile_height = values->npipes;
263
	u32 tile_bytes = tile_width * tile_height * values->blocksize * values->nsamples;
264
265
266
267
268
269
270
271
272
273
274
	u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;

	switch (values->array_mode) {
	case ARRAY_LINEAR_GENERAL:
		/* technically tile_width/_height for pitch/height */
		*pitch_align = 1; /* tile_width */
		*height_align = 1; /* tile_height */
		*depth_align = 1;
		*base_align = 1;
		break;
	case ARRAY_LINEAR_ALIGNED:
275
		*pitch_align = max((u32)64, (u32)(values->group_size / values->blocksize));
276
		*height_align = 1;
277
278
279
280
281
282
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_1D_TILED_THIN1:
		*pitch_align = max((u32)tile_width,
				   (u32)(values->group_size /
283
					 (tile_height * values->blocksize * values->nsamples)));
284
285
286
287
288
		*height_align = tile_height;
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_2D_TILED_THIN1:
289
290
291
		*pitch_align = max((u32)macro_tile_width * tile_width,
				(u32)((values->group_size * values->nbanks) /
				(values->blocksize * values->nsamples * tile_width)));
292
293
294
		*height_align = macro_tile_height * tile_height;
		*depth_align = 1;
		*base_align = max(macro_tile_bytes,
295
				  (*pitch_align) * values->blocksize * (*height_align) * values->nsamples);
296
297
298
299
300
301
302
303
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

304
305
306
307
static void r600_cs_track_init(struct r600_cs_track *track)
{
	int i;

308
309
	/* assume DX9 mode */
	track->sq_config = DX9_CONSTS;
310
311
312
313
314
	for (i = 0; i < 8; i++) {
		track->cb_color_base_last[i] = 0;
		track->cb_color_size[i] = 0;
		track->cb_color_size_idx[i] = 0;
		track->cb_color_info[i] = 0;
315
		track->cb_color_view[i] = 0xFFFFFFFF;
316
317
		track->cb_color_bo[i] = NULL;
		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
318
		track->cb_color_bo_mc[i] = 0xFFFFFFFF;
319
320
321
322
323
		track->cb_color_frag_bo[i] = NULL;
		track->cb_color_frag_offset[i] = 0xFFFFFFFF;
		track->cb_color_tile_bo[i] = NULL;
		track->cb_color_tile_offset[i] = 0xFFFFFFFF;
		track->cb_color_mask[i] = 0xFFFFFFFF;
324
	}
325
	track->is_resolve = false;
326
327
	track->nsamples = 16;
	track->log_nsamples = 4;
328
329
	track->cb_target_mask = 0xFFFFFFFF;
	track->cb_shader_mask = 0xFFFFFFFF;
330
	track->cb_dirty = true;
331
	track->db_bo = NULL;
332
	track->db_bo_mc = 0xFFFFFFFF;
333
334
335
336
337
338
	/* assume the biggest format and that htile is enabled */
	track->db_depth_info = 7 | (1 << 25);
	track->db_depth_view = 0xFFFFC000;
	track->db_depth_size = 0xFFFFFFFF;
	track->db_depth_size_idx = 0;
	track->db_depth_control = 0xFFFFFFFF;
339
	track->db_dirty = true;
340
341
342
	track->htile_bo = NULL;
	track->htile_offset = 0xFFFFFFFF;
	track->htile_surface = 0;
343
344
345
346
347
348
349

	for (i = 0; i < 4; i++) {
		track->vgt_strmout_size[i] = 0;
		track->vgt_strmout_bo[i] = NULL;
		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
		track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
	}
350
	track->streamout_dirty = true;
351
	track->sx_misc_kill_all_prims = false;
352
353
}

354
static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
355
356
{
	struct r600_cs_track *track = p->track;
357
	u32 slice_tile_max, size, tmp;
358
359
360
	u32 height, height_align, pitch, pitch_align, depth_align;
	u64 base_offset, base_align;
	struct array_mode_checker array_check;
361
	volatile u32 *ib = p->ib.ptr;
362
	unsigned array_mode;
363
	u32 format;
364
365
	/* When resolve is used, the second colorbuffer has always 1 sample. */
	unsigned nsamples = track->is_resolve && i == 1 ? 1 : track->nsamples;
366

367
	size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i];
368
	format = G_0280A0_FORMAT(track->cb_color_info[i]);
369
	if (!r600_fmt_is_valid_color(format)) {
370
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n",
371
			 __func__, __LINE__, format,
372
373
374
			i, track->cb_color_info[i]);
		return -EINVAL;
	}
375
376
	/* pitch in pixels */
	pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8;
377
	slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
378
	slice_tile_max *= 64;
379
	height = slice_tile_max / pitch;
380
381
	if (height > 8192)
		height = 8192;
382
	array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]);
383
384
385
386
387
388

	base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i];
	array_check.array_mode = array_mode;
	array_check.group_size = track->group_size;
	array_check.nbanks = track->nbanks;
	array_check.npipes = track->npipes;
389
	array_check.nsamples = nsamples;
390
	array_check.blocksize = r600_fmt_get_blocksize(format);
391
392
393
394
395
396
397
	if (r600_get_array_mode_alignment(&array_check,
					  &pitch_align, &height_align, &depth_align, &base_align)) {
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			 G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			 track->cb_color_info[i]);
		return -EINVAL;
	}
398
	switch (array_mode) {
399
	case V_0280A0_ARRAY_LINEAR_GENERAL:
400
		break;
401
402
403
	case V_0280A0_ARRAY_LINEAR_ALIGNED:
		break;
	case V_0280A0_ARRAY_1D_TILED_THIN1:
404
405
406
		/* avoid breaking userspace */
		if (height > 7)
			height &= ~0x7;
407
408
409
410
411
412
413
414
415
		break;
	case V_0280A0_ARRAY_2D_TILED_THIN1:
		break;
	default:
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			track->cb_color_info[i]);
		return -EINVAL;
	}
416
417

	if (!IS_ALIGNED(pitch, pitch_align)) {
418
419
		dev_warn(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n",
			 __func__, __LINE__, pitch, pitch_align, array_mode);
420
421
422
		return -EINVAL;
	}
	if (!IS_ALIGNED(height, height_align)) {
423
424
		dev_warn(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n",
			 __func__, __LINE__, height, height_align, array_mode);
425
426
427
		return -EINVAL;
	}
	if (!IS_ALIGNED(base_offset, base_align)) {
428
429
		dev_warn(p->dev, "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i,
			 base_offset, base_align, array_mode);
430
431
432
		return -EINVAL;
	}

433
	/* check offset */
434
	tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) *
435
	      r600_fmt_get_blocksize(format) * nsamples;
436
437
438
439
440
441
442
443
444
445
446
	switch (array_mode) {
	default:
	case V_0280A0_ARRAY_LINEAR_GENERAL:
	case V_0280A0_ARRAY_LINEAR_ALIGNED:
		tmp += track->cb_color_view[i] & 0xFF;
		break;
	case V_0280A0_ARRAY_1D_TILED_THIN1:
	case V_0280A0_ARRAY_2D_TILED_THIN1:
		tmp += G_028080_SLICE_MAX(track->cb_color_view[i]) * tmp;
		break;
	}
447
	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
448
449
450
		if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
			/* the initial DDX does bad things with the CB size occasionally */
			/* it rounds up height too far for slice tile max but the BO is smaller */
451
452
453
454
			/* r600c,g also seem to flush at bad times in some apps resulting in
			 * bogus values here. So for linear just allow anything to avoid breaking
			 * broken userspace.
			 */
455
		} else {
456
			dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n",
457
				 __func__, i, array_mode,
458
				 track->cb_color_bo_offset[i], tmp,
459
460
461
462
				 radeon_bo_size(track->cb_color_bo[i]),
				 pitch, height, r600_fmt_get_nblocksx(format, pitch),
				 r600_fmt_get_nblocksy(format, height),
				 r600_fmt_get_blocksize(format));
463
464
			return -EINVAL;
		}
465
	}
466
	/* limit max tile */
467
	tmp = (height * pitch) >> 6;
468
469
	if (tmp < slice_tile_max)
		slice_tile_max = tmp;
470
	tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) |
471
472
		S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
	ib[track->cb_color_size_idx[i]] = tmp;
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517

	/* FMASK/CMASK */
	switch (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
	case V_0280A0_TILE_DISABLE:
		break;
	case V_0280A0_FRAG_ENABLE:
		if (track->nsamples > 1) {
			uint32_t tile_max = G_028100_FMASK_TILE_MAX(track->cb_color_mask[i]);
			/* the tile size is 8x8, but the size is in units of bits.
			 * for bytes, do just * 8. */
			uint32_t bytes = track->nsamples * track->log_nsamples * 8 * (tile_max + 1);

			if (bytes + track->cb_color_frag_offset[i] >
			    radeon_bo_size(track->cb_color_frag_bo[i])) {
				dev_warn(p->dev, "%s FMASK_TILE_MAX too large "
					 "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n",
					 __func__, tile_max, bytes,
					 track->cb_color_frag_offset[i],
					 radeon_bo_size(track->cb_color_frag_bo[i]));
				return -EINVAL;
			}
		}
		/* fall through */
	case V_0280A0_CLEAR_ENABLE:
	{
		uint32_t block_max = G_028100_CMASK_BLOCK_MAX(track->cb_color_mask[i]);
		/* One block = 128x128 pixels, one 8x8 tile has 4 bits..
		 * (128*128) / (8*8) / 2 = 128 bytes per block. */
		uint32_t bytes = (block_max + 1) * 128;

		if (bytes + track->cb_color_tile_offset[i] >
		    radeon_bo_size(track->cb_color_tile_bo[i])) {
			dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large "
				 "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n",
				 __func__, block_max, bytes,
				 track->cb_color_tile_offset[i],
				 radeon_bo_size(track->cb_color_tile_bo[i]));
			return -EINVAL;
		}
		break;
	}
	default:
		dev_warn(p->dev, "%s invalid tile mode\n", __func__);
		return -EINVAL;
	}
518
519
520
	return 0;
}

521
522
523
524
525
526
527
528
529
530
static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
{
	struct r600_cs_track *track = p->track;
	u32 nviews, bpe, ntiles, size, slice_tile_max, tmp;
	u32 height_align, pitch_align, depth_align;
	u32 pitch = 8192;
	u32 height = 8192;
	u64 base_offset, base_align;
	struct array_mode_checker array_check;
	int array_mode;
531
	volatile u32 *ib = p->ib.ptr;
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625


	if (track->db_bo == NULL) {
		dev_warn(p->dev, "z/stencil with no depth buffer\n");
		return -EINVAL;
	}
	switch (G_028010_FORMAT(track->db_depth_info)) {
	case V_028010_DEPTH_16:
		bpe = 2;
		break;
	case V_028010_DEPTH_X8_24:
	case V_028010_DEPTH_8_24:
	case V_028010_DEPTH_X8_24_FLOAT:
	case V_028010_DEPTH_8_24_FLOAT:
	case V_028010_DEPTH_32_FLOAT:
		bpe = 4;
		break;
	case V_028010_DEPTH_X24_8_32_FLOAT:
		bpe = 8;
		break;
	default:
		dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
		return -EINVAL;
	}
	if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
		if (!track->db_depth_size_idx) {
			dev_warn(p->dev, "z/stencil buffer size not set\n");
			return -EINVAL;
		}
		tmp = radeon_bo_size(track->db_bo) - track->db_offset;
		tmp = (tmp / bpe) >> 6;
		if (!tmp) {
			dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
					track->db_depth_size, bpe, track->db_offset,
					radeon_bo_size(track->db_bo));
			return -EINVAL;
		}
		ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
	} else {
		size = radeon_bo_size(track->db_bo);
		/* pitch in pixels */
		pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
		slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
		slice_tile_max *= 64;
		height = slice_tile_max / pitch;
		if (height > 8192)
			height = 8192;
		base_offset = track->db_bo_mc + track->db_offset;
		array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
		array_check.array_mode = array_mode;
		array_check.group_size = track->group_size;
		array_check.nbanks = track->nbanks;
		array_check.npipes = track->npipes;
		array_check.nsamples = track->nsamples;
		array_check.blocksize = bpe;
		if (r600_get_array_mode_alignment(&array_check,
					&pitch_align, &height_align, &depth_align, &base_align)) {
			dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					G_028010_ARRAY_MODE(track->db_depth_info),
					track->db_depth_info);
			return -EINVAL;
		}
		switch (array_mode) {
		case V_028010_ARRAY_1D_TILED_THIN1:
			/* don't break userspace */
			height &= ~0x7;
			break;
		case V_028010_ARRAY_2D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					G_028010_ARRAY_MODE(track->db_depth_info),
					track->db_depth_info);
			return -EINVAL;
		}

		if (!IS_ALIGNED(pitch, pitch_align)) {
			dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n",
					__func__, __LINE__, pitch, pitch_align, array_mode);
			return -EINVAL;
		}
		if (!IS_ALIGNED(height, height_align)) {
			dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n",
					__func__, __LINE__, height, height_align, array_mode);
			return -EINVAL;
		}
		if (!IS_ALIGNED(base_offset, base_align)) {
			dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__,
					base_offset, base_align, array_mode);
			return -EINVAL;
		}

		ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
		nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
626
		tmp = ntiles * bpe * 64 * nviews * track->nsamples;
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
		if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
			dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n",
					array_mode,
					track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
					radeon_bo_size(track->db_bo));
			return -EINVAL;
		}
	}

	/* hyperz */
	if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
		unsigned long size;
		unsigned nbx, nby;

		if (track->htile_bo == NULL) {
			dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
				 __func__, __LINE__, track->db_depth_info);
			return -EINVAL;
		}
		if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
			dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n",
				 __func__, __LINE__, track->db_depth_size);
			return -EINVAL;
		}

		nbx = pitch;
		nby = height;
		if (G_028D24_LINEAR(track->htile_surface)) {
			/* nbx must be 16 htiles aligned == 16 * 8 pixel aligned */
			nbx = round_up(nbx, 16 * 8);
			/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
			nby = round_up(nby, track->npipes * 8);
		} else {
			/* htile widht & nby (8 or 4) make 2 bits number */
			tmp = track->htile_surface & 3;
			/* align is htile align * 8, htile align vary according to
			 * number of pipe and tile width and nby
			 */
			switch (track->npipes) {
			case 8:
				switch (tmp) {
				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
					nbx = round_up(nbx, 64 * 8);
					nby = round_up(nby, 64 * 8);
					break;
				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 64 * 8);
					nby = round_up(nby, 32 * 8);
					break;
				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 32 * 8);
					break;
				default:
					return -EINVAL;
				}
				break;
			case 4:
				switch (tmp) {
				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
					nbx = round_up(nbx, 64 * 8);
					nby = round_up(nby, 32 * 8);
					break;
				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 32 * 8);
					break;
				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 16 * 8);
					break;
				default:
					return -EINVAL;
				}
				break;
			case 2:
				switch (tmp) {
				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 32 * 8);
					break;
				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 16 * 8);
					break;
				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 16 * 8);
					nby = round_up(nby, 16 * 8);
					break;
				default:
					return -EINVAL;
				}
				break;
			case 1:
				switch (tmp) {
				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
					nbx = round_up(nbx, 32 * 8);
					nby = round_up(nby, 16 * 8);
					break;
				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 16 * 8);
					nby = round_up(nby, 16 * 8);
					break;
				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
					nbx = round_up(nbx, 16 * 8);
					nby = round_up(nby, 8 * 8);
					break;
				default:
					return -EINVAL;
				}
				break;
			default:
				dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
					 __func__, __LINE__, track->npipes);
				return -EINVAL;
			}
		}
		/* compute number of htile */
		nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
		nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
		size = nbx * nby * 4;
		size += track->htile_offset;

		if (size > radeon_bo_size(track->htile_bo)) {
			dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
				 __func__, __LINE__, radeon_bo_size(track->htile_bo),
				 size, nbx, nby);
			return -EINVAL;
		}
	}

	track->db_dirty = false;
	return 0;
}

766
767
768
769
770
771
772
773
774
static int r600_cs_track_check(struct radeon_cs_parser *p)
{
	struct r600_cs_track *track = p->track;
	u32 tmp;
	int r, i;

	/* on legacy kernel we don't perform advanced check */
	if (p->rdev == NULL)
		return 0;
775
776

	/* check streamout */
777
	if (track->streamout_dirty && track->vgt_strmout_en) {
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
		for (i = 0; i < 4; i++) {
			if (track->vgt_strmout_buffer_en & (1 << i)) {
				if (track->vgt_strmout_bo[i]) {
					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
						(u64)track->vgt_strmout_size[i];
					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
						DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
							  i, offset,
							  radeon_bo_size(track->vgt_strmout_bo[i]));
						return -EINVAL;
					}
				} else {
					dev_warn(p->dev, "No buffer for streamout %d\n", i);
					return -EINVAL;
				}
			}
		}
795
		track->streamout_dirty = false;
796
	}
797

798
799
800
	if (track->sx_misc_kill_all_prims)
		return 0;

801
802
803
	/* check that we have a cb for each enabled target, we don't check
	 * shader_mask because it seems mesa isn't always setting it :(
	 */
804
805
	if (track->cb_dirty) {
		tmp = track->cb_target_mask;
806
807
808
809
810
811

		/* We must check both colorbuffers for RESOLVE. */
		if (track->is_resolve) {
			tmp |= 0xff;
		}

812
813
814
815
816
817
818
819
820
821
822
823
		for (i = 0; i < 8; i++) {
			if ((tmp >> (i * 4)) & 0xF) {
				/* at least one component is enabled */
				if (track->cb_color_bo[i] == NULL) {
					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
					return -EINVAL;
				}
				/* perform rewrite of CB_COLOR[0-7]_SIZE */
				r = r600_cs_track_validate_cb(p, i);
				if (r)
					return r;
824
825
			}
		}
826
		track->cb_dirty = false;
827
	}
828

829
	/* Check depth buffer */
830
831
832
833
	if (track->db_dirty &&
	    G_028010_FORMAT(track->db_depth_info) != V_028010_DEPTH_INVALID &&
	    (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
	     G_028800_Z_ENABLE(track->db_depth_control))) {
834
835
836
		r = r600_cs_track_validate_db(p);
		if (r)
			return r;
837
	}
838

839
840
841
	return 0;
}

842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
/**
 * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet informations
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if packet is bigger than remaining ib size. or if packets is unknown.
 **/
int r600_cs_packet_parse(struct radeon_cs_parser *p,
			struct radeon_cs_packet *pkt,
			unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
862
	header = radeon_get_ib_value(p, idx);
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc informations
 *
 * Check next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		return -EINVAL;
	}
924
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

/**
 * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc informations
 *
 * Check next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		return -EINVAL;
	}
970
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
971
972
973
974
975
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}
976
	*cs_reloc = p->relocs;
977
978
979
980
981
	(*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
	(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
	return 0;
}

982
983
984
985
986
987
988
/**
 * r600_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
 * @parser:		parser structure holding parsing context.
 *
 * Check next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
989
static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return 0;
	}
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		return 0;
	}
	return 1;
}

1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
/**
 * r600_cs_packet_next_vline() - parse userspace VLINE packet
 * @parser:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET3 - WAIT_REG_MEM poll vline status reg
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT_REG_MEM packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, wait_reg_mem;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg, wait_reg_mem_info;
	volatile uint32_t *ib;

1029
	ib = p->ib.ptr;
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039

	/* parse the WAIT_REG_MEM */
	r = r600_cs_packet_parse(p, &wait_reg_mem, p->idx);
	if (r)
		return r;

	/* check its a WAIT_REG_MEM */
	if (wait_reg_mem.type != PACKET_TYPE3 ||
	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
1040
		return -EINVAL;
1041
1042
1043
1044
1045
1046
	}

	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
	/* bit 4 is reg (0) or mem (1) */
	if (wait_reg_mem_info & 0x10) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
1047
		return -EINVAL;
1048
1049
1050
1051
	}
	/* waiting for value to be equal */
	if ((wait_reg_mem_info & 0x7) != 0x3) {
		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1052
		return -EINVAL;
1053
1054
1055
	}
	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != AVIVO_D1MODE_VLINE_STATUS) {
		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1056
		return -EINVAL;
1057
1058
1059
1060
	}

	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != AVIVO_D1MODE_VLINE_STAT) {
		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1061
		return -EINVAL;
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
	}

	/* jump over the NOP */
	r = r600_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += wait_reg_mem.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
1075
	reg = CP_PACKET0_GET_REG(header);
1076

1077
1078
1079
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
1080
		return -EINVAL;
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
		ib[h_idx + 4] = PACKET2(0);
		ib[h_idx + 5] = PACKET2(0);
		ib[h_idx + 6] = PACKET2(0);
		ib[h_idx + 7] = PACKET2(0);
		ib[h_idx + 8] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R600_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
1103
			return -EINVAL;
1104
1105
1106
1107
		}
		ib[h_idx] = header;
		ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2;
	}
1108
1109

	return 0;
1110
1111
}

1112
1113
1114
1115
static int r600_packet0_check(struct radeon_cs_parser *p,
				struct radeon_cs_packet *pkt,
				unsigned idx, unsigned reg)
{
1116
1117
	int r;

1118
1119
	switch (reg) {
	case AVIVO_D1MODE_VLINE_START_END:
1120
1121
1122
1123
1124
1125
		r = r600_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			return r;
		}
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
				struct radeon_cs_packet *pkt)
{
	unsigned reg, i;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		r = r600_packet0_check(p, pkt, idx, reg);
		if (r) {
			return r;
		}
	}
	return 0;
}

1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
/**
 * r600_cs_check_reg() - check if register is authorized or not
 * @parser: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against r600_reg_safe_bm and return 0
 * if register is safe. If register is not flag as safe this function
 * will test it against a list of register needind special handling.
 */
1163
static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1164
1165
1166
1167
1168
1169
1170
{
	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
	struct radeon_cs_reloc *reloc;
	u32 m, i, tmp, *ib;
	int r;

	i = (reg >> 7);
1171
	if (i >= ARRAY_SIZE(r600_reg_safe_bm)) {
1172
1173
1174
1175
1176
1177
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (!(r600_reg_safe_bm[i] & m))
		return 0;
1178
	ib = p->ib.ptr;
1179
	switch (reg) {
Lucas De Marchi's avatar
Lucas De Marchi committed
1180
	/* force following reg to 0 in an attempt to disable out buffer
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
	 * which will need us to better understand how it works to perform
	 * security check on it (Jerome)
	 */
	case R_0288A8_SQ_ESGS_RING_ITEMSIZE:
	case R_008C44_SQ_ESGS_RING_SIZE:
	case R_0288B0_SQ_ESTMP_RING_ITEMSIZE:
	case R_008C54_SQ_ESTMP_RING_SIZE:
	case R_0288C0_SQ_FBUF_RING_ITEMSIZE:
	case R_008C74_SQ_FBUF_RING_SIZE:
	case R_0288B4_SQ_GSTMP_RING_ITEMSIZE:
	case R_008C5C_SQ_GSTMP_RING_SIZE:
	case R_0288AC_SQ_GSVS_RING_ITEMSIZE:
	case R_008C4C_SQ_GSVS_RING_SIZE:
	case R_0288BC_SQ_PSTMP_RING_ITEMSIZE:
	case R_008C6C_SQ_PSTMP_RING_SIZE:
	case R_0288C4_SQ_REDUC_RING_ITEMSIZE:
	case R_008C7C_SQ_REDUC_RING_SIZE:
	case R_0288B8_SQ_VSTMP_RING_ITEMSIZE:
	case R_008C64_SQ_VSTMP_RING_SIZE:
	case R_0288C8_SQ_GS_VERT_ITEMSIZE:
		/* get value to populate the IB don't remove */
		tmp =radeon_get_ib_value(p, idx);
		ib[idx] = 0;
		break;
1205
1206
1207
	case SQ_CONFIG:
		track->sq_config = radeon_get_ib_value(p, idx);
		break;
1208
1209
	case R_028800_DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
1210
		track->db_dirty = true;
1211
1212
		break;
	case R_028010_DB_DEPTH_INFO:
1213
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
1214
		    r600_cs_packet_next_is_pkt3_nop(p)) {
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
			r = r600_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
					 "0x%04X\n", reg);
				return -EINVAL;
			}
			track->db_depth_info = radeon_get_ib_value(p, idx);
			ib[idx] &= C_028010_ARRAY_MODE;
			track->db_depth_info &= C_028010_ARRAY_MODE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
				track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
			} else {
				ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
				track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
			}
1231
		} else {
1232
			track->db_depth_info = radeon_get_ib_value(p, idx);
1233
1234
		}
		track->db_dirty = true;
1235
1236
1237
		break;
	case R_028004_DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
1238
		track->db_dirty = true;
1239
1240
1241
1242
		break;
	case R_028000_DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_depth_size_idx = idx;
1243
		track->db_dirty = true;
1244
1245
1246
		break;
	case R_028AB0_VGT_STRMOUT_EN:
		track->vgt_strmout_en = radeon_get_ib_value(p, idx);
1247
		track->streamout_dirty = true;
1248
1249
1250
		break;
	case R_028B20_VGT_STRMOUT_BUFFER_EN:
		track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
1251
		track->streamout_dirty = true;
1252
		break;
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = r600_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
1268
		track->streamout_dirty = true;
1269
1270
1271
1272
1273
1274
1275
1276
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1277
		track->streamout_dirty = true;
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
		break;
	case CP_COHER_BASE:
		r = r600_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
1288
1289
	case R_028238_CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
1290
		track->cb_dirty = true;
1291
1292
1293
1294
1295
1296
		break;
	case R_02823C_CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		break;
	case R_028C04_PA_SC_AA_CONFIG:
		tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx));
1297
		track->log_nsamples = tmp;
1298
		track->nsamples = 1 << tmp;
1299
		track->cb_dirty = true;
1300
		break;
1301
1302
1303
1304
1305
	case R_028808_CB_COLOR_CONTROL:
		tmp = G_028808_SPECIAL_OP(radeon_get_ib_value(p, idx));
		track->is_resolve = tmp == V_028808_SPECIAL_RESOLVE_BOX;
		track->cb_dirty = true;
		break;
1306
1307
1308
1309
1310
1311
1312
1313
	case R_0280A0_CB_COLOR0_INFO:
	case R_0280A4_CB_COLOR1_INFO:
	case R_0280A8_CB_COLOR2_INFO:
	case R_0280AC_CB_COLOR3_INFO:
	case R_0280B0_CB_COLOR4_INFO:
	case R_0280B4_CB_COLOR5_INFO:
	case R_0280B8_CB_COLOR6_INFO:
	case R_0280BC_CB_COLOR7_INFO:
1314
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
1315
		     r600_cs_packet_next_is_pkt3_nop(p)) {
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
			r = r600_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
			} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
			}
		} else {
			tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		}
1334
		track->cb_dirty = true;
1335
		break;
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
	case R_028080_CB_COLOR0_VIEW:
	case R_028084_CB_COLOR1_VIEW:
	case R_028088_CB_COLOR2_VIEW:
	case R_02808C_CB_COLOR3_VIEW:
	case R_028090_CB_COLOR4_VIEW:
	case R_028094_CB_COLOR5_VIEW:
	case R_028098_CB_COLOR6_VIEW:
	case R_02809C_CB_COLOR7_VIEW:
		tmp = (reg - R_028080_CB_COLOR0_VIEW) / 4;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1346
		track->cb_dirty = true;
1347
		break;
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
	case R_028060_CB_COLOR0_SIZE:
	case R_028064_CB_COLOR1_SIZE:
	case R_028068_CB_COLOR2_SIZE:
	case R_02806C_CB_COLOR3_SIZE:
	case R_028070_CB_COLOR4_SIZE:
	case R_028074_CB_COLOR5_SIZE:
	case R_028078_CB_COLOR6_SIZE:
	case R_02807C_CB_COLOR7_SIZE:
		tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4;
		track->cb_color_size[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_size_idx[tmp] = idx;
1359
		track->cb_dirty = true;
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
		break;
		/* This register were added late, there is userspace
		 * which does provide relocation for those but set
		 * 0 offset. In order to avoid breaking old userspace
		 * we detect this and set address to point to last
		 * CB_COLOR0_BASE, note that if userspace doesn't set
		 * CB_COLOR0_BASE before this register we will report
		 * error. Old userspace always set CB_COLOR0_BASE
		 * before any of this.
		 */
	case R_0280E0_CB_COLOR0_FRAG:
	case R_0280E4_CB_COLOR1_FRAG:
	case R_0280E8_CB_COLOR2_FRAG:
	case R_0280EC_CB_COLOR3_FRAG:
	case R_0280F0_CB_COLOR4_FRAG:
	case R_0280F4_CB_COLOR5_FRAG:
	case R_0280F8_CB_COLOR6_FRAG:
	case R_0280FC_CB_COLOR7_FRAG:
		tmp = (reg - R_0280E0_CB_COLOR0_FRAG) / 4;
		if (!r600_cs_packet_next_is_pkt3_nop(p)) {
			if (!track->cb_color_base_last[tmp]) {
				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
				return -EINVAL;
			}
			track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp];
1385
1386
			track->cb_color_frag_offset[tmp] = track->cb_color_bo_offset[tmp];
			ib[idx] = track->cb_color_base_last[tmp];
1387
1388
1389
1390
1391
1392
1393
		} else {
			r = r600_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			track->cb_color_frag_bo[tmp] = reloc->robj;
1394
1395
1396
1397
1398
			track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8;
			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		}
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
		}
		break;
	case R_0280C0_CB_COLOR0_TILE:
	case R_0280C4_CB_COLOR1_TILE:
	case R_0280C8_CB_COLOR2_TILE:
	case R_0280CC_CB_COLOR3_TILE:
	case R_0280D0_CB_COLOR4_TILE:
	case R_0280D4_CB_COLOR5_TILE:
	case R_0280D8_CB_COLOR6_TILE:
	case R_0280DC_CB_COLOR7_TILE:
		tmp = (reg - R_0280C0_CB_COLOR0_TILE) / 4;
		if (!r600_cs_packet_next_is_pkt3_nop(p)) {
			if (!track->cb_color_base_last[tmp]) {
				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
				return -EINVAL;
			}
			track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp];
1416
1417
			track->cb_color_tile_offset[tmp] = track->cb_color_bo_offset[tmp];
			ib[idx] = track->cb_color_base_last[tmp];
1418
1419
1420
1421
1422
1423
1424
		} else {
			r = r600_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			track->cb_color_tile_bo[tmp] = reloc->robj;
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
			track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8;
			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		}
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
		}
		break;
	case R_028100_CB_COLOR0_MASK:
	case R_028104_CB_COLOR1_MASK:
	case R_028108_CB_COLOR2_MASK:
	case R_02810C_CB_COLOR3_MASK:
	case R_028110_CB_COLOR4_MASK:
	case R_028114_CB_COLOR5_MASK:
	case R_028118_CB_COLOR6_MASK:
	case R_02811C_CB_COLOR7_MASK:
		tmp = (reg - R_028100_CB_COLOR0_MASK) / 4;
1441
		track->cb_color_mask[tmp] = radeon_get_ib_value(p, idx);
1442
1443
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
		}
		break;
	case CB_COLOR0_BASE:
	case CB_COLOR1_BASE:
	case CB_COLOR2_BASE:
	case CB_COLOR3_BASE:
	case CB_COLOR4_BASE:
	case CB_COLOR5_BASE:
	case CB_COLOR6_BASE:
	case CB_COLOR7_BASE:
		r = r600_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
1460
		tmp = (reg - CB_COLOR0_BASE) / 4;
1461
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1462
1463
1464
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->cb_color_base_last[tmp] = ib[idx];
		track->cb_color_bo[tmp] = reloc->robj;
1465
		track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset;
1466
		track->cb_dirty = true;
1467
1468
1469
1470
1471
1472
1473
1474
		break;
	case DB_DEPTH_BASE:
		r = r600_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
1475
		track->db_offset = radeon_get_ib_value(p, idx) << 8;
1476
1477
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_bo = reloc->robj;
1478
		track->db_bo_mc = reloc->lobj.gpu_offset;
1479
		track->db_dirty = true;
1480
1481
		break;
	case DB_HTILE_DATA_BASE:
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
		r = r600_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", r