/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

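/* Wait for any pending GPU reset to complete so callers do not start
 * touching the device while the error handler still owns it; gives up
 * with -EIO if the reset has not finished within 10 seconds. */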
static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

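/* An object is "inactive" when it is bound into the GTT but no longer
 * referenced by any command buffer the GPU is still processing. */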
static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

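/* Common creation helper shared by the GEM create and dumb-buffer ioctls:
 * allocates a shmem-backed object of the (page-aligned) size and returns a
 * new handle for it. */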
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

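/* Bit-17 swizzled objects need the extra swizzle handling in the pread/pwrite
 * slow paths; this only applies to tiled objects on machines using the
 * 9/10/17 swizzle mode. */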
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

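/* Copy helpers for pages whose physical bit 17 is swizzled: the copy walks
 * the buffer one 64-byte cacheline chunk at a time and flips bit 6 of the
 * GPU offset so each chunk is accessed at the location the swizzle actually
 * maps it to. */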
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

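/* As above, but copying in the other direction: from user memory into the
 * swizzled object. */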
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

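/* clflush the given range; when swizzling is in effect the range is rounded
 * out to whole 128-byte blocks so both halves of every swizzled cacheline
 * pair get flushed. */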
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

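/* Fallback pread through the CPU: copies the object's backing pages out to
 * user space, trying the atomic per-page fastpath first and dropping
 * struct_mutex for the prefaulting slowpath only when that fails. */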
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct scatterlist *sg;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);

		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

next_page:
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

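/* Fallback pwrite through the CPU: copies user data into the object's
 * backing pages, clflushing partially written cachelines beforehand and the
 * written range afterwards when the object is uncached, so the GPU sees
 * coherent data. */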
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;
	struct scatterlist *sg;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing.  */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

next_page:
		set_page_dirty(page);
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			intel_gtt_chipset_flush();
		}
	}

	if (needs_clflush_after)
		intel_gtt_chipset_flush();

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
					   args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

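/* Check whether the GPU is wedged: returns 0 if it is usable, -EAGAIN while
 * the reset handler is still running (for interruptible callers), and -EIO
 * if the caller cannot handle -EAGAIN or a reset has already failed. */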
int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the alloted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			set_normalized_timespec(timeout, 0, 0);
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, true, NULL);
	mutex_lock(&dev->struct_mutex);

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;