/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

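/* Called when an object loses its fence register. Tiled objects drop their
 * GTT mmap so that the next access faults and can re-acquire a fence.
 */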
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

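/* If the GPU is wedged, wait (up to 10 seconds) for the reset handler to
 * finish before letting the caller proceed.
 */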
static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

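/* Take struct_mutex interruptibly, first giving any pending GPU reset a
 * chance to complete.
 */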
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

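/* An object is inactive once it is bound into the GTT but is no longer in
 * use by the GPU.
 */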
static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

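/* Legacy (user mode setting) ioctl: reserve the GTT range that GEM may use
 * for object placement. Not available with KMS or on gen5+.
 */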
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

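/* Report the total GTT aperture size and how much of it is not consumed by
 * pinned objects.
 */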
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

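/* Common backend for the create and dumb-create ioctls: allocate a
 * page-aligned object of the requested size and return a new handle for it.
 */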
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);

	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

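/* Create a "dumb" scanout buffer: derive a 64-byte aligned pitch from the
 * requested width and bpp, then allocate pitch * height bytes.
 */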
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

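/* Tiled objects on platforms that swizzle on physical address bit 17 need
 * the CPU pread/pwrite paths below to swizzle manually, page by page.
 */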
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

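/* Copy out of a bit-17-swizzled GPU page to userspace, one cacheline at a
 * time, toggling bit 6 of the source offset to undo the swizzle.
 */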
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

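/* Inverse of __copy_to_user_swizzled: copy user data into a bit-17-swizzled
 * GPU page, cacheline by cacheline.
 */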
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

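/* Clflush a range that may be bit-17 swizzled. With swizzling we round the
 * range out to 128 bytes so that both possible cachelines get flushed.
 */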
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

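/* Copy object contents out to userspace through the shmem backing pages,
 * trying the atomic kmap fastpath per page and falling back to the slow
 * path (dropping struct_mutex and prefaulting the user buffer) on a fault.
 */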
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct scatterlist *sg;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);

		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

next_page:
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;
	struct scatterlist *sg;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing.  */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

next_page:
		set_page_dirty(page);
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			intel_gtt_chipset_flush();
		}
	}

	if (needs_clflush_after)
		intel_gtt_chipset_flush();

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
					   args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

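/* Check whether the GPU is wedged: returns -EAGAIN while a reset is still
 * pending (for interruptible callers) and -EIO once recovery has completed
 * without unwedging, or when the caller cannot handle a retry.
 */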
int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the alloted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged * */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			set_normalized_timespec(timeout, 0, 0);
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, true, NULL);
	mutex_lock(&dev->struct_mutex);

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));