/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

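/* Wait for any pending GPU reset to complete (for at most ten seconds) so
 * that callers do not race against the error handler while the GPU is
 * wedged.
 */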
static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

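/* Common creation helper shared by the create and dumb_create ioctls:
 * round the requested size up to a whole page, allocate the object and
 * return a new handle, which then holds the sole reference.
 */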
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);

	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

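/* On platforms that swizzle based on bit 17 of the physical page address,
 * tiled objects cannot be copied with a plain linear memcpy, so pread and
 * pwrite must take the manual swizzling paths below.
 */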
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

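/* Helpers that copy to/from a bit-17 swizzled object one cacheline at a
 * time, swapping the two 64-byte halves of every 128-byte block
 * (offset ^ 64) to undo the hardware swizzle.
 */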
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret;
}

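/* Clflush a range that may belong to a swizzled object: when swizzled,
 * round the range out to whole 128-byte blocks so that both halves of
 * every block are flushed, whichever channel they actually live on.
 */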
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret;
}

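/* Backing-store pread: for each page, try the atomic kmap fastpath first
 * and, if that faults or bit-17 swizzling is required, drop struct_mutex,
 * prefault the user buffer and retry with the sleeping slowpath.
 */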
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int prefaulted = 0;
	int needs_clflush = 0;
	int release_page;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		if (obj->pages) {
			page = obj->pages[offset >> PAGE_SHIFT];
			release_page = 0;
		} else {
			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
			if (IS_ERR(page)) {
				ret = PTR_ERR(page);
				goto out;
			}
			release_page = 1;
		}

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		page_cache_get(page);
		mutex_unlock(&dev->struct_mutex);

		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);
		page_cache_release(page);
next_page:
		mark_page_accessed(page);
		if (release_page)
			page_cache_release(page);

		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int release_page;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing.  */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		int partial_cacheline_write;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		if (obj->pages) {
			page = obj->pages[offset >> PAGE_SHIFT];
			release_page = 0;
		} else {
			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
			if (IS_ERR(page)) {
				ret = PTR_ERR(page);
				goto out;
			}
			release_page = 1;
		}

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		page_cache_get(page);
		mutex_unlock(&dev->struct_mutex);

		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);
		page_cache_release(page);
next_page:
		set_page_dirty(page);
		mark_page_accessed(page);
		if (release_page)
			page_cache_release(page);

		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			intel_gtt_chipset_flush();
		}
	}

	if (needs_clflush_after)
		intel_gtt_chipset_flush();

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
					   args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

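/* Translate the current wedged state into an errno for GEM callers:
 * -EAGAIN while a reset is still pending (interruptible callers only),
 * -EIO once recovery has completed but the GPU is still wedged.
 */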
int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			set_normalized_timespec(timeout, 0, 0);
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, true, NULL);
	mutex_lock(&dev->struct_mutex);

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;