1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#include <xf86drm.h>
38#include <xf86atomic.h>
39#include <fcntl.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <assert.h>
45#include <pthread.h>
46#include <sys/ioctl.h>
47#include <sys/stat.h>
48#include <sys/types.h>
49#include <stdbool.h>
50
51#include <errno.h>
52#ifndef ETIME
53#define ETIME ETIMEDOUT
54#endif
55#include "libdrm_macros.h"
56#include "libdrm_lists.h"
57#include "intel_bufmgr.h"
58#include "intel_bufmgr_priv.h"
59#include "intel_chipset.h"
61
62#include "i915_drm.h"
63#include "uthash.h"
64
65#if HAVE_VALGRIND
66#include <valgrind.h>
67#include <memcheck.h>
68#define VG(x) x
69#else
70#define VG(x)
71#endif
72
73#define memclear(s) memset(&s, 0, sizeof(s))
74
75#define DBG(...) do {					\
76	if (bufmgr_gem->bufmgr.debug)			\
77		fprintf(stderr, __VA_ARGS__);		\
78} while (0)
79
80#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
81#define MAX2(A, B) ((A) > (B) ? (A) : (B))
82
83/**
84 * upper_32_bits - return bits 32-63 of a number
85 * @n: the number we're accessing
86 *
87 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
88 * the "right shift count >= width of type" warning when that quantity is
89 * 32 bits.
90 */
91#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
92
93/**
94 * lower_32_bits - return bits 0-31 of a number
95 * @n: the number we're accessing
96 */
97#define lower_32_bits(n) ((__u32)(n))
98
99typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
100
101struct drm_intel_gem_bo_bucket {
102	drmMMListHead head;
103	unsigned long size;
104};
105
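/*
 * Per-device buffer manager state: the DRM file descriptor, the execbuf2
 * object/BO arrays accumulated between exec calls, the power-of-two BO
 * reuse cache, the cached-mmap (VMA) bookkeeping, and the detected
 * hardware capabilities.
 */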
106typedef struct _drm_intel_bufmgr_gem {
107	drm_intel_bufmgr bufmgr;
108
109	atomic_t refcount;
110
111	int fd;
112
113	int max_relocs;
114
115	pthread_mutex_t lock;
116
117	struct drm_i915_gem_exec_object2 *exec2_objects;
118	drm_intel_bo **exec_bos;
119	int exec_size;
120	int exec_count;
121
122	/** Array of lists of cached gem objects of power-of-two sizes */
123	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
124	int num_buckets;
125	time_t time;
126
127	drmMMListHead managers;
128
129	drm_intel_bo_gem *name_table;
130	drm_intel_bo_gem *handle_table;
131
132	drmMMListHead vma_cache;
133	int vma_count, vma_open, vma_max;
134
135	uint64_t gtt_size;
136	int available_fences;
137	int pci_device;
138	int gen;
139	unsigned int has_bsd : 1;
140	unsigned int has_blt : 1;
141	unsigned int has_relaxed_fencing : 1;
142	unsigned int has_llc : 1;
143	unsigned int has_wait_timeout : 1;
144	unsigned int bo_reuse : 1;
145	unsigned int no_exec : 1;
146	unsigned int has_vebox : 1;
147	unsigned int has_exec_async : 1;
148	bool fenced_relocs;
149
150	struct {
151		void *ptr;
152		uint32_t handle;
153	} userptr_active;
154
155} drm_intel_bufmgr_gem;
156
157#define DRM_INTEL_RELOC_FENCE (1<<0)
158
159typedef struct _drm_intel_reloc_target_info {
160	drm_intel_bo *bo;
161	int flags;
162} drm_intel_reloc_target;
163
164struct _drm_intel_bo_gem {
165	drm_intel_bo bo;
166
167	atomic_t refcount;
168	uint32_t gem_handle;
169	const char *name;
170
171	/**
172	 * Kernel-assigned global name for this object
173	 *
174	 * The list contains both flink-named and prime-fd'd objects.
175	 */
176	unsigned int global_name;
177
178	UT_hash_handle handle_hh;
179	UT_hash_handle name_hh;
180
181	/**
182	 * Index of the buffer within the validation list while preparing a
183	 * batchbuffer execution.
184	 */
185	int validate_index;
186
187	/**
188	 * Current tiling mode
189	 */
190	uint32_t tiling_mode;
191	uint32_t swizzle_mode;
192	unsigned long stride;
193
194	unsigned long kflags;
195
196	time_t free_time;
197
198	/** Array passed to the DRM containing relocation information. */
199	struct drm_i915_gem_relocation_entry *relocs;
200	/**
201	 * Array of info structs corresponding to relocs[i].target_handle etc
202	 */
203	drm_intel_reloc_target *reloc_target_info;
204	/** Number of entries in relocs */
205	int reloc_count;
206	/** Array of BOs that are referenced by this buffer and will be softpinned */
207	drm_intel_bo **softpin_target;
208	/** Number of softpinned BOs referenced by this buffer */
209	int softpin_target_count;
210	/** Maximum number of softpinned BOs that can be referenced by this buffer */
211	int softpin_target_size;
212
213	/** Mapped address for the buffer, saved across map/unmap cycles */
214	void *mem_virtual;
215	/** GTT virtual address for the buffer, saved across map/unmap cycles */
216	void *gtt_virtual;
217	/** WC CPU address for the buffer, saved across map/unmap cycles */
218	void *wc_virtual;
219	/**
220	 * Virtual address of the buffer allocated by user, used for userptr
221	 * objects only.
222	 */
223	void *user_virtual;
224	int map_count;
225	drmMMListHead vma_list;
226
227	/** BO cache list */
228	drmMMListHead head;
229
230	/**
231	 * Boolean of whether this BO and its children have been included in
232	 * the current drm_intel_bufmgr_check_aperture_space() total.
233	 */
234	bool included_in_check_aperture;
235
236	/**
237	 * Boolean of whether this buffer has been used as a relocation
238	 * target and had its size accounted for, and thus can't have any
239	 * further relocations added to it.
240	 */
241	bool used_as_reloc_target;
242
243	/**
244	 * Boolean of whether we have encountered an error whilst building the relocation tree.
245	 */
246	bool has_error;
247
248	/**
249	 * Boolean of whether this buffer can be re-used
250	 */
251	bool reusable;
252
253	/**
254	 * Boolean of whether the GPU is definitely not accessing the buffer.
255	 *
256	 * This is only valid when reusable, since non-reusable
257	 * buffers are those that have been shared with other
258	 * processes, so we don't know their state.
259	 */
260	bool idle;
261
262	/**
263	 * Boolean of whether this buffer was allocated with userptr
264	 */
265	bool is_userptr;
266
267	/**
268	 * Size in bytes of this buffer and its relocation descendants.
269	 *
270	 * Used to avoid costly tree walking in
271	 * drm_intel_bufmgr_check_aperture in the common case.
272	 */
273	int reloc_tree_size;
274
275	/**
276	 * Number of potential fence registers required by this buffer and its
277	 * relocations.
278	 */
279	int reloc_tree_fences;
280
281	/** Whether we may need to do the SW_FINISH ioctl on unmap. */
282	bool mapped_cpu_write;
283};
284
285static unsigned int
286drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
287
288static unsigned int
289drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
290
291static int
292drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
293			    uint32_t * swizzle_mode);
294
295static int
296drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
297				     uint32_t tiling_mode,
298				     uint32_t stride);
299
300static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
301						      time_t time);
302
303static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
304
305static void drm_intel_gem_bo_free(drm_intel_bo *bo);
306
307static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
308{
309        return (drm_intel_bo_gem *)bo;
310}
311
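/*
 * Round a requested object size up to something the tiling hardware can
 * fence: gen4+ only needs page alignment, while older generations without
 * relaxed fencing need a power-of-two size of at least 512KB (gen2) or
 * 1MB (gen3), falling back to untiled above the per-generation maximum.
 */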
312static unsigned long
313drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
314			   uint32_t *tiling_mode)
315{
316	unsigned long min_size, max_size;
317	unsigned long i;
318
319	if (*tiling_mode == I915_TILING_NONE)
320		return size;
321
322	/* 965+ just need multiples of page size for tiling */
323	if (bufmgr_gem->gen >= 4)
324		return ROUND_UP_TO(size, 4096);
325
326	/* Older chips need powers of two, of at least 512k or 1M */
327	if (bufmgr_gem->gen == 3) {
328		min_size = 1024*1024;
329		max_size = 128*1024*1024;
330	} else {
331		min_size = 512*1024;
332		max_size = 64*1024*1024;
333	}
334
335	if (size > max_size) {
336		*tiling_mode = I915_TILING_NONE;
337		return size;
338	}
339
340	/* Do we need to allocate every page for the fence? */
341	if (bufmgr_gem->has_relaxed_fencing)
342		return ROUND_UP_TO(size, 4096);
343
344	for (i = min_size; i < size; i <<= 1)
345		;
346
347	return i;
348}
349
350/*
351 * Round a given pitch up to the minimum required for X tiling on a
352 * given chip.  We use 512 as the minimum to allow for a later tiling
353 * change.
354 */
355static unsigned long
356drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
357			    unsigned long pitch, uint32_t *tiling_mode)
358{
359	unsigned long tile_width;
360	unsigned long i;
361
362	/* If untiled, then just align it so that we can do rendering
363	 * to it with the 3D engine.
364	 */
365	if (*tiling_mode == I915_TILING_NONE)
366		return ALIGN(pitch, 64);
367
368	if (*tiling_mode == I915_TILING_X
369			|| (IS_915(bufmgr_gem->pci_device)
370			    && *tiling_mode == I915_TILING_Y))
371		tile_width = 512;
372	else
373		tile_width = 128;
374
375	/* 965 is flexible */
376	if (bufmgr_gem->gen >= 4)
377		return ROUND_UP_TO(pitch, tile_width);
378
379	/* The older hardware has a maximum pitch of 8192 with tiled
380	 * surfaces, so fall back to untiled if it's too large.
381	 */
382	if (pitch > 8192) {
383		*tiling_mode = I915_TILING_NONE;
384		return ALIGN(pitch, 64);
385	}
386
387	/* Pre-965 needs power of two tile width */
388	for (i = tile_width; i < pitch; i <<= 1)
389		;
390
391	return i;
392}
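/*
 * A worked example of the rounding above: for an X-tiled surface with a
 * 1500-byte pitch, gen4+ rounds up to the next multiple of the 512-byte
 * tile width (1536), while gen3 and older round up to the next power of
 * two (2048).
 */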
393
394static struct drm_intel_gem_bo_bucket *
395drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
396				 unsigned long size)
397{
398	int i;
399
400	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
401		struct drm_intel_gem_bo_bucket *bucket =
402		    &bufmgr_gem->cache_bucket[i];
403		if (bucket->size >= size) {
404			return bucket;
405		}
406	}
407
408	return NULL;
409}
410
411static void
412drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
413{
414	int i, j;
415
416	for (i = 0; i < bufmgr_gem->exec_count; i++) {
417		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
418		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
419
420		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
421			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
422			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
423			    bo_gem->name);
424			continue;
425		}
426
427		for (j = 0; j < bo_gem->reloc_count; j++) {
428			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
429			drm_intel_bo_gem *target_gem =
430			    (drm_intel_bo_gem *) target_bo;
431
432			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
433			    "%d (%s)@0x%08x %08x + 0x%08x\n",
434			    i,
435			    bo_gem->gem_handle,
436			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
437			    bo_gem->name,
438			    upper_32_bits(bo_gem->relocs[j].offset),
439			    lower_32_bits(bo_gem->relocs[j].offset),
440			    target_gem->gem_handle,
441			    target_gem->name,
442			    upper_32_bits(target_bo->offset64),
443			    lower_32_bits(target_bo->offset64),
444			    bo_gem->relocs[j].delta);
445		}
446
447		for (j = 0; j < bo_gem->softpin_target_count; j++) {
448			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
449			drm_intel_bo_gem *target_gem =
450			    (drm_intel_bo_gem *) target_bo;
451			DBG("%2d: %d %s(%s) -> "
452			    "%d *(%s)@0x%08x %08x\n",
453			    i,
454			    bo_gem->gem_handle,
455			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
456			    bo_gem->name,
457			    target_gem->gem_handle,
458			    target_gem->name,
459			    upper_32_bits(target_bo->offset64),
460			    lower_32_bits(target_bo->offset64));
461		}
462	}
463}
464
465static inline void
466drm_intel_gem_bo_reference(drm_intel_bo *bo)
467{
468	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
469
470	atomic_inc(&bo_gem->refcount);
471}
472
473/**
474 * Adds the given buffer to the list of buffers to be validated (moved into the
475 * appropriate memory type) with the next batch submission.
476 *
477 * If a buffer is validated multiple times in a batch submission, it ends up
478 * with the intersection of the memory type flags and the union of the
479 * access flags.
480 */
481static void
482drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
483{
484	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
485	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
486	int index;
487	unsigned long flags;
488
489	flags = 0;
490	if (need_fence)
491		flags |= EXEC_OBJECT_NEEDS_FENCE;
492
493	if (bo_gem->validate_index != -1) {
494		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
495		return;
496	}
497
498	/* Extend the array of validation entries as necessary. */
499	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
500		int new_size = bufmgr_gem->exec_size * 2;
501
502		if (new_size == 0)
503			new_size = 5;
504
505		bufmgr_gem->exec2_objects =
506			realloc(bufmgr_gem->exec2_objects,
507				sizeof(*bufmgr_gem->exec2_objects) * new_size);
508		bufmgr_gem->exec_bos =
509			realloc(bufmgr_gem->exec_bos,
510				sizeof(*bufmgr_gem->exec_bos) * new_size);
511		bufmgr_gem->exec_size = new_size;
512	}
513
514	index = bufmgr_gem->exec_count;
515	bo_gem->validate_index = index;
516	/* Fill in array entry */
517	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
518	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
519	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
520	bufmgr_gem->exec2_objects[index].alignment = bo->align;
521	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
522	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
523	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
524	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
525	bufmgr_gem->exec_bos[index] = bo;
526	bufmgr_gem->exec_count++;
527}
528
529#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
530	sizeof(uint32_t))
531
532static void
533drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
534				      drm_intel_bo_gem *bo_gem,
535				      unsigned int alignment)
536{
537	unsigned int size;
538
539	assert(!bo_gem->used_as_reloc_target);
540
541	/* The older chipsets are far less flexible in terms of tiling,
542	 * and require tiled buffers to be size-aligned in the aperture.
543	 * This means that in the worst possible case we will need a hole
544	 * twice as large as the object in order for it to fit into the
545	 * aperture. Optimal packing is for wimps.
546	 */
547	size = bo_gem->bo.size;
548	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
549		unsigned int min_size;
550
551		if (bufmgr_gem->has_relaxed_fencing) {
552			if (bufmgr_gem->gen == 3)
553				min_size = 1024*1024;
554			else
555				min_size = 512*1024;
556
557			while (min_size < size)
558				min_size *= 2;
559		} else
560			min_size = size;
561
562		/* Account for worst-case alignment. */
563		alignment = MAX2(alignment, min_size);
564	}
565
566	bo_gem->reloc_tree_size = size + alignment;
567}
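/*
 * As a concrete example of the accounting above: a 700KB X-tiled buffer on
 * gen3 with relaxed fencing gets min_size rounded up to 1MB, so it is
 * charged its 700KB of size plus a 1MB worst-case alignment hole against
 * the aperture.
 */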
568
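/*
 * Allocate the relocation and reloc-target arrays for a BO, capping the
 * entry count at the bufmgr-wide maximum or at one relocation per four
 * bytes of the buffer, whichever is smaller.
 */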
569static int
570drm_intel_setup_reloc_list(drm_intel_bo *bo)
571{
572	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
573	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
574	unsigned int max_relocs = bufmgr_gem->max_relocs;
575
576	if (bo->size / 4 < max_relocs)
577		max_relocs = bo->size / 4;
578
579	bo_gem->relocs = malloc(max_relocs *
580				sizeof(struct drm_i915_gem_relocation_entry));
581	bo_gem->reloc_target_info = malloc(max_relocs *
582					   sizeof(drm_intel_reloc_target));
583	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
584		bo_gem->has_error = true;
585
586		free (bo_gem->relocs);
587		bo_gem->relocs = NULL;
588
589		free (bo_gem->reloc_target_info);
590		bo_gem->reloc_target_info = NULL;
591
592		return 1;
593	}
594
595	return 0;
596}
597
598static int
599drm_intel_gem_bo_busy(drm_intel_bo *bo)
600{
601	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
602	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
603	struct drm_i915_gem_busy busy;
604	int ret;
605
606	if (bo_gem->reusable && bo_gem->idle)
607		return false;
608
609	memclear(busy);
610	busy.handle = bo_gem->gem_handle;
611
612	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
613	if (ret == 0) {
614		bo_gem->idle = !busy.busy;
615		return busy.busy;
616	} else {
617		return false;
618	}
619}
620
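/*
 * Tell the kernel whether the BO's backing pages may be discarded under
 * memory pressure (I915_MADV_DONTNEED) or are needed again
 * (I915_MADV_WILLNEED).  Returns whether the pages are still resident; a
 * zero return means a cached buffer has been purged and must not be reused.
 */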
621static int
622drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
623				  drm_intel_bo_gem *bo_gem, int state)
624{
625	struct drm_i915_gem_madvise madv;
626
627	memclear(madv);
628	madv.handle = bo_gem->gem_handle;
629	madv.madv = state;
630	madv.retained = 1;
631	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
632
633	return madv.retained;
634}
635
636static int
637drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
638{
639	return drm_intel_gem_bo_madvise_internal
640		((drm_intel_bufmgr_gem *) bo->bufmgr,
641		 (drm_intel_bo_gem *) bo,
642		 madv);
643}
644
645/* drop the oldest entries that have been purged by the kernel */
646static void
647drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
648				    struct drm_intel_gem_bo_bucket *bucket)
649{
650	while (!DRMLISTEMPTY(&bucket->head)) {
651		drm_intel_bo_gem *bo_gem;
652
653		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
654				      bucket->head.next, head);
655		if (drm_intel_gem_bo_madvise_internal
656		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
657			break;
658
659		DRMLISTDEL(&bo_gem->head);
660		drm_intel_gem_bo_free(&bo_gem->bo);
661	}
662}
663
664static drm_intel_bo *
665drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
666				const char *name,
667				unsigned long size,
668				unsigned long flags,
669				uint32_t tiling_mode,
670				unsigned long stride,
671				unsigned int alignment)
672{
673	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
674	drm_intel_bo_gem *bo_gem;
675	unsigned int page_size = getpagesize();
676	int ret;
677	struct drm_intel_gem_bo_bucket *bucket;
678	bool alloc_from_cache;
679	unsigned long bo_size;
680	bool for_render = false;
681
682	if (flags & BO_ALLOC_FOR_RENDER)
683		for_render = true;
684
685	/* Round the allocated size up to a power of two number of pages. */
686	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
687
688	/* If we don't have caching at this size, don't actually round the
689	 * allocation up.
690	 */
691	if (bucket == NULL) {
692		bo_size = size;
693		if (bo_size < page_size)
694			bo_size = page_size;
695	} else {
696		bo_size = bucket->size;
697	}
698
699	pthread_mutex_lock(&bufmgr_gem->lock);
700	/* Get a buffer out of the cache if available */
701retry:
702	alloc_from_cache = false;
703	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
704		if (for_render) {
705			/* Allocate new render-target BOs from the tail (MRU)
706			 * of the list, as it will likely be hot in the GPU
707			 * cache and in the aperture for us.
708			 */
709			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
710					      bucket->head.prev, head);
711			DRMLISTDEL(&bo_gem->head);
712			alloc_from_cache = true;
713			bo_gem->bo.align = alignment;
714		} else {
715			assert(alignment == 0);
716			/* For non-render-target BOs (where we're probably
717			 * going to map it first thing in order to fill it
718			 * with data), check if the last BO in the cache is
719			 * unbusy, and only reuse in that case. Otherwise,
720			 * allocating a new buffer is probably faster than
721			 * waiting for the GPU to finish.
722			 */
723			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
724					      bucket->head.next, head);
725			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
726				alloc_from_cache = true;
727				DRMLISTDEL(&bo_gem->head);
728			}
729		}
730
731		if (alloc_from_cache) {
732			if (!drm_intel_gem_bo_madvise_internal
733			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
734				drm_intel_gem_bo_free(&bo_gem->bo);
735				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
736								    bucket);
737				goto retry;
738			}
739
740			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
741								 tiling_mode,
742								 stride)) {
743				drm_intel_gem_bo_free(&bo_gem->bo);
744				goto retry;
745			}
746		}
747	}
748
749	if (!alloc_from_cache) {
750		struct drm_i915_gem_create create;
751
752		bo_gem = calloc(1, sizeof(*bo_gem));
753		if (!bo_gem)
754			goto err;
755
756		/* drm_intel_gem_bo_free() calls DRMLISTDEL() on vma_list, so make
757		   sure the list head is initialized before any error path runs. */
758		DRMINITLISTHEAD(&bo_gem->vma_list);
759
760		bo_gem->bo.size = bo_size;
761
762		memclear(create);
763		create.size = bo_size;
764
765		ret = drmIoctl(bufmgr_gem->fd,
766			       DRM_IOCTL_I915_GEM_CREATE,
767			       &create);
768		if (ret != 0) {
769			free(bo_gem);
770			goto err;
771		}
772
773		bo_gem->gem_handle = create.handle;
774		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
775			 gem_handle, sizeof(bo_gem->gem_handle),
776			 bo_gem);
777
778		bo_gem->bo.handle = bo_gem->gem_handle;
779		bo_gem->bo.bufmgr = bufmgr;
780		bo_gem->bo.align = alignment;
781
782		bo_gem->tiling_mode = I915_TILING_NONE;
783		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
784		bo_gem->stride = 0;
785
786		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
787							 tiling_mode,
788							 stride))
789			goto err_free;
790	}
791
792	bo_gem->name = name;
793	atomic_set(&bo_gem->refcount, 1);
794	bo_gem->validate_index = -1;
795	bo_gem->reloc_tree_fences = 0;
796	bo_gem->used_as_reloc_target = false;
797	bo_gem->has_error = false;
798	bo_gem->reusable = true;
799
800	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
801	pthread_mutex_unlock(&bufmgr_gem->lock);
802
803	DBG("bo_create: buf %d (%s) %ldb\n",
804	    bo_gem->gem_handle, bo_gem->name, size);
805
806	return &bo_gem->bo;
807
808err_free:
809	drm_intel_gem_bo_free(&bo_gem->bo);
810err:
811	pthread_mutex_unlock(&bufmgr_gem->lock);
812	return NULL;
813}
814
815static drm_intel_bo *
816drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
817				  const char *name,
818				  unsigned long size,
819				  unsigned int alignment)
820{
821	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
822					       BO_ALLOC_FOR_RENDER,
823					       I915_TILING_NONE, 0,
824					       alignment);
825}
826
827static drm_intel_bo *
828drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
829		       const char *name,
830		       unsigned long size,
831		       unsigned int alignment)
832{
833	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
834					       I915_TILING_NONE, 0, 0);
835}
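/*
 * Minimal caller-side sketch using only the public API from intel_bufmgr.h
 * (the name and size are illustrative):
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 0);
 *	if (bo && drm_intel_bo_map(bo, 1) == 0) {
 *		memset(bo->virtual, 0, 4096);
 *		drm_intel_bo_unmap(bo);
 *	}
 *	drm_intel_bo_unreference(bo);
 */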
836
837static drm_intel_bo *
838drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
839			     int x, int y, int cpp, uint32_t *tiling_mode,
840			     unsigned long *pitch, unsigned long flags)
841{
842	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
843	unsigned long size, stride;
844	uint32_t tiling;
845
846	do {
847		unsigned long aligned_y, height_alignment;
848
849		tiling = *tiling_mode;
850
851		/* If we're tiled, our allocations are in 8 or 32-row blocks,
852		 * so failure to align our height means that we won't allocate
853		 * enough pages.
854		 *
855		 * If we're untiled, we still have to align to 2 rows high
856		 * because the data port accesses 2x2 blocks even if the
857		 * bottom row isn't to be rendered, so failure to align means
858		 * we could walk off the end of the GTT and fault.  This is
859		 * documented on 965, and may be the case on older chipsets
860		 * too, so we try to be careful.
861		 */
862		aligned_y = y;
863		height_alignment = 2;
864
865		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
866			height_alignment = 16;
867		else if (tiling == I915_TILING_X
868			|| (IS_915(bufmgr_gem->pci_device)
869			    && tiling == I915_TILING_Y))
870			height_alignment = 8;
871		else if (tiling == I915_TILING_Y)
872			height_alignment = 32;
873		aligned_y = ALIGN(y, height_alignment);
874
875		stride = x * cpp;
876		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
877		size = stride * aligned_y;
878		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
879	} while (*tiling_mode != tiling);
880	*pitch = stride;
881
882	if (tiling == I915_TILING_NONE)
883		stride = 0;
884
885	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
886					       tiling, stride, 0);
887}
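/*
 * Caller-side sketch for a tiled allocation (public API; the surface
 * dimensions are illustrative).  tiling_mode is an in/out parameter and may
 * be demoted to I915_TILING_NONE, and *pitch returns the stride actually
 * used:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo = drm_intel_bo_alloc_tiled(bufmgr, "fb", 1920, 1080,
 *						    4, &tiling, &pitch, 0);
 */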
888
889static drm_intel_bo *
890drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
891				const char *name,
892				void *addr,
893				uint32_t tiling_mode,
894				uint32_t stride,
895				unsigned long size,
896				unsigned long flags)
897{
898	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
899	drm_intel_bo_gem *bo_gem;
900	int ret;
901	struct drm_i915_gem_userptr userptr;
902
903	/* Tiling with userptr surfaces is not supported
904	 * on all hardware, so refuse it for the time being.
905	 */
906	if (tiling_mode != I915_TILING_NONE)
907		return NULL;
908
909	bo_gem = calloc(1, sizeof(*bo_gem));
910	if (!bo_gem)
911		return NULL;
912
913	atomic_set(&bo_gem->refcount, 1);
914	DRMINITLISTHEAD(&bo_gem->vma_list);
915
916	bo_gem->bo.size = size;
917
918	memclear(userptr);
919	userptr.user_ptr = (__u64)((unsigned long)addr);
920	userptr.user_size = size;
921	userptr.flags = flags;
922
923	ret = drmIoctl(bufmgr_gem->fd,
924			DRM_IOCTL_I915_GEM_USERPTR,
925			&userptr);
926	if (ret != 0) {
927		DBG("bo_create_userptr: "
928		    "ioctl failed with user ptr %p size 0x%lx, "
929		    "user flags 0x%lx\n", addr, size, flags);
930		free(bo_gem);
931		return NULL;
932	}
933
934	pthread_mutex_lock(&bufmgr_gem->lock);
935
936	bo_gem->gem_handle = userptr.handle;
937	bo_gem->bo.handle = bo_gem->gem_handle;
938	bo_gem->bo.bufmgr    = bufmgr;
939	bo_gem->is_userptr   = true;
940	bo_gem->bo.virtual   = addr;
941	/* Save the address provided by user */
942	bo_gem->user_virtual = addr;
943	bo_gem->tiling_mode  = I915_TILING_NONE;
944	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
945	bo_gem->stride       = 0;
946
947	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
948		 gem_handle, sizeof(bo_gem->gem_handle),
949		 bo_gem);
950
951	bo_gem->name = name;
952	bo_gem->validate_index = -1;
953	bo_gem->reloc_tree_fences = 0;
954	bo_gem->used_as_reloc_target = false;
955	bo_gem->has_error = false;
956	bo_gem->reusable = false;
957
958	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
959	pthread_mutex_unlock(&bufmgr_gem->lock);
960
961	DBG("bo_create_userptr: "
962	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
963		addr, bo_gem->gem_handle, bo_gem->name,
964		size, stride, tiling_mode);
965
966	return &bo_gem->bo;
967}
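/*
 * Caller-side sketch for wrapping existing memory with the public API (both
 * the pointer and the size must be page-aligned; names are illustrative):
 *
 *	void *ptr;
 *	posix_memalign(&ptr, 4096, 4096);
 *	drm_intel_bo *bo = drm_intel_bo_alloc_userptr(bufmgr, "wrapped", ptr,
 *						      I915_TILING_NONE, 0,
 *						      4096, 0);
 */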
968
969static bool
970has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
971{
972	int ret;
973	void *ptr;
974	long pgsz;
975	struct drm_i915_gem_userptr userptr;
976
977	pgsz = sysconf(_SC_PAGESIZE);
978	assert(pgsz > 0);
979
980	ret = posix_memalign(&ptr, pgsz, pgsz);
981	if (ret) {
982		DBG("Failed to get a page (%ld) for userptr detection!\n",
983			pgsz);
984		return false;
985	}
986
987	memclear(userptr);
988	userptr.user_ptr = (__u64)(unsigned long)ptr;
989	userptr.user_size = pgsz;
990
991retry:
992	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
993	if (ret) {
994		if (errno == ENODEV && userptr.flags == 0) {
995			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
996			goto retry;
997		}
998		free(ptr);
999		return false;
1000	}
1001
1002	/* We don't release the userptr bo here as we want to keep the
1003	 * kernel mm tracking alive for our lifetime. The first time we
1004	 * create a userptr object, the kernel has to install an mmu_notifier
1005	 * which is a heavyweight operation (e.g. it requires taking all
1006	 * mm_locks and stop_machine()).
1007	 */
1008
1009	bufmgr_gem->userptr_active.ptr = ptr;
1010	bufmgr_gem->userptr_active.handle = userptr.handle;
1011
1012	return true;
1013}
1014
1015static drm_intel_bo *
1016check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1017		       const char *name,
1018		       void *addr,
1019		       uint32_t tiling_mode,
1020		       uint32_t stride,
1021		       unsigned long size,
1022		       unsigned long flags)
1023{
1024	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1025		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1026	else
1027		bufmgr->bo_alloc_userptr = NULL;
1028
1029	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1030					  tiling_mode, stride, size, flags);
1031}
1032
1033static int get_tiling_mode(drm_intel_bufmgr_gem *bufmgr_gem,
1034			   uint32_t gem_handle,
1035			   uint32_t *tiling_mode,
1036			   uint32_t *swizzle_mode)
1037{
1038	struct drm_i915_gem_get_tiling get_tiling = {
1039		.handle = gem_handle,
1040	};
1041	int ret;
1042
1043	ret = drmIoctl(bufmgr_gem->fd,
1044		       DRM_IOCTL_I915_GEM_GET_TILING,
1045		       &get_tiling);
1046	if (ret != 0 && errno != EOPNOTSUPP)
1047		return ret;
1048
1049	*tiling_mode = get_tiling.tiling_mode;
1050	*swizzle_mode = get_tiling.swizzle_mode;
1051
1052	return 0;
1053}
1054
1055/**
1056 * Returns a drm_intel_bo wrapping the given buffer object handle.
1057 *
1058 * This can be used when one application needs to pass a buffer object
1059 * to another.
1060 */
1061drm_public drm_intel_bo *
1062drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1063				  const char *name,
1064				  unsigned int handle)
1065{
1066	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1067	drm_intel_bo_gem *bo_gem;
1068	int ret;
1069	struct drm_gem_open open_arg;
1070
1071	/* At the moment most applications only have a few named BOs.
1072	 * For instance, in a DRI client only the render buffers passed
1073	 * between X and the client are named. And since X returns the
1074	 * alternating names for the front/back buffer a linear search
1075	 * provides a sufficiently fast match.
1076	 */
1077	pthread_mutex_lock(&bufmgr_gem->lock);
1078	HASH_FIND(name_hh, bufmgr_gem->name_table,
1079		  &handle, sizeof(handle), bo_gem);
1080	if (bo_gem) {
1081		drm_intel_gem_bo_reference(&bo_gem->bo);
1082		goto out;
1083	}
1084
1085	memclear(open_arg);
1086	open_arg.name = handle;
1087	ret = drmIoctl(bufmgr_gem->fd,
1088		       DRM_IOCTL_GEM_OPEN,
1089		       &open_arg);
1090	if (ret != 0) {
1091		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1092		    name, handle, strerror(errno));
1093		bo_gem = NULL;
1094		goto out;
1095	}
1096	/* Now see if someone has used a prime handle to get this
1097	 * object from the kernel before, by checking the handle table
1098	 * again for a matching gem_handle.
1099	 */
1100	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1101		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1102	if (bo_gem) {
1103		drm_intel_gem_bo_reference(&bo_gem->bo);
1104		goto out;
1105	}
1106
1107	bo_gem = calloc(1, sizeof(*bo_gem));
1108	if (!bo_gem)
1109		goto out;
1110
1111	atomic_set(&bo_gem->refcount, 1);
1112	DRMINITLISTHEAD(&bo_gem->vma_list);
1113
1114	bo_gem->bo.size = open_arg.size;
1115	bo_gem->bo.offset = 0;
1116	bo_gem->bo.offset64 = 0;
1117	bo_gem->bo.virtual = NULL;
1118	bo_gem->bo.bufmgr = bufmgr;
1119	bo_gem->name = name;
1120	bo_gem->validate_index = -1;
1121	bo_gem->gem_handle = open_arg.handle;
1122	bo_gem->bo.handle = open_arg.handle;
1123	bo_gem->global_name = handle;
1124	bo_gem->reusable = false;
1125
1126	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1127		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1128	HASH_ADD(name_hh, bufmgr_gem->name_table,
1129		 global_name, sizeof(bo_gem->global_name), bo_gem);
1130
1131	ret = get_tiling_mode(bufmgr_gem, bo_gem->gem_handle,
1132			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
1133	if (ret != 0)
1134		goto err_unref;
1135
1136	/* XXX stride is unknown */
1137	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1138	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1139
1140out:
1141	pthread_mutex_unlock(&bufmgr_gem->lock);
1142	return &bo_gem->bo;
1143
1144err_unref:
1145	drm_intel_gem_bo_free(&bo_gem->bo);
1146	pthread_mutex_unlock(&bufmgr_gem->lock);
1147	return NULL;
1148}
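/*
 * Sharing sketch using the public flink API (names are illustrative): the
 * exporting process calls drm_intel_bo_flink(bo, &name) and hands the
 * resulting integer to another process, which then wraps it with
 *
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */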
1149
1150static void
1151drm_intel_gem_bo_free(drm_intel_bo *bo)
1152{
1153	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1154	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1155	int ret;
1156
1157	DRMLISTDEL(&bo_gem->vma_list);
1158	if (bo_gem->mem_virtual) {
1159		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1160		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1161		bufmgr_gem->vma_count--;
1162	}
1163	if (bo_gem->wc_virtual) {
1164		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1165		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1166		bufmgr_gem->vma_count--;
1167	}
1168	if (bo_gem->gtt_virtual) {
1169		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1170		bufmgr_gem->vma_count--;
1171	}
1172
1173	if (bo_gem->global_name)
1174		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1175	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1176
1177	/* Close this object */
1178	ret = drmCloseBufferHandle(bufmgr_gem->fd, bo_gem->gem_handle);
1179	if (ret != 0) {
1180		DBG("drmCloseBufferHandle %d failed (%s): %s\n",
1181		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1182	}
1183	free(bo);
1184}
1185
1186static void
1187drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1188{
1189#if HAVE_VALGRIND
1190	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1191
1192	if (bo_gem->mem_virtual)
1193		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1194
1195	if (bo_gem->wc_virtual)
1196		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1197
1198	if (bo_gem->gtt_virtual)
1199		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1200#endif
1201}
1202
1203/** Frees all cached buffers significantly older than @time. */
1204static void
1205drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1206{
1207	int i;
1208
1209	if (bufmgr_gem->time == time)
1210		return;
1211
1212	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1213		struct drm_intel_gem_bo_bucket *bucket =
1214		    &bufmgr_gem->cache_bucket[i];
1215
1216		while (!DRMLISTEMPTY(&bucket->head)) {
1217			drm_intel_bo_gem *bo_gem;
1218
1219			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1220					      bucket->head.next, head);
1221			if (time - bo_gem->free_time <= 1)
1222				break;
1223
1224			DRMLISTDEL(&bo_gem->head);
1225
1226			drm_intel_gem_bo_free(&bo_gem->bo);
1227		}
1228	}
1229
1230	bufmgr_gem->time = time;
1231}
1232
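/*
 * Trim cached-but-unused CPU/GTT/WC mappings so that the number of open
 * VMAs stays below vma_max, leaving headroom for the mappings currently in
 * use.
 */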
1233static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1234{
1235	int limit;
1236
1237	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1238	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1239
1240	if (bufmgr_gem->vma_max < 0)
1241		return;
1242
1243	/* We may need to evict a few entries in order to create new mmaps */
1244	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1245	if (limit < 0)
1246		limit = 0;
1247
1248	while (bufmgr_gem->vma_count > limit) {
1249		drm_intel_bo_gem *bo_gem;
1250
1251		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1252				      bufmgr_gem->vma_cache.next,
1253				      vma_list);
1254		assert(bo_gem->map_count == 0);
1255		DRMLISTDELINIT(&bo_gem->vma_list);
1256
1257		if (bo_gem->mem_virtual) {
1258			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1259			bo_gem->mem_virtual = NULL;
1260			bufmgr_gem->vma_count--;
1261		}
1262		if (bo_gem->wc_virtual) {
1263			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1264			bo_gem->wc_virtual = NULL;
1265			bufmgr_gem->vma_count--;
1266		}
1267		if (bo_gem->gtt_virtual) {
1268			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1269			bo_gem->gtt_virtual = NULL;
1270			bufmgr_gem->vma_count--;
1271		}
1272	}
1273}
1274
1275static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1276				       drm_intel_bo_gem *bo_gem)
1277{
1278	bufmgr_gem->vma_open--;
1279	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1280	if (bo_gem->mem_virtual)
1281		bufmgr_gem->vma_count++;
1282	if (bo_gem->wc_virtual)
1283		bufmgr_gem->vma_count++;
1284	if (bo_gem->gtt_virtual)
1285		bufmgr_gem->vma_count++;
1286	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1287}
1288
1289static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1290				      drm_intel_bo_gem *bo_gem)
1291{
1292	bufmgr_gem->vma_open++;
1293	DRMLISTDEL(&bo_gem->vma_list);
1294	if (bo_gem->mem_virtual)
1295		bufmgr_gem->vma_count--;
1296	if (bo_gem->wc_virtual)
1297		bufmgr_gem->vma_count--;
1298	if (bo_gem->gtt_virtual)
1299		bufmgr_gem->vma_count--;
1300	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1301}
1302
1303static void
1304drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1305{
1306	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1307	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1308	struct drm_intel_gem_bo_bucket *bucket;
1309	int i;
1310
1311	/* Unreference all the target buffers */
1312	for (i = 0; i < bo_gem->reloc_count; i++) {
1313		if (bo_gem->reloc_target_info[i].bo != bo) {
1314			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1315								  reloc_target_info[i].bo,
1316								  time);
1317		}
1318	}
1319	for (i = 0; i < bo_gem->softpin_target_count; i++)
1320		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1321								  time);
1322	bo_gem->kflags = 0;
1323	bo_gem->reloc_count = 0;
1324	bo_gem->used_as_reloc_target = false;
1325	bo_gem->softpin_target_count = 0;
1326
1327	DBG("bo_unreference final: %d (%s)\n",
1328	    bo_gem->gem_handle, bo_gem->name);
1329
1330	/* release memory associated with this object */
1331	if (bo_gem->reloc_target_info) {
1332		free(bo_gem->reloc_target_info);
1333		bo_gem->reloc_target_info = NULL;
1334	}
1335	if (bo_gem->relocs) {
1336		free(bo_gem->relocs);
1337		bo_gem->relocs = NULL;
1338	}
1339	if (bo_gem->softpin_target) {
1340		free(bo_gem->softpin_target);
1341		bo_gem->softpin_target = NULL;
1342		bo_gem->softpin_target_size = 0;
1343	}
1344
1345	/* Clear any left-over mappings */
1346	if (bo_gem->map_count) {
1347		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1348		bo_gem->map_count = 0;
1349		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1350		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1351	}
1352
1353	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1354	/* Put the buffer into our internal cache for reuse if we can. */
1355	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1356	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1357					      I915_MADV_DONTNEED)) {
1358		bo_gem->free_time = time;
1359
1360		bo_gem->name = NULL;
1361		bo_gem->validate_index = -1;
1362
1363		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1364	} else {
1365		drm_intel_gem_bo_free(bo);
1366	}
1367}
1368
1369static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1370						      time_t time)
1371{
1372	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1373
1374	assert(atomic_read(&bo_gem->refcount) > 0);
1375	if (atomic_dec_and_test(&bo_gem->refcount))
1376		drm_intel_gem_bo_unreference_final(bo, time);
1377}
1378
1379static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1380{
1381	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1382
1383	assert(atomic_read(&bo_gem->refcount) > 0);
1384
1385	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1386		drm_intel_bufmgr_gem *bufmgr_gem =
1387		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1388		struct timespec time;
1389
1390		clock_gettime(CLOCK_MONOTONIC, &time);
1391
1392		pthread_mutex_lock(&bufmgr_gem->lock);
1393
1394		if (atomic_dec_and_test(&bo_gem->refcount)) {
1395			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1396			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1397		}
1398
1399		pthread_mutex_unlock(&bufmgr_gem->lock);
1400	}
1401}
1402
1403static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1404{
1405	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1406	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1407	struct drm_i915_gem_set_domain set_domain;
1408	int ret;
1409
1410	if (bo_gem->is_userptr) {
1411		/* Return the same user ptr */
1412		bo->virtual = bo_gem->user_virtual;
1413		return 0;
1414	}
1415
1416	pthread_mutex_lock(&bufmgr_gem->lock);
1417
1418	if (bo_gem->map_count++ == 0)
1419		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1420
1421	if (!bo_gem->mem_virtual) {
1422		struct drm_i915_gem_mmap mmap_arg;
1423
1424		DBG("bo_map: %d (%s), map_count=%d\n",
1425		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1426
1427		memclear(mmap_arg);
1428		mmap_arg.handle = bo_gem->gem_handle;
1429		mmap_arg.size = bo->size;
1430		ret = drmIoctl(bufmgr_gem->fd,
1431			       DRM_IOCTL_I915_GEM_MMAP,
1432			       &mmap_arg);
1433		if (ret != 0) {
1434			ret = -errno;
1435			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1436			    __FILE__, __LINE__, bo_gem->gem_handle,
1437			    bo_gem->name, strerror(errno));
1438			if (--bo_gem->map_count == 0)
1439				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1440			pthread_mutex_unlock(&bufmgr_gem->lock);
1441			return ret;
1442		}
1443		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1444		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1445	}
1446	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1447	    bo_gem->mem_virtual);
1448	bo->virtual = bo_gem->mem_virtual;
1449
1450	memclear(set_domain);
1451	set_domain.handle = bo_gem->gem_handle;
1452	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1453	if (write_enable)
1454		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1455	else
1456		set_domain.write_domain = 0;
1457	ret = drmIoctl(bufmgr_gem->fd,
1458		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1459		       &set_domain);
1460	if (ret != 0) {
1461		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1462		    __FILE__, __LINE__, bo_gem->gem_handle,
1463		    strerror(errno));
1464	}
1465
1466	if (write_enable)
1467		bo_gem->mapped_cpu_write = true;
1468
1469	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1470	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1471	pthread_mutex_unlock(&bufmgr_gem->lock);
1472
1473	return 0;
1474}
1475
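/*
 * Map a BO through the GTT aperture: ask the kernel for the fake mmap
 * offset with the MMAP_GTT ioctl and then mmap that offset on the DRM fd.
 * Callers are expected to follow up with a set-domain ioctl.
 */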
1476static int
1477map_gtt(drm_intel_bo *bo)
1478{
1479	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1480	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1481	int ret;
1482
1483	if (bo_gem->is_userptr)
1484		return -EINVAL;
1485
1486	if (bo_gem->map_count++ == 0)
1487		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1488
1489	/* Get a mapping of the buffer if we haven't before. */
1490	if (bo_gem->gtt_virtual == NULL) {
1491		struct drm_i915_gem_mmap_gtt mmap_arg;
1492
1493		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1494		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1495
1496		memclear(mmap_arg);
1497		mmap_arg.handle = bo_gem->gem_handle;
1498
1499		/* Get the fake offset back... */
1500		ret = drmIoctl(bufmgr_gem->fd,
1501			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1502			       &mmap_arg);
1503		if (ret != 0) {
1504			ret = -errno;
1505			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1506			    __FILE__, __LINE__,
1507			    bo_gem->gem_handle, bo_gem->name,
1508			    strerror(errno));
1509			if (--bo_gem->map_count == 0)
1510				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1511			return ret;
1512		}
1513
1514		/* and mmap it */
1515		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1516					       MAP_SHARED, bufmgr_gem->fd,
1517					       mmap_arg.offset);
1518		if (bo_gem->gtt_virtual == MAP_FAILED) {
1519			bo_gem->gtt_virtual = NULL;
1520			ret = -errno;
1521			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1522			    __FILE__, __LINE__,
1523			    bo_gem->gem_handle, bo_gem->name,
1524			    strerror(errno));
1525			if (--bo_gem->map_count == 0)
1526				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1527			return ret;
1528		}
1529	}
1530
1531	bo->virtual = bo_gem->gtt_virtual;
1532
1533	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1534	    bo_gem->gtt_virtual);
1535
1536	return 0;
1537}
1538
1539drm_public int
1540drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1541{
1542	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1543	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1544	struct drm_i915_gem_set_domain set_domain;
1545	int ret;
1546
1547	pthread_mutex_lock(&bufmgr_gem->lock);
1548
1549	ret = map_gtt(bo);
1550	if (ret) {
1551		pthread_mutex_unlock(&bufmgr_gem->lock);
1552		return ret;
1553	}
1554
1555	/* Now move it to the GTT domain so that the GPU and CPU
1556	 * caches are flushed and the GPU isn't actively using the
1557	 * buffer.
1558	 *
1559	 * The pagefault handler does this domain change for us when
1560	 * it has unbound the BO from the GTT, but it's up to us to
1561	 * tell it when we're about to use things if we had done
1562	 * rendering and it still happens to be bound to the GTT.
1563	 */
1564	memclear(set_domain);
1565	set_domain.handle = bo_gem->gem_handle;
1566	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1567	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1568	ret = drmIoctl(bufmgr_gem->fd,
1569		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1570		       &set_domain);
1571	if (ret != 0) {
1572		DBG("%s:%d: Error setting domain %d: %s\n",
1573		    __FILE__, __LINE__, bo_gem->gem_handle,
1574		    strerror(errno));
1575	}
1576
1577	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1578	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1579	pthread_mutex_unlock(&bufmgr_gem->lock);
1580
1581	return 0;
1582}
1583
1584/**
1585 * Performs a mapping of the buffer object like the normal GTT
1586 * mapping, but avoids waiting for the GPU to be done reading from or
1587 * rendering to the buffer.
1588 *
1589 * This is used in the implementation of GL_ARB_map_buffer_range: The
1590 * user asks to create a buffer, then does a mapping, fills some
1591 * space, runs a drawing command, then asks to map it again without
1592 * synchronizing because it guarantees that it won't write over the
1593 * data that the GPU is busy using (or, more specifically, that if it
1594 * does write over the data, it acknowledges that rendering is
1595 * undefined).
1596 */
1597
1598drm_public int
1599drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1600{
1601	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1602#if HAVE_VALGRIND
1603	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1604#endif
1605	int ret;
1606
1607	/* If the CPU cache isn't coherent with the GTT, then use a
1608	 * regular synchronized mapping.  The problem is that we don't
1609	 * track where the buffer was last used on the CPU side in
1610	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1611	 * we would potentially corrupt the buffer even when the user
1612	 * does reasonable things.
1613	 */
1614	if (!bufmgr_gem->has_llc)
1615		return drm_intel_gem_bo_map_gtt(bo);
1616
1617	pthread_mutex_lock(&bufmgr_gem->lock);
1618
1619	ret = map_gtt(bo);
1620	if (ret == 0) {
1621		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1622		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1623	}
1624
1625	pthread_mutex_unlock(&bufmgr_gem->lock);
1626
1627	return ret;
1628}
1629
1630static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1631{
1632	drm_intel_bufmgr_gem *bufmgr_gem;
1633	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1634	int ret = 0;
1635
1636	if (bo == NULL)
1637		return 0;
1638
1639	if (bo_gem->is_userptr)
1640		return 0;
1641
1642	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1643
1644	pthread_mutex_lock(&bufmgr_gem->lock);
1645
1646	if (bo_gem->map_count <= 0) {
1647		DBG("attempted to unmap an unmapped bo\n");
1648		pthread_mutex_unlock(&bufmgr_gem->lock);
1649		/* Preserve the old behaviour of just treating this as a
1650		 * no-op rather than reporting the error.
1651		 */
1652		return 0;
1653	}
1654
1655	if (bo_gem->mapped_cpu_write) {
1656		struct drm_i915_gem_sw_finish sw_finish;
1657
1658		/* Cause a flush to happen if the buffer's pinned for
1659		 * scanout, so the results show up in a timely manner.
1660		 * Unlike GTT set domains, this only does work if the
1661		 * buffer is scanout-related.
1662		 */
1663		memclear(sw_finish);
1664		sw_finish.handle = bo_gem->gem_handle;
1665		ret = drmIoctl(bufmgr_gem->fd,
1666			       DRM_IOCTL_I915_GEM_SW_FINISH,
1667			       &sw_finish);
1668		ret = ret == -1 ? -errno : 0;
1669
1670		bo_gem->mapped_cpu_write = false;
1671	}
1672
1673	/* We need to unmap after every invocation, as we cannot keep
1674	 * an open vma for every bo; that would exhaust the system
1675	 * limits and cause later failures.
1676	 */
1677	if (--bo_gem->map_count == 0) {
1678		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1679		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1680		bo->virtual = NULL;
1681	}
1682	pthread_mutex_unlock(&bufmgr_gem->lock);
1683
1684	return ret;
1685}
1686
1687drm_public int
1688drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1689{
1690	return drm_intel_gem_bo_unmap(bo);
1691}
1692
1693static bool is_cache_coherent(drm_intel_bo *bo)
1694{
1695	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1696	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1697	struct drm_i915_gem_caching arg = {};
1698
1699	arg.handle = bo_gem->gem_handle;
1700	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
1701		assert(false);
1702	return arg.caching != I915_CACHING_NONE;
1703}
1704
1705static void set_domain(drm_intel_bo *bo, uint32_t read, uint32_t write)
1706{
1707	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1708	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1709	struct drm_i915_gem_set_domain arg = {};
1710
1711	arg.handle = bo_gem->gem_handle;
1712	arg.read_domains = read;
1713	arg.write_domain = write;
1714	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
1715		assert(false);
1716}
1717
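/*
 * Fallback write path for kernels without pwrite support: map the buffer
 * either CPU-coherent or write-combining, move it to the matching domain,
 * and memcpy the data into place.
 */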
1718static int mmap_write(drm_intel_bo *bo, unsigned long offset,
1719		      unsigned long length, const void *buf)
1720{
1721	void *map = NULL;
1722
1723	if (!length)
1724		return 0;
1725
1726	if (is_cache_coherent(bo)) {
1727		map = drm_intel_gem_bo_map__cpu(bo);
1728		if (map)
1729			set_domain(bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
1730	}
1731	if (!map) {
1732		map = drm_intel_gem_bo_map__wc(bo);
1733		if (map)
1734			set_domain(bo, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
1735	}
1736
1737	assert(map);
1738	memcpy((char *)map + offset, buf, length);
1739	drm_intel_gem_bo_unmap(bo);
1740	return 0;
1741}
1742
1743static int mmap_read(drm_intel_bo *bo, unsigned long offset,
1744		      unsigned long length, void *buf)
1745{
1746	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1747	void *map = NULL;
1748
1749	if (!length)
1750		return 0;
1751
1752	if (bufmgr_gem->has_llc || is_cache_coherent(bo)) {
1753		map = drm_intel_gem_bo_map__cpu(bo);
1754		if (map)
1755			set_domain(bo, I915_GEM_DOMAIN_CPU, 0);
1756	}
1757	if (!map) {
1758		map = drm_intel_gem_bo_map__wc(bo);
1759		if (map)
1760			set_domain(bo, I915_GEM_DOMAIN_WC, 0);
1761	}
1762
1763	assert(map);
1764	memcpy(buf, (char *)map + offset, length);
1765	drm_intel_gem_bo_unmap(bo);
1766	return 0;
1767}
1768
1769static int
1770drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1771			 unsigned long size, const void *data)
1772{
1773	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1774	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1775	struct drm_i915_gem_pwrite pwrite;
1776	int ret;
1777
1778	if (bo_gem->is_userptr)
1779		return -EINVAL;
1780
1781	memclear(pwrite);
1782	pwrite.handle = bo_gem->gem_handle;
1783	pwrite.offset = offset;
1784	pwrite.size = size;
1785	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1786	ret = drmIoctl(bufmgr_gem->fd,
1787		       DRM_IOCTL_I915_GEM_PWRITE,
1788		       &pwrite);
1789	if (ret)
1790		ret = -errno;
1791
1792	if (ret != 0 && ret != -EOPNOTSUPP) {
1793		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1794		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1795		    (int)size, strerror(errno));
1796		return ret;
1797	}
1798
1799	if (ret == -EOPNOTSUPP)
1800		mmap_write(bo, offset, size, data);
1801
1802	return 0;
1803}
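/*
 * Caller-side sketch (public API; the payload is illustrative): upload a
 * small block of data without mapping the buffer explicitly.
 *
 *	uint32_t magic = 0xdeadbeef;
 *	drm_intel_bo_subdata(bo, 0, sizeof(magic), &magic);
 */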
1804
1805static int
1806drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1807{
1808	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1809	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1810	int ret;
1811
1812	memclear(get_pipe_from_crtc_id);
1813	get_pipe_from_crtc_id.crtc_id = crtc_id;
1814	ret = drmIoctl(bufmgr_gem->fd,
1815		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1816		       &get_pipe_from_crtc_id);
1817	if (ret != 0) {
1818		/* We return -1 here to signal that we don't
1819		 * know which pipe is associated with this crtc.
1820		 * This lets the caller know that this information
1821		 * isn't available; using the wrong pipe for
1822		 * vblank waiting can cause the chipset to lock up.
1823		 */
1824		return -1;
1825	}
1826
1827	return get_pipe_from_crtc_id.pipe;
1828}
1829
1830static int
1831drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1832			     unsigned long size, void *data)
1833{
1834	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1835	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1836	struct drm_i915_gem_pread pread;
1837	int ret;
1838
1839	if (bo_gem->is_userptr)
1840		return -EINVAL;
1841
1842	memclear(pread);
1843	pread.handle = bo_gem->gem_handle;
1844	pread.offset = offset;
1845	pread.size = size;
1846	pread.data_ptr = (uint64_t) (uintptr_t) data;
1847	ret = drmIoctl(bufmgr_gem->fd,
1848		       DRM_IOCTL_I915_GEM_PREAD,
1849		       &pread);
1850	if (ret)
1851		ret = -errno;
1852
1853	if (ret != 0 && ret != -EOPNOTSUPP) {
1854		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1855		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1856		    (int)size, strerror(errno));
1857		return ret;
1858	}
1859
1860	if (ret == -EOPNOTSUPP)
1861		mmap_read(bo, offset, size, data);
1862
1863	return 0;
1864}
1865
1866/** Waits for all GPU rendering with the object to have completed. */
1867static void
1868drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1869{
1870	drm_intel_gem_bo_start_gtt_access(bo, 1);
1871}
1872
1873/**
1874 * Waits on a BO for the given amount of time.
1875 *
1876 * @bo: buffer object to wait for
1877 * @timeout_ns: amount of time to wait in nanoseconds.
1878 *   If value is less than 0, an infinite wait will occur.
1879 *
1880 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1881 * object completed within the allotted time; otherwise a negative return
1882 * value describes the error. Of particular interest is -ETIME, returned when
1883 * the wait failed to yield the desired result.
1884 *
1885 * Similar to drm_intel_gem_bo_wait_rendering, except that the timeout
1886 * parameter allows the operation to give up after a certain amount of time.
1887 * Another subtle difference is in the locking semantics: this variant does not
1888 * hold the lock for the duration of the wait, which makes the wait subject to
1889 * a larger userspace race window.
1890 *
1891 * The implementation waits until the object is no longer actively referenced
1892 * within a batch buffer at the time of the call. The wait does not guarantee
1893 * that the buffer will not be re-issued via another thread or a flinked
1894 * handle; userspace must make sure this race does not occur if such precision
1895 * is important.
1896 *
1897 * Note that some kernels have broken the promise of an infinite wait for
1898 * negative values; upgrade to the latest stable kernel if this happens.
1899 */
1900drm_public int
1901drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1902{
1903	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1904	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1905	struct drm_i915_gem_wait wait;
1906	int ret;
1907
1908	if (!bufmgr_gem->has_wait_timeout) {
1909		DBG("%s:%d: Timed wait is not supported. Falling back to "
1910		    "infinite wait\n", __FILE__, __LINE__);
1911		if (timeout_ns) {
1912			drm_intel_gem_bo_wait_rendering(bo);
1913			return 0;
1914		} else {
1915			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1916		}
1917	}
1918
1919	memclear(wait);
1920	wait.bo_handle = bo_gem->gem_handle;
1921	wait.timeout_ns = timeout_ns;
1922	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1923	if (ret == -1)
1924		return -errno;
1925
1926	return ret;
1927}
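
/*
 * Usage sketch (illustrative only, compiled out): bounding a wait on a BO.
 * The 100ms budget and the caller-supplied "batch_bo" are assumptions, not
 * part of this file.
 */
#if 0
static int example_wait_for_batch(drm_intel_bo *batch_bo)
{
	/* Give the GPU up to 100ms to finish with the buffer. */
	int ret = drm_intel_gem_bo_wait(batch_bo, 100 * 1000 * 1000ll);

	if (ret == -ETIME) {
		/* Still busy: the caller can retry, or pass a negative
		 * timeout_ns for an unbounded wait.
		 */
	}
	return ret;
}
#endif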
1928
1929/**
1930 * Sets the object to the GTT read and possibly write domain, used by the X
1931 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1932 *
1933 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1934 * can do tiled pixmaps this way.
1935 */
1936drm_public void
1937drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1938{
1939	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1940	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1941	struct drm_i915_gem_set_domain set_domain;
1942	int ret;
1943
1944	memclear(set_domain);
1945	set_domain.handle = bo_gem->gem_handle;
1946	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1947	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1948	ret = drmIoctl(bufmgr_gem->fd,
1949		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1950		       &set_domain);
1951	if (ret != 0) {
1952		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1953		    __FILE__, __LINE__, bo_gem->gem_handle,
1954		    set_domain.read_domains, set_domain.write_domain,
1955		    strerror(errno));
1956	}
1957}
1958
1959static void
1960drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1961{
1962	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1963	int i, ret;
1964
1965	free(bufmgr_gem->exec2_objects);
1966	free(bufmgr_gem->exec_bos);
1967
1968	pthread_mutex_destroy(&bufmgr_gem->lock);
1969
1970	/* Free any cached buffer objects we were going to reuse */
1971	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1972		struct drm_intel_gem_bo_bucket *bucket =
1973		    &bufmgr_gem->cache_bucket[i];
1974		drm_intel_bo_gem *bo_gem;
1975
1976		while (!DRMLISTEMPTY(&bucket->head)) {
1977			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1978					      bucket->head.next, head);
1979			DRMLISTDEL(&bo_gem->head);
1980
1981			drm_intel_gem_bo_free(&bo_gem->bo);
1982		}
1983	}
1984
1985	/* Release userptr bo kept hanging around for optimisation. */
1986	if (bufmgr_gem->userptr_active.ptr) {
1987		ret = drmCloseBufferHandle(bufmgr_gem->fd,
1988					   bufmgr_gem->userptr_active.handle);
1989		free(bufmgr_gem->userptr_active.ptr);
1990		if (ret)
1991			fprintf(stderr,
1992				"Failed to release test userptr object! (%d) "
1993				"i915 kernel driver may not be sane!\n", errno);
1994	}
1995
1996	free(bufmgr);
1997}
1998
1999/**
2000 * Adds the target buffer to the validation list and adds the relocation
2001 * to the reloc_buffer's relocation list.
2002 *
2003 * The relocation entry at the given offset must already contain the
2004 * precomputed relocation value, because the kernel will optimize out
2005 * the relocation entry write when the buffer hasn't moved from the
2006 * last known offset in target_bo.
2007 */
2008static int
2009do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2010		 drm_intel_bo *target_bo, uint32_t target_offset,
2011		 uint32_t read_domains, uint32_t write_domain,
2012		 bool need_fence)
2013{
2014	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2015	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2016	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2017	bool fenced_command;
2018
2019	if (bo_gem->has_error)
2020		return -ENOMEM;
2021
2022	if (target_bo_gem->has_error) {
2023		bo_gem->has_error = true;
2024		return -ENOMEM;
2025	}
2026
2027	/* We never use HW fences for rendering on 965+ */
2028	if (bufmgr_gem->gen >= 4)
2029		need_fence = false;
2030
2031	fenced_command = need_fence;
2032	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
2033		need_fence = false;
2034
2035	/* Create a new relocation list if needed */
2036	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
2037		return -ENOMEM;
2038
2039	/* Check overflow */
2040	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
2041
2042	/* Check args */
2043	assert(offset <= bo->size - 4);
2044	assert((write_domain & (write_domain - 1)) == 0);
2045
2046	/* An object needing a fence is a tiled buffer, so it won't have
2047	 * relocs to other buffers.
2048	 */
2049	if (need_fence) {
2050		assert(target_bo_gem->reloc_count == 0);
2051		target_bo_gem->reloc_tree_fences = 1;
2052	}
2053
2054	/* Make sure that we're not adding a reloc to something whose size has
2055	 * already been accounted for.
2056	 */
2057	assert(!bo_gem->used_as_reloc_target);
2058	if (target_bo_gem != bo_gem) {
2059		target_bo_gem->used_as_reloc_target = true;
2060		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2061		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2062	}
2063
2064	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2065	if (target_bo != bo)
2066		drm_intel_gem_bo_reference(target_bo);
2067	if (fenced_command)
2068		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2069			DRM_INTEL_RELOC_FENCE;
2070	else
2071		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2072
2073	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2074	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2075	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2076	    target_bo_gem->gem_handle;
2077	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2078	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2079	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2080	bo_gem->reloc_count++;
2081
2082	return 0;
2083}
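
/*
 * Usage sketch (illustrative only, compiled out): how a caller typically
 * records a relocation through the public drm_intel_bo_emit_reloc() entry
 * point, which lands in do_bo_emit_reloc() above. It assumes the batch has
 * already been CPU-mapped (batch->virtual valid); "batch", "target" and the
 * offsets are caller-side assumptions.
 */
#if 0
static int example_emit_reloc(drm_intel_bo *batch, uint32_t reloc_offset,
			      drm_intel_bo *target, uint32_t target_offset)
{
	uint32_t *map = batch->virtual;

	/* Write the presumed address into the batch first, so the kernel
	 * can skip the fixup when the target has not moved.
	 */
	map[reloc_offset / 4] =
		(uint32_t)(target->offset64 + target_offset);

	return drm_intel_bo_emit_reloc(batch, reloc_offset,
				       target, target_offset,
				       I915_GEM_DOMAIN_RENDER,
				       I915_GEM_DOMAIN_RENDER);
}
#endif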
2084
2085static void
2086drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2087{
2088	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2089
2090	if (enable)
2091		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2092	else
2093		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2094}
2095
2096static int
2097drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2098{
2099	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2100	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2101	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2102	if (bo_gem->has_error)
2103		return -ENOMEM;
2104
2105	if (target_bo_gem->has_error) {
2106		bo_gem->has_error = true;
2107		return -ENOMEM;
2108	}
2109
2110	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2111		return -EINVAL;
2112	if (target_bo_gem == bo_gem)
2113		return -EINVAL;
2114
	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
		int new_size = bo_gem->softpin_target_size * 2;
		drm_intel_bo **new_targets;

		if (new_size == 0)
			new_size = bufmgr_gem->max_relocs;

		/* Use a temporary so the existing array is not leaked if
		 * realloc fails.
		 */
		new_targets = realloc(bo_gem->softpin_target,
				      new_size * sizeof(drm_intel_bo *));
		if (!new_targets)
			return -ENOMEM;

		bo_gem->softpin_target = new_targets;
		bo_gem->softpin_target_size = new_size;
	}
2127	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2128	drm_intel_gem_bo_reference(target_bo);
2129	bo_gem->softpin_target_count++;
2130
2131	return 0;
2132}
2133
2134static int
2135drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2136			    drm_intel_bo *target_bo, uint32_t target_offset,
2137			    uint32_t read_domains, uint32_t write_domain)
2138{
2139	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2140	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2141
2142	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2143		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2144	else
2145		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2146					read_domains, write_domain,
2147					!bufmgr_gem->fenced_relocs);
2148}
2149
2150static int
2151drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2152				  drm_intel_bo *target_bo,
2153				  uint32_t target_offset,
2154				  uint32_t read_domains, uint32_t write_domain)
2155{
2156	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2157				read_domains, write_domain, true);
2158}
2159
2160drm_public int
2161drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2162{
2163	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2164
2165	return bo_gem->reloc_count;
2166}
2167
2168/**
2169 * Removes existing relocation entries in the BO after "start".
2170 *
2171 * This allows a user to avoid a two-step process for state setup with
2172 * counting up all the buffer objects and doing a
2173 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2174 * relocations for the state setup.  Instead, save the state of the
2175 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
2176 * state, and then check if it still fits in the aperture.
2177 *
2178 * Any further drm_intel_bufmgr_check_aperture_space() queries
2179 * involving this buffer in the tree are undefined after this call.
2180 *
2181 * This also removes all softpinned targets being referenced by the BO.
2182 */
2183drm_public void
2184drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2185{
2186	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2187	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2188	int i;
2189	struct timespec time;
2190
2191	clock_gettime(CLOCK_MONOTONIC, &time);
2192
2193	assert(bo_gem->reloc_count >= start);
2194
2195	/* Unreference the cleared target buffers */
2196	pthread_mutex_lock(&bufmgr_gem->lock);
2197
2198	for (i = start; i < bo_gem->reloc_count; i++) {
2199		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2200		if (&target_bo_gem->bo != bo) {
2201			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2202			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2203								  time.tv_sec);
2204		}
2205	}
2206	bo_gem->reloc_count = start;
2207
2208	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2209		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2210		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2211	}
2212	bo_gem->softpin_target_count = 0;
2213
	pthread_mutex_unlock(&bufmgr_gem->lock);
}
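
/*
 * Usage sketch (illustrative only, compiled out) of the pattern described in
 * the comment above: snapshot the reloc count, emit speculatively, then roll
 * back if the batch no longer fits. "batch", "state_bo" and "reloc_offset"
 * are assumed caller-side names.
 */
#if 0
static void example_emit_with_rollback(drm_intel_bo *batch,
				       drm_intel_bo *state_bo,
				       uint32_t reloc_offset)
{
	int saved = drm_intel_gem_bo_get_reloc_count(batch);

	/* Speculatively record the relocation for the new state. */
	drm_intel_bo_emit_reloc(batch, reloc_offset, state_bo, 0,
				I915_GEM_DOMAIN_INSTRUCTION, 0);

	/* If the batch no longer fits, drop what we just added; the caller
	 * would then flush the batch and retry.
	 */
	if (drm_intel_bufmgr_check_aperture_space(&batch, 1) != 0)
		drm_intel_gem_bo_clear_relocs(batch, saved);
}
#endif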
2217
2218/**
2219 * Walk the tree of relocations rooted at BO and accumulate the list of
2220 * validations to be performed and update the relocation buffers with
2221 * index values into the validation list.
2222 */
2223static void
2224drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2225{
2226	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2227	int i;
2228
2229	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2230		return;
2231
2232	for (i = 0; i < bo_gem->reloc_count; i++) {
2233		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2234		int need_fence;
2235
2236		if (target_bo == bo)
2237			continue;
2238
2239		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2240
2241		/* Continue walking the tree depth-first. */
2242		drm_intel_gem_bo_process_reloc2(target_bo);
2243
2244		need_fence = (bo_gem->reloc_target_info[i].flags &
2245			      DRM_INTEL_RELOC_FENCE);
2246
2247		/* Add the target to the validate list */
2248		drm_intel_add_validate_buffer2(target_bo, need_fence);
2249	}
2250
2251	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2252		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2253
2254		if (target_bo == bo)
2255			continue;
2256
2257		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2258		drm_intel_gem_bo_process_reloc2(target_bo);
2259		drm_intel_add_validate_buffer2(target_bo, false);
2260	}
2261}
2262
2263static void
2264drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2265{
2266	int i;
2267
2268	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2269		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2270		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2271
2272		/* Update the buffer offset */
2273		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
			/* If we see a softpinned object here, it means the
			 * kernel has relocated our object, which indicates a
			 * programming error.
			 */
2277			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2278			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2279			    bo_gem->gem_handle, bo_gem->name,
2280			    upper_32_bits(bo->offset64),
2281			    lower_32_bits(bo->offset64),
2282			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2283			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2284			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2285			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2286		}
2287	}
2288}
2289
2290drm_public void
2291drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2292			      int x1, int y1, int width, int height,
2293			      enum aub_dump_bmp_format format,
2294			      int pitch, int offset)
2295{
2296}
2297
2298static int
2299do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2300	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2301	 int in_fence, int *out_fence,
2302	 unsigned int flags)
2303{
2304	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2305	struct drm_i915_gem_execbuffer2 execbuf;
2306	int ret = 0;
2307	int i;
2308
2309	if (to_bo_gem(bo)->has_error)
2310		return -ENOMEM;
2311
2312	switch (flags & 0x7) {
2313	default:
2314		return -EINVAL;
2315	case I915_EXEC_BLT:
2316		if (!bufmgr_gem->has_blt)
2317			return -EINVAL;
2318		break;
2319	case I915_EXEC_BSD:
2320		if (!bufmgr_gem->has_bsd)
2321			return -EINVAL;
2322		break;
2323	case I915_EXEC_VEBOX:
2324		if (!bufmgr_gem->has_vebox)
2325			return -EINVAL;
2326		break;
2327	case I915_EXEC_RENDER:
2328	case I915_EXEC_DEFAULT:
2329		break;
2330	}
2331
2332	pthread_mutex_lock(&bufmgr_gem->lock);
2333	/* Update indices and set up the validate list. */
2334	drm_intel_gem_bo_process_reloc2(bo);
2335
2336	/* Add the batch buffer to the validation list.  There are no relocations
2337	 * pointing to it.
2338	 */
2339	drm_intel_add_validate_buffer2(bo, 0);
2340
2341	memclear(execbuf);
2342	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2343	execbuf.buffer_count = bufmgr_gem->exec_count;
2344	execbuf.batch_start_offset = 0;
2345	execbuf.batch_len = used;
2346	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2347	execbuf.num_cliprects = num_cliprects;
2348	execbuf.DR1 = 0;
2349	execbuf.DR4 = DR4;
2350	execbuf.flags = flags;
2351	if (ctx == NULL)
2352		i915_execbuffer2_set_context_id(execbuf, 0);
2353	else
2354		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2355	execbuf.rsvd2 = 0;
2356	if (in_fence != -1) {
2357		execbuf.rsvd2 = in_fence;
2358		execbuf.flags |= I915_EXEC_FENCE_IN;
2359	}
2360	if (out_fence != NULL) {
2361		*out_fence = -1;
2362		execbuf.flags |= I915_EXEC_FENCE_OUT;
2363	}
2364
2365	if (bufmgr_gem->no_exec)
2366		goto skip_execution;
2367
2368	ret = drmIoctl(bufmgr_gem->fd,
2369		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2370		       &execbuf);
2371	if (ret != 0) {
2372		ret = -errno;
2373		if (ret == -ENOSPC) {
2374			DBG("Execbuffer fails to pin. "
2375			    "Estimate: %u. Actual: %u. Available: %u\n",
2376			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2377							       bufmgr_gem->exec_count),
2378			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2379							      bufmgr_gem->exec_count),
2380			    (unsigned int) bufmgr_gem->gtt_size);
2381		}
2382	}
2383	drm_intel_update_buffer_offsets2(bufmgr_gem);
2384
2385	if (ret == 0 && out_fence != NULL)
2386		*out_fence = execbuf.rsvd2 >> 32;
2387
2388skip_execution:
2389	if (bufmgr_gem->bufmgr.debug)
2390		drm_intel_gem_dump_validation_list(bufmgr_gem);
2391
2392	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2393		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2394
2395		bo_gem->idle = false;
2396
2397		/* Disconnect the buffer from the validate list */
2398		bo_gem->validate_index = -1;
2399		bufmgr_gem->exec_bos[i] = NULL;
2400	}
2401	bufmgr_gem->exec_count = 0;
2402	pthread_mutex_unlock(&bufmgr_gem->lock);
2403
2404	return ret;
2405}
2406
2407static int
2408drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2409		       drm_clip_rect_t *cliprects, int num_cliprects,
2410		       int DR4)
2411{
2412	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2413			-1, NULL, I915_EXEC_RENDER);
2414}
2415
2416static int
2417drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2418			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2419			unsigned int flags)
2420{
2421	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2422			-1, NULL, flags);
2423}
2424
2425drm_public int
2426drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2427			      int used, unsigned int flags)
2428{
2429	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2430}
2431
2432drm_public int
2433drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2434			    drm_intel_context *ctx,
2435			    int used,
2436			    int in_fence,
2437			    int *out_fence,
2438			    unsigned int flags)
2439{
2440	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2441}
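
/*
 * Usage sketch (illustrative only, compiled out): submitting a batch with
 * explicit fences. "ctx", "batch", "used_bytes" and "in_fence_fd" are assumed
 * to come from the caller; the returned fd would normally be handed to a
 * sync-file aware consumer and eventually close()d.
 */
#if 0
static int example_submit_with_fences(drm_intel_context *ctx,
				      drm_intel_bo *batch, int used_bytes,
				      int in_fence_fd)
{
	int out_fence_fd = -1;
	int ret;

	ret = drm_intel_gem_bo_fence_exec(batch, ctx, used_bytes,
					  in_fence_fd, &out_fence_fd,
					  I915_EXEC_RENDER);
	if (ret)
		return ret;

	/* out_fence_fd now signals when this batch retires. */
	return out_fence_fd;
}
#endif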
2442
2443static int
2444drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2445{
2446	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2447	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2448	struct drm_i915_gem_pin pin;
2449	int ret;
2450
2451	memclear(pin);
2452	pin.handle = bo_gem->gem_handle;
2453	pin.alignment = alignment;
2454
2455	ret = drmIoctl(bufmgr_gem->fd,
2456		       DRM_IOCTL_I915_GEM_PIN,
2457		       &pin);
2458	if (ret != 0)
2459		return -errno;
2460
2461	bo->offset64 = pin.offset;
2462	bo->offset = pin.offset;
2463	return 0;
2464}
2465
2466static int
2467drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2468{
2469	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2470	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2471	struct drm_i915_gem_unpin unpin;
2472	int ret;
2473
2474	memclear(unpin);
2475	unpin.handle = bo_gem->gem_handle;
2476
2477	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2478	if (ret != 0)
2479		return -errno;
2480
2481	return 0;
2482}
2483
2484static int
2485drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2486				     uint32_t tiling_mode,
2487				     uint32_t stride)
2488{
2489	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2490	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2491	struct drm_i915_gem_set_tiling set_tiling;
2492	int ret;
2493
2494	if (bo_gem->global_name == 0 &&
2495	    tiling_mode == bo_gem->tiling_mode &&
2496	    stride == bo_gem->stride)
2497		return 0;
2498
2499	memset(&set_tiling, 0, sizeof(set_tiling));
2500	do {
		/* set_tiling is slightly broken and overwrites the
		 * input on the error path, so we have to open code
		 * drmIoctl.
		 */
2505		set_tiling.handle = bo_gem->gem_handle;
2506		set_tiling.tiling_mode = tiling_mode;
2507		set_tiling.stride = stride;
2508
2509		ret = ioctl(bufmgr_gem->fd,
2510			    DRM_IOCTL_I915_GEM_SET_TILING,
2511			    &set_tiling);
2512	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2513	if (ret == -1)
2514		return -errno;
2515
2516	bo_gem->tiling_mode = set_tiling.tiling_mode;
2517	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2518	bo_gem->stride = set_tiling.stride;
2519	return 0;
2520}
2521
2522static int
2523drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2524			    uint32_t stride)
2525{
2526	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2527	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2528	int ret;
2529
	/* Tiling with userptr surfaces is not supported
	 * on all hardware, so refuse it for the time being.
	 */
2533	if (bo_gem->is_userptr)
2534		return -EINVAL;
2535
2536	/* Linear buffers have no stride. By ensuring that we only ever use
2537	 * stride 0 with linear buffers, we simplify our code.
2538	 */
2539	if (*tiling_mode == I915_TILING_NONE)
2540		stride = 0;
2541
2542	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2543	if (ret == 0)
2544		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2545
2546	*tiling_mode = bo_gem->tiling_mode;
2547	return ret;
2548}
2549
2550static int
2551drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2552			    uint32_t * swizzle_mode)
2553{
2554	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2555
2556	*tiling_mode = bo_gem->tiling_mode;
2557	*swizzle_mode = bo_gem->swizzle_mode;
2558	return 0;
2559}
2560
2561static int
2562drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2563{
2564	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2565
2566	bo->offset64 = offset;
2567	bo->offset = offset;
2568	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2569
2570	return 0;
2571}
2572
2573drm_public drm_intel_bo *
2574drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2575{
2576	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2577	int ret;
2578	uint32_t handle;
2579	drm_intel_bo_gem *bo_gem;
2580
2581	pthread_mutex_lock(&bufmgr_gem->lock);
2582	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2583	if (ret) {
2584		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2585		pthread_mutex_unlock(&bufmgr_gem->lock);
2586		return NULL;
2587	}
2588
	/*
	 * See if the kernel has already returned this buffer to us. Just as
	 * for named buffers, we must not create two bos pointing at the same
	 * kernel object.
	 */
2594	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2595		  &handle, sizeof(handle), bo_gem);
2596	if (bo_gem) {
2597		drm_intel_gem_bo_reference(&bo_gem->bo);
2598		goto out;
2599	}
2600
2601	bo_gem = calloc(1, sizeof(*bo_gem));
2602	if (!bo_gem)
2603		goto out;
2604
2605	atomic_set(&bo_gem->refcount, 1);
2606	DRMINITLISTHEAD(&bo_gem->vma_list);
2607
	/* Determine size of bo.  The fd-to-handle ioctl really should
	 * return the size, but it doesn't.  If we have kernel 3.12 or
	 * later, we can lseek on the prime fd to get the size.  Older
	 * kernels will just fail, in which case we fall back to the
	 * provided (estimated or guessed) size. */
2613	ret = lseek(prime_fd, 0, SEEK_END);
2614	if (ret != -1)
2615		bo_gem->bo.size = ret;
2616	else
2617		bo_gem->bo.size = size;
2618
2619	bo_gem->bo.handle = handle;
2620	bo_gem->bo.bufmgr = bufmgr;
2621
2622	bo_gem->gem_handle = handle;
2623	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2624		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2625
2626	bo_gem->name = "prime";
2627	bo_gem->validate_index = -1;
2628	bo_gem->reloc_tree_fences = 0;
2629	bo_gem->used_as_reloc_target = false;
2630	bo_gem->has_error = false;
2631	bo_gem->reusable = false;
2632
2633	ret = get_tiling_mode(bufmgr_gem, handle,
2634			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
2635	if (ret)
2636		goto err;
2637
2638	/* XXX stride is unknown */
2639	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2640
2641out:
2642	pthread_mutex_unlock(&bufmgr_gem->lock);
2643	return &bo_gem->bo;
2644
2645err:
2646	drm_intel_gem_bo_free(&bo_gem->bo);
2647	pthread_mutex_unlock(&bufmgr_gem->lock);
2648	return NULL;
2649}
2650
2651drm_public int
2652drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2653{
2654	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2655	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2656
2657	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2658			       DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2659		return -errno;
2660
2661	bo_gem->reusable = false;
2662
2663	return 0;
2664}
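
/*
 * Usage sketch (illustrative only, compiled out): sharing a BO through a
 * dma-buf fd. Error handling is minimal and "import_bufmgr" is assumed to be
 * a second buffer manager (possibly in another process after fd passing).
 */
#if 0
static drm_intel_bo *example_share_bo(drm_intel_bo *bo,
				      drm_intel_bufmgr *import_bufmgr)
{
	int prime_fd;
	drm_intel_bo *imported;

	if (drm_intel_bo_gem_export_to_prime(bo, &prime_fd) != 0)
		return NULL;

	/* The size is taken from lseek() on the fd when the kernel supports
	 * it; the last argument is only a fallback estimate.
	 */
	imported = drm_intel_bo_gem_create_from_prime(import_bufmgr,
						      prime_fd, (int)bo->size);
	close(prime_fd);
	return imported;
}
#endif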
2665
2666static int
2667drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2668{
2669	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2670	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2671
2672	if (!bo_gem->global_name) {
2673		struct drm_gem_flink flink;
2674
2675		memclear(flink);
2676		flink.handle = bo_gem->gem_handle;
2677		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2678			return -errno;
2679
2680		pthread_mutex_lock(&bufmgr_gem->lock);
2681		if (!bo_gem->global_name) {
2682			bo_gem->global_name = flink.name;
2683			bo_gem->reusable = false;
2684
2685			HASH_ADD(name_hh, bufmgr_gem->name_table,
2686				 global_name, sizeof(bo_gem->global_name),
2687				 bo_gem);
2688		}
2689		pthread_mutex_unlock(&bufmgr_gem->lock);
2690	}
2691
2692	*name = bo_gem->global_name;
2693	return 0;
2694}
2695
2696/**
2697 * Enables unlimited caching of buffer objects for reuse.
2698 *
2699 * This is potentially very memory expensive, as the cache at each bucket
2700 * size is only bounded by how many buffers of that size we've managed to have
2701 * in flight at once.
2702 */
2703drm_public void
2704drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2705{
2706	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2707
2708	bufmgr_gem->bo_reuse = true;
2709}
2710
2711/**
2712 * Disables implicit synchronisation before executing the bo
2713 *
2714 * This will cause rendering corruption unless you correctly manage explicit
2715 * fences for all rendering involving this buffer - including use by others.
2716 * Disabling the implicit serialisation is only required if that serialisation
2717 * is too coarse (for example, you have split the buffer into many
2718 * non-overlapping regions and are sharing the whole buffer between concurrent
2719 * independent command streams).
2720 *
 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
 * which can be checked using drm_intel_bufmgr_gem_can_disable_implicit_sync,
 * or subsequent execbufs involving the bo will generate EINVAL.
2724 */
2725drm_public void
2726drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2727{
2728	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2729
2730	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2731}
2732
2733/**
2734 * Enables implicit synchronisation before executing the bo
2735 *
2736 * This is the default behaviour of the kernel, to wait upon prior writes
2737 * completing on the object before rendering with it, or to wait for prior
2738 * reads to complete before writing into the object.
2739 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2740 * the kernel never to insert a stall before using the object. Then this
2741 * function can be used to restore the implicit sync before subsequent
2742 * rendering.
2743 */
2744drm_public void
2745drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2746{
2747	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2748
2749	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2750}
2751
2752/**
2753 * Query whether the kernel supports disabling of its implicit synchronisation
2754 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2755 */
2756drm_public int
2757drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2758{
2759	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2760
2761	return bufmgr_gem->has_exec_async;
2762}
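
/*
 * Usage sketch (illustrative only, compiled out): opting a shared BO out of
 * implicit fencing when the kernel allows it. The caller is assumed to
 * serialise access with explicit fences, e.g. via drm_intel_gem_bo_fence_exec.
 */
#if 0
static void example_disable_implicit_sync(drm_intel_bufmgr *bufmgr,
					  drm_intel_bo *bo)
{
	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
		drm_intel_gem_bo_disable_implicit_sync(bo);
	/* Otherwise leave the default implicit serialisation in place. */
}
#endif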
2763
2764/**
2765 * Enable use of fenced reloc type.
2766 *
 * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a fence
 * register allocated.
2770 */
2771drm_public void
2772drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2773{
2774	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2775	bufmgr_gem->fenced_relocs = true;
2776}
2777
2778/**
2779 * Return the additional aperture space required by the tree of buffer objects
2780 * rooted at bo.
2781 */
2782static int
2783drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2784{
2785	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2786	int i;
2787	int total = 0;
2788
2789	if (bo == NULL || bo_gem->included_in_check_aperture)
2790		return 0;
2791
2792	total += bo->size;
2793	bo_gem->included_in_check_aperture = true;
2794
2795	for (i = 0; i < bo_gem->reloc_count; i++)
2796		total +=
2797		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2798							reloc_target_info[i].bo);
2799
2800	return total;
2801}
2802
2803/**
2804 * Count the number of buffers in this list that need a fence reg
2805 *
2806 * If the count is greater than the number of available regs, we'll have
2807 * to ask the caller to resubmit a batch with fewer tiled buffers.
2808 *
2809 * This function over-counts if the same buffer is used multiple times.
2810 */
2811static unsigned int
2812drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2813{
2814	int i;
2815	unsigned int total = 0;
2816
2817	for (i = 0; i < count; i++) {
2818		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2819
2820		if (bo_gem == NULL)
2821			continue;
2822
2823		total += bo_gem->reloc_tree_fences;
2824	}
2825	return total;
2826}
2827
2828/**
2829 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2830 * for the next drm_intel_bufmgr_check_aperture_space() call.
2831 */
2832static void
2833drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2834{
2835	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2836	int i;
2837
2838	if (bo == NULL || !bo_gem->included_in_check_aperture)
2839		return;
2840
2841	bo_gem->included_in_check_aperture = false;
2842
2843	for (i = 0; i < bo_gem->reloc_count; i++)
2844		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2845							   reloc_target_info[i].bo);
2846}
2847
2848/**
2849 * Return a conservative estimate for the amount of aperture required
2850 * for a collection of buffers. This may double-count some buffers.
2851 */
2852static unsigned int
2853drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2854{
2855	int i;
2856	unsigned int total = 0;
2857
2858	for (i = 0; i < count; i++) {
2859		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2860		if (bo_gem != NULL)
2861			total += bo_gem->reloc_tree_size;
2862	}
2863	return total;
2864}
2865
2866/**
2867 * Return the amount of aperture needed for a collection of buffers.
2868 * This avoids double counting any buffers, at the cost of looking
2869 * at every buffer in the set.
2870 */
2871static unsigned int
2872drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2873{
2874	int i;
2875	unsigned int total = 0;
2876
2877	for (i = 0; i < count; i++) {
2878		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2879		/* For the first buffer object in the array, we get an
2880		 * accurate count back for its reloc_tree size (since nothing
2881		 * had been flagged as being counted yet).  We can save that
2882		 * value out as a more conservative reloc_tree_size that
2883		 * avoids double-counting target buffers.  Since the first
2884		 * buffer happens to usually be the batch buffer in our
2885		 * callers, this can pull us back from doing the tree
2886		 * walk on every new batch emit.
2887		 */
2888		if (i == 0) {
2889			drm_intel_bo_gem *bo_gem =
2890			    (drm_intel_bo_gem *) bo_array[i];
2891			bo_gem->reloc_tree_size = total;
2892		}
2893	}
2894
2895	for (i = 0; i < count; i++)
2896		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2897	return total;
2898}
2899
2900/**
2901 * Return -1 if the batchbuffer should be flushed before attempting to
2902 * emit rendering referencing the buffers pointed to by bo_array.
2903 *
2904 * This is required because if we try to emit a batchbuffer with relocations
2905 * to a tree of buffers that won't simultaneously fit in the aperture,
2906 * the rendering will return an error at a point where the software is not
2907 * prepared to recover from it.
2908 *
2909 * However, we also want to emit the batchbuffer significantly before we reach
2910 * the limit, as a series of batchbuffers each of which references buffers
2911 * covering almost all of the aperture means that at each emit we end up
2912 * waiting to evict a buffer from the last rendering, and we get synchronous
2913 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2914 * get better parallelism.
2915 */
2916static int
2917drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2918{
2919	drm_intel_bufmgr_gem *bufmgr_gem =
2920	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2921	unsigned int total = 0;
2922	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2923	int total_fences;
2924
2925	/* Check for fence reg constraints if necessary */
2926	if (bufmgr_gem->available_fences) {
2927		total_fences = drm_intel_gem_total_fences(bo_array, count);
2928		if (total_fences > bufmgr_gem->available_fences)
2929			return -ENOSPC;
2930	}
2931
2932	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2933
2934	if (total > threshold)
2935		total = drm_intel_gem_compute_batch_space(bo_array, count);
2936
2937	if (total > threshold) {
2938		DBG("check_space: overflowed available aperture, "
2939		    "%dkb vs %dkb\n",
2940		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2941		return -ENOSPC;
2942	} else {
		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
2944		    (int)bufmgr_gem->gtt_size / 1024);
2945		return 0;
2946	}
2947}
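
/*
 * Usage sketch (illustrative only, compiled out): how a driver typically
 * consults the aperture check before emitting rendering that references a
 * new target. "batch_bo" and "new_target" are assumed caller-side names.
 */
#if 0
static bool example_fits_in_aperture(drm_intel_bo *batch_bo,
				     drm_intel_bo *new_target)
{
	drm_intel_bo *check[2] = { batch_bo, new_target };

	/* A non-zero return means the batch should be flushed before any
	 * rendering referencing new_target is emitted.
	 */
	return drm_intel_bufmgr_check_aperture_space(check, 2) == 0;
}
#endif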
2948
2949/*
2950 * Disable buffer reuse for objects which are shared with the kernel
2951 * as scanout buffers
2952 */
2953static int
2954drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2955{
2956	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2957
2958	bo_gem->reusable = false;
2959	return 0;
2960}
2961
2962static int
2963drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2964{
2965	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2966
2967	return bo_gem->reusable;
2968}
2969
2970static int
2971_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2972{
2973	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2974	int i;
2975
2976	for (i = 0; i < bo_gem->reloc_count; i++) {
2977		if (bo_gem->reloc_target_info[i].bo == target_bo)
2978			return 1;
2979		if (bo == bo_gem->reloc_target_info[i].bo)
2980			continue;
2981		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2982						target_bo))
2983			return 1;
2984	}
2985
2986	for (i = 0; i< bo_gem->softpin_target_count; i++) {
2987		if (bo_gem->softpin_target[i] == target_bo)
2988			return 1;
2989		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2990			return 1;
2991	}
2992
2993	return 0;
2994}
2995
2996/** Return true if target_bo is referenced by bo's relocation tree. */
2997static int
2998drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2999{
3000	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3001
3002	if (bo == NULL || target_bo == NULL)
3003		return 0;
3004	if (target_bo_gem->used_as_reloc_target)
3005		return _drm_intel_gem_bo_references(bo, target_bo);
3006	return 0;
3007}
3008
3009static void
3010add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3011{
3012	unsigned int i = bufmgr_gem->num_buckets;
3013
3014	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3015
3016	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3017	bufmgr_gem->cache_bucket[i].size = size;
3018	bufmgr_gem->num_buckets++;
3019}
3020
3021static void
3022init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3023{
3024	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3025
3026	/* OK, so power of two buckets was too wasteful of memory.
3027	 * Give 3 other sizes between each power of two, to hopefully
3028	 * cover things accurately enough.  (The alternative is
3029	 * probably to just go for exact matching of sizes, and assume
3030	 * that for things like composited window resize the tiled
3031	 * width/height alignment and rounding of sizes to pages will
3032	 * get us useful cache hit rates anyway)
3033	 */
3034	add_bucket(bufmgr_gem, 4096);
3035	add_bucket(bufmgr_gem, 4096 * 2);
3036	add_bucket(bufmgr_gem, 4096 * 3);
3037
3038	/* Initialize the linked lists for BO reuse cache. */
3039	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3040		add_bucket(bufmgr_gem, size);
3041
3042		add_bucket(bufmgr_gem, size + size * 1 / 4);
3043		add_bucket(bufmgr_gem, size + size * 2 / 4);
3044		add_bucket(bufmgr_gem, size + size * 3 / 4);
3045	}
3046}
3047
3048drm_public void
3049drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3050{
3051	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3052
3053	bufmgr_gem->vma_max = limit;
3054
3055	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3056}
3057
3058static int
3059parse_devid_override(const char *devid_override)
3060{
3061	static const struct {
3062		const char *name;
3063		int pci_id;
3064	} name_map[] = {
3065		{ "brw", PCI_CHIP_I965_GM },
3066		{ "g4x", PCI_CHIP_GM45_GM },
3067		{ "ilk", PCI_CHIP_ILD_G },
3068		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3069		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3070		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3071		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3072		{ "bdw", 0x1620 | BDW_ULX },
3073		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3074		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3075	};
3076	unsigned int i;
3077
3078	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3079		if (!strcmp(name_map[i].name, devid_override))
3080			return name_map[i].pci_id;
3081	}
3082
	/* Accept either a decimal or a 0x-prefixed hex PCI ID. */
	return strtol(devid_override, NULL, 0);
3084}
3085
3086/**
3087 * Get the PCI ID for the device.  This can be overridden by setting the
3088 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3089 */
3090static int
3091get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3092{
3093	char *devid_override;
3094	int devid = 0;
3095	int ret;
3096	drm_i915_getparam_t gp;
3097
3098	if (geteuid() == getuid()) {
3099		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3100		if (devid_override) {
3101			bufmgr_gem->no_exec = true;
3102			return parse_devid_override(devid_override);
3103		}
3104	}
3105
3106	memclear(gp);
3107	gp.param = I915_PARAM_CHIPSET_ID;
3108	gp.value = &devid;
3109	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3110	if (ret) {
3111		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3112		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3113	}
3114	return devid;
3115}
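
/*
 * Usage sketch (illustrative only, compiled out): forcing a device ID for
 * no-execute testing. The override is honoured only when the real and
 * effective uid match, and it also sets no_exec so batches are never
 * actually submitted; choosing "skl" here is just an example value.
 */
#if 0
static void example_pretend_to_be_skylake(void)
{
	/* Accepts either a known codename ("skl", "bdw", ...) or a numeric
	 * PCI ID; must be set before drm_intel_bufmgr_gem_init().
	 */
	setenv("INTEL_DEVID_OVERRIDE", "skl", 1);
}
#endif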
3116
3117drm_public int
3118drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3119{
3120	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3121
3122	return bufmgr_gem->pci_device;
3123}
3124
3125/**
3126 * Sets the AUB filename.
3127 *
3128 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3129 * for it to have any effect.
3130 */
3131drm_public void
3132drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3133				      const char *filename)
3134{
3135}
3136
3137/**
3138 * Sets up AUB dumping.
3139 *
3140 * This is a trace file format that can be used with the simulator.
3141 * Packets are emitted in a format somewhat like GPU command packets.
3142 * You can set up a GTT and upload your objects into the referenced
3143 * space, then send off batchbuffers and get BMPs out the other end.
3144 */
3145drm_public void
3146drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3147{
3148	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3149		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3150		"then run (for example)\n\n"
3151		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3152		"See the intel_aubdump man page for more details.\n");
3153}
3154
3155drm_public drm_intel_context *
3156drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3157{
3158	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3159	struct drm_i915_gem_context_create create;
3160	drm_intel_context *context = NULL;
3161	int ret;
3162
3163	context = calloc(1, sizeof(*context));
3164	if (!context)
3165		return NULL;
3166
3167	memclear(create);
3168	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3169	if (ret != 0) {
3170		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3171		    strerror(errno));
3172		free(context);
3173		return NULL;
3174	}
3175
3176	context->ctx_id = create.ctx_id;
3177	context->bufmgr = bufmgr;
3178
3179	return context;
3180}
3181
3182drm_public int
3183drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3184{
3185	if (ctx == NULL)
3186		return -EINVAL;
3187
3188	*ctx_id = ctx->ctx_id;
3189
3190	return 0;
3191}
3192
3193drm_public void
3194drm_intel_gem_context_destroy(drm_intel_context *ctx)
3195{
3196	drm_intel_bufmgr_gem *bufmgr_gem;
3197	struct drm_i915_gem_context_destroy destroy;
3198	int ret;
3199
3200	if (ctx == NULL)
3201		return;
3202
3203	memclear(destroy);
3204
3205	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3206	destroy.ctx_id = ctx->ctx_id;
3207	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3208		       &destroy);
3209	if (ret != 0)
3210		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3211			strerror(errno));
3212
3213	free(ctx);
3214}
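
/*
 * Usage sketch (illustrative only, compiled out): hardware context lifecycle
 * around a batch submission. "batch" and "used_bytes" are assumed to come
 * from the caller's batchbuffer code.
 */
#if 0
static int example_run_in_context(drm_intel_bufmgr *bufmgr,
				  drm_intel_bo *batch, int used_bytes)
{
	drm_intel_context *ctx;
	int ret;

	ctx = drm_intel_gem_context_create(bufmgr);
	if (!ctx)
		return -ENOMEM;

	ret = drm_intel_gem_bo_context_exec(batch, ctx, used_bytes,
					    I915_EXEC_RENDER);

	drm_intel_gem_context_destroy(ctx);
	return ret;
}
#endif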
3215
3216drm_public int
3217drm_intel_get_reset_stats(drm_intel_context *ctx,
3218			  uint32_t *reset_count,
3219			  uint32_t *active,
3220			  uint32_t *pending)
3221{
3222	drm_intel_bufmgr_gem *bufmgr_gem;
3223	struct drm_i915_reset_stats stats;
3224	int ret;
3225
3226	if (ctx == NULL)
3227		return -EINVAL;
3228
3229	memclear(stats);
3230
3231	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3232	stats.ctx_id = ctx->ctx_id;
3233	ret = drmIoctl(bufmgr_gem->fd,
3234		       DRM_IOCTL_I915_GET_RESET_STATS,
3235		       &stats);
3236	if (ret == 0) {
3237		if (reset_count != NULL)
3238			*reset_count = stats.reset_count;
3239
3240		if (active != NULL)
3241			*active = stats.batch_active;
3242
3243		if (pending != NULL)
3244			*pending = stats.batch_pending;
3245	}
3246
3247	return ret;
3248}
3249
3250drm_public int
3251drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3252		   uint32_t offset,
3253		   uint64_t *result)
3254{
3255	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3256	struct drm_i915_reg_read reg_read;
3257	int ret;
3258
3259	memclear(reg_read);
3260	reg_read.offset = offset;
3261
3262	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3263
3264	*result = reg_read.val;
3265	return ret;
3266}
3267
3268drm_public int
3269drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3270{
3271	drm_i915_getparam_t gp;
3272	int ret;
3273
3274	memclear(gp);
3275	gp.value = (int*)subslice_total;
3276	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3277	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3278	if (ret)
3279		return -errno;
3280
3281	return 0;
3282}
3283
3284drm_public int
3285drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3286{
3287	drm_i915_getparam_t gp;
3288	int ret;
3289
3290	memclear(gp);
3291	gp.value = (int*)eu_total;
3292	gp.param = I915_PARAM_EU_TOTAL;
3293	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3294	if (ret)
3295		return -errno;
3296
3297	return 0;
3298}
3299
3300drm_public int
3301drm_intel_get_pooled_eu(int fd)
3302{
3303	drm_i915_getparam_t gp;
3304	int ret = -1;
3305
3306	memclear(gp);
3307	gp.param = I915_PARAM_HAS_POOLED_EU;
3308	gp.value = &ret;
3309	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3310		return -errno;
3311
3312	return ret;
3313}
3314
3315drm_public int
3316drm_intel_get_min_eu_in_pool(int fd)
3317{
3318	drm_i915_getparam_t gp;
3319	int ret = -1;
3320
3321	memclear(gp);
3322	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3323	gp.value = &ret;
3324	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3325		return -errno;
3326
3327	return ret;
3328}
3329
3330/**
3331 * Annotate the given bo for use in aub dumping.
3332 *
3333 * \param annotations is an array of drm_intel_aub_annotation objects
3334 * describing the type of data in various sections of the bo.  Each
3335 * element of the array specifies the type and subtype of a section of
3336 * the bo, and the past-the-end offset of that section.  The elements
3337 * of \c annotations must be sorted so that ending_offset is
3338 * increasing.
3339 *
3340 * \param count is the number of elements in the \c annotations array.
3341 * If \c count is zero, then \c annotations will not be dereferenced.
3342 *
3343 * Annotations are copied into a private data structure, so caller may
3344 * re-use the memory pointed to by \c annotations after the call
3345 * returns.
3346 *
3347 * Annotations are stored for the lifetime of the bo; to reset to the
3348 * default state (no annotations), call this function with a \c count
3349 * of zero.
3350 */
3351drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3352					 drm_intel_aub_annotation *annotations,
3353					 unsigned count)
3354{
3355}
3356
3357static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3358static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3359
3360static drm_intel_bufmgr_gem *
3361drm_intel_bufmgr_gem_find(int fd)
3362{
3363	drm_intel_bufmgr_gem *bufmgr_gem;
3364
3365	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3366		if (bufmgr_gem->fd == fd) {
3367			atomic_inc(&bufmgr_gem->refcount);
3368			return bufmgr_gem;
3369		}
3370	}
3371
3372	return NULL;
3373}
3374
3375static void
3376drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3377{
3378	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3379
3380	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3381		pthread_mutex_lock(&bufmgr_list_mutex);
3382
3383		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3384			DRMLISTDEL(&bufmgr_gem->managers);
3385			drm_intel_bufmgr_gem_destroy(bufmgr);
3386		}
3387
3388		pthread_mutex_unlock(&bufmgr_list_mutex);
3389	}
3390}
3391
3392drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3393{
3394	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3395	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3396
3397	if (bo_gem->gtt_virtual)
3398		return bo_gem->gtt_virtual;
3399
3400	if (bo_gem->is_userptr)
3401		return NULL;
3402
3403	pthread_mutex_lock(&bufmgr_gem->lock);
3404	if (bo_gem->gtt_virtual == NULL) {
3405		struct drm_i915_gem_mmap_gtt mmap_arg;
3406		void *ptr;
3407
3408		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3409		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3410
3411		if (bo_gem->map_count++ == 0)
3412			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3413
3414		memclear(mmap_arg);
3415		mmap_arg.handle = bo_gem->gem_handle;
3416
3417		/* Get the fake offset back... */
3418		ptr = MAP_FAILED;
3419		if (drmIoctl(bufmgr_gem->fd,
3420			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3421			     &mmap_arg) == 0) {
3422			/* and mmap it */
3423			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3424				       MAP_SHARED, bufmgr_gem->fd,
3425				       mmap_arg.offset);
3426		}
3427		if (ptr == MAP_FAILED) {
3428			if (--bo_gem->map_count == 0)
3429				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3430			ptr = NULL;
3431		}
3432
3433		bo_gem->gtt_virtual = ptr;
3434	}
3435	pthread_mutex_unlock(&bufmgr_gem->lock);
3436
3437	return bo_gem->gtt_virtual;
3438}
3439
3440drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3441{
3442	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3443	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3444
3445	if (bo_gem->mem_virtual)
3446		return bo_gem->mem_virtual;
3447
3448	if (bo_gem->is_userptr) {
3449		/* Return the same user ptr */
3450		return bo_gem->user_virtual;
3451	}
3452
3453	pthread_mutex_lock(&bufmgr_gem->lock);
3454	if (!bo_gem->mem_virtual) {
3455		struct drm_i915_gem_mmap mmap_arg;
3456
3457		if (bo_gem->map_count++ == 0)
3458			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3459
3460		DBG("bo_map: %d (%s), map_count=%d\n",
3461		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3462
3463		memclear(mmap_arg);
3464		mmap_arg.handle = bo_gem->gem_handle;
3465		mmap_arg.size = bo->size;
3466		if (drmIoctl(bufmgr_gem->fd,
3467			     DRM_IOCTL_I915_GEM_MMAP,
3468			     &mmap_arg)) {
3469			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3470			    __FILE__, __LINE__, bo_gem->gem_handle,
3471			    bo_gem->name, strerror(errno));
3472			if (--bo_gem->map_count == 0)
3473				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3474		} else {
3475			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3476			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3477		}
3478	}
3479	pthread_mutex_unlock(&bufmgr_gem->lock);
3480
3481	return bo_gem->mem_virtual;
3482}
3483
3484drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3485{
3486	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3487	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3488
3489	if (bo_gem->wc_virtual)
3490		return bo_gem->wc_virtual;
3491
3492	if (bo_gem->is_userptr)
3493		return NULL;
3494
3495	pthread_mutex_lock(&bufmgr_gem->lock);
3496	if (!bo_gem->wc_virtual) {
3497		struct drm_i915_gem_mmap mmap_arg;
3498
3499		if (bo_gem->map_count++ == 0)
3500			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3501
3502		DBG("bo_map: %d (%s), map_count=%d\n",
3503		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3504
3505		memclear(mmap_arg);
3506		mmap_arg.handle = bo_gem->gem_handle;
3507		mmap_arg.size = bo->size;
3508		mmap_arg.flags = I915_MMAP_WC;
3509		if (drmIoctl(bufmgr_gem->fd,
3510			     DRM_IOCTL_I915_GEM_MMAP,
3511			     &mmap_arg)) {
3512			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3513			    __FILE__, __LINE__, bo_gem->gem_handle,
3514			    bo_gem->name, strerror(errno));
3515			if (--bo_gem->map_count == 0)
3516				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3517		} else {
3518			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3519			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3520		}
3521	}
3522	pthread_mutex_unlock(&bufmgr_gem->lock);
3523
3524	return bo_gem->wc_virtual;
3525}
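
/*
 * Usage sketch (illustrative only, compiled out): the lightweight map helpers
 * above return a cached mapping but, unlike drm_intel_gem_bo_map(), perform
 * no domain tracking, so the caller is responsible for coherency. The 100ms
 * wait budget is an assumed choice.
 */
#if 0
static void *example_map_for_cpu_write(drm_intel_bo *bo)
{
	void *ptr = drm_intel_gem_bo_map__wc(bo);

	if (!ptr)
		return NULL;

	/* Make sure the GPU is done with the pages before writing through
	 * the WC mapping.
	 */
	if (drm_intel_gem_bo_wait(bo, 100 * 1000 * 1000ll) != 0)
		return NULL;

	return ptr;
}
#endif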
3526
3527/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
3530 *
3531 * \param fd File descriptor of the opened DRM device.
3532 */
3533drm_public drm_intel_bufmgr *
3534drm_intel_bufmgr_gem_init(int fd, int batch_size)
3535{
3536	drm_intel_bufmgr_gem *bufmgr_gem;
3537	struct drm_i915_gem_get_aperture aperture;
3538	drm_i915_getparam_t gp;
3539	int ret, tmp;
3540
3541	pthread_mutex_lock(&bufmgr_list_mutex);
3542
3543	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3544	if (bufmgr_gem)
3545		goto exit;
3546
3547	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3548	if (bufmgr_gem == NULL)
3549		goto exit;
3550
3551	bufmgr_gem->fd = fd;
3552	atomic_set(&bufmgr_gem->refcount, 1);
3553
3554	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3555		free(bufmgr_gem);
3556		bufmgr_gem = NULL;
3557		goto exit;
3558	}
3559
3560	memclear(aperture);
3561	ret = drmIoctl(bufmgr_gem->fd,
3562		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3563		       &aperture);
3564
3565	if (ret == 0)
3566		bufmgr_gem->gtt_size = aperture.aper_available_size;
3567	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3569			strerror(errno));
3570		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3571		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3572			"May lead to reduced performance or incorrect "
3573			"rendering.\n",
3574			(int)bufmgr_gem->gtt_size / 1024);
3575	}
3576
3577	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3578
3579	if (IS_GEN2(bufmgr_gem->pci_device))
3580		bufmgr_gem->gen = 2;
3581	else if (IS_GEN3(bufmgr_gem->pci_device))
3582		bufmgr_gem->gen = 3;
3583	else if (IS_GEN4(bufmgr_gem->pci_device))
3584		bufmgr_gem->gen = 4;
3585	else if (IS_GEN5(bufmgr_gem->pci_device))
3586		bufmgr_gem->gen = 5;
3587	else if (IS_GEN6(bufmgr_gem->pci_device))
3588		bufmgr_gem->gen = 6;
3589	else if (IS_GEN7(bufmgr_gem->pci_device))
3590		bufmgr_gem->gen = 7;
3591	else if (IS_GEN8(bufmgr_gem->pci_device))
3592		bufmgr_gem->gen = 8;
3593	else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) {
3594		free(bufmgr_gem);
3595		bufmgr_gem = NULL;
3596		goto exit;
3597	}
3598
3599	if (IS_GEN3(bufmgr_gem->pci_device) &&
3600	    bufmgr_gem->gtt_size > 256*1024*1024) {
3601		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3602		 * be used for tiled blits. To simplify the accounting, just
3603		 * subtract the unmappable part (fixed to 256MB on all known
3604		 * gen3 devices) if the kernel advertises it. */
3605		bufmgr_gem->gtt_size -= 256*1024*1024;
3606	}
3607
3608	memclear(gp);
3609	gp.value = &tmp;
3610
3611	gp.param = I915_PARAM_HAS_EXECBUF2;
3612	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "i915 does not support EXECBUFFER2\n");
		free(bufmgr_gem);
		bufmgr_gem = NULL;
		goto exit;
	}
3619
3620	gp.param = I915_PARAM_HAS_BSD;
3621	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3622	bufmgr_gem->has_bsd = ret == 0;
3623
3624	gp.param = I915_PARAM_HAS_BLT;
3625	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3626	bufmgr_gem->has_blt = ret == 0;
3627
3628	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3629	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3630	bufmgr_gem->has_relaxed_fencing = ret == 0;
3631
3632	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3633	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3634	bufmgr_gem->has_exec_async = ret == 0;
3635
3636	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3637
3638	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3639	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3640	bufmgr_gem->has_wait_timeout = ret == 0;
3641
3642	gp.param = I915_PARAM_HAS_LLC;
3643	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3644	if (ret != 0) {
		/* Kernel does not support the HAS_LLC query; fall back to GPU
		 * generation detection and assume that we have LLC on GEN6/7.
		 */
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
				       IS_GEN7(bufmgr_gem->pci_device));
3650	} else
3651		bufmgr_gem->has_llc = *gp.value;
3652
3653	gp.param = I915_PARAM_HAS_VEBOX;
3654	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3656
3657	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3658	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3659	if (ret == 0 && *gp.value > 0)
3660		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3661
3662	if (bufmgr_gem->gen < 4) {
3663		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3664		gp.value = &bufmgr_gem->available_fences;
3665		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3666		if (ret) {
3667			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3668				errno);
3669			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3670				*gp.value);
3671			bufmgr_gem->available_fences = 0;
3672		} else {
3673			/* XXX The kernel reports the total number of fences,
3674			 * including any that may be pinned.
3675			 *
3676			 * We presume that there will be at least one pinned
3677			 * fence for the scanout buffer, but there may be more
3678			 * than one scanout and the user may be manually
3679			 * pinning buffers. Let's move to execbuffer2 and
3680			 * thereby forget the insanity of using fences...
3681			 */
3682			bufmgr_gem->available_fences -= 2;
3683			if (bufmgr_gem->available_fences < 0)
3684				bufmgr_gem->available_fences = 0;
3685		}
3686	}
3687
3688	if (bufmgr_gem->gen >= 8) {
3689		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3690		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3691		if (ret == 0 && *gp.value == 3)
3692			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3693	}
3694
	/* Let's go with one relocation for every two dwords (but round down a
	 * bit since a power of two will mean an extra page allocation for the
	 * reloc buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
3701	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3702
3703	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3704	bufmgr_gem->bufmgr.bo_alloc_for_render =
3705	    drm_intel_gem_bo_alloc_for_render;
3706	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3707	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3708	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3709	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3710	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3711	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3712	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3713	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3714	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3715	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3716	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3717	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3718	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3719	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3720	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3721	bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3722	bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3723	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3724	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3725	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3726	bufmgr_gem->bufmgr.debug = 0;
3727	bufmgr_gem->bufmgr.check_aperture_space =
3728	    drm_intel_gem_check_aperture_space;
3729	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3730	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3731	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3732	    drm_intel_gem_get_pipe_from_crtc_id;
3733	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3734
3735	init_cache_buckets(bufmgr_gem);
3736
3737	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3738	bufmgr_gem->vma_max = -1; /* unlimited by default */
3739
3740	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3741
3742exit:
3743	pthread_mutex_unlock(&bufmgr_list_mutex);
3744
3745	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3746}
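
/*
 * Usage sketch (illustrative only, compiled out): typical buffer manager
 * bring-up. The "/dev/dri/renderD128" node and the 16KiB batch size are
 * assumed values, not requirements of this file.
 */
#if 0
static drm_intel_bufmgr *example_open_bufmgr(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
	drm_intel_bufmgr *bufmgr;

	if (fd < 0)
		return NULL;

	bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
	if (!bufmgr) {
		close(fd);
		return NULL;
	}

	/* Enable the BO cache; without this every allocation goes straight
	 * to the kernel.
	 */
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	return bufmgr;
}
#endif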
3747