1/*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
4 * Copyright 2018 Advanced Micro Devices, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
25
26#ifndef RADEON_WINSYS_H
27#define RADEON_WINSYS_H
28
29/* The public winsys interface header for the radeon driver. */
30
/* Flush flags for cs_flush. These occupy the top bits (29-31) so they can be
 * OR'ed into the same word as the PIPE_FLUSH_* flags without colliding.
 */

/* Skip command submission. Same as RADEON_NOOP=1. */
#define RADEON_FLUSH_NOOP                     (1u << 29)

/* Toggle the secure submission boolean after the flush */
#define RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION (1u << 30)

/* Whether the next IB can start immediately and not wait for draws and
 * dispatches from the current IB to finish. */
#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW    (1u << 31)

/* Convenience combination: asynchronous flush + overlapping next gfx IB. */
#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW                                                   \
   (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
43
44#include "amd/common/ac_gpu_info.h"
45#include "amd/common/ac_surface.h"
46#include "pipebuffer/pb_buffer.h"
47
48/* Tiling flags. */
/* Buffer layout as exposed for display code and DRI sharing. */
enum radeon_bo_layout
{
   RADEON_LAYOUT_LINEAR = 0,   /* no tiling */
   RADEON_LAYOUT_TILED,        /* tiled layout */
   RADEON_LAYOUT_SQUARETILED,  /* square-tiled layout */

   RADEON_LAYOUT_UNKNOWN       /* layout could not be determined */
};
57
/* Memory domains a buffer can be placed in. */
enum radeon_bo_domain
{ /* bitfield */
  RADEON_DOMAIN_GTT = 2,  /* GPU-accessible system memory */
  RADEON_DOMAIN_VRAM = 4, /* device-local video memory */
  RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
  RADEON_DOMAIN_GDS = 8,  /* global data share (on-chip) */
  RADEON_DOMAIN_OA = 16,  /* ordered-append (on-chip) */
};
66
/* Buffer-allocation flags. See buffer_create and radeon_get_heap_index. */
enum radeon_bo_flag
{ /* bitfield */
  RADEON_FLAG_GTT_WC = (1 << 0),          /* write-combined CPU mapping */
  RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),   /* buffer is never CPU-mapped */
  RADEON_FLAG_NO_SUBALLOC = (1 << 2),     /* bypass the slab suballocator */
  RADEON_FLAG_SPARSE = (1 << 3),          /* partially-resident; see buffer_commit */
  RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), /* never exported to other processes */
  RADEON_FLAG_READ_ONLY = (1 << 5),       /* GPU-read-only mapping */
  RADEON_FLAG_32BIT = (1 << 6),           /* allocate within a 32-bit VA range */
  RADEON_FLAG_ENCRYPTED = (1 << 7),       /* TMZ (secure) buffer */
  RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
  RADEON_FLAG_DRIVER_INTERNAL = (1 << 9), /* not created on behalf of the app */
   /* Discard on eviction (instead of moving the buffer to GTT).
    * This guarantees that this buffer will never be moved to GTT.
    */
  RADEON_FLAG_DISCARDABLE = (1 << 10),
  RADEON_FLAG_MALL_NOALLOC = (1 << 11), /* don't cache in the infinity cache */
};
85
/* Extra flags for buffer_map, layered on top of PIPE_MAP_* in the
 * driver-private flag range.
 */
enum radeon_map_flags
{
   /* Indicates that the caller will unmap the buffer.
    *
    * Not unmapping buffers is an important performance optimization for
    * OpenGL (avoids kernel overhead for frequently mapped buffers).
    */
   RADEON_MAP_TEMPORARY = (PIPE_MAP_DRV_PRV << 0),
};
95
96#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
97
/* Counters and values queryable through radeon_winsys::query_value. */
enum radeon_value_id
{
   RADEON_REQUESTED_VRAM_MEMORY,    /* total VRAM requested by this process */
   RADEON_REQUESTED_GTT_MEMORY,     /* total GTT requested by this process */
   RADEON_MAPPED_VRAM,              /* VRAM currently CPU-mapped */
   RADEON_MAPPED_GTT,               /* GTT currently CPU-mapped */
   RADEON_SLAB_WASTED_VRAM,         /* VRAM wasted by slab suballocation */
   RADEON_SLAB_WASTED_GTT,          /* GTT wasted by slab suballocation */
   RADEON_BUFFER_WAIT_TIME_NS,      /* accumulated buffer_wait time */
   RADEON_NUM_MAPPED_BUFFERS,
   RADEON_TIMESTAMP,                /* current GPU timestamp */
   RADEON_NUM_GFX_IBS,              /* gfx IBs submitted so far */
   RADEON_NUM_SDMA_IBS,             /* SDMA IBs submitted so far */
   RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
   RADEON_GFX_IB_SIZE_COUNTER,
   RADEON_NUM_BYTES_MOVED,          /* bytes moved by the kernel memory manager */
   RADEON_NUM_EVICTIONS,
   RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
   RADEON_VRAM_USAGE,
   RADEON_VRAM_VIS_USAGE,           /* CPU-visible VRAM usage */
   RADEON_GTT_USAGE,
   RADEON_GPU_TEMPERATURE,
   RADEON_CURRENT_SCLK,             /* current shader/engine clock */
   RADEON_CURRENT_MCLK,             /* current memory clock */
   RADEON_CS_THREAD_TIME,
};
124
/* Scheduling priority of a submission context, passed to ctx_create
 * (ascending order: LOW is the least urgent).
 */
enum radeon_ctx_priority
{
   RADEON_CTX_PRIORITY_LOW = 0,
   RADEON_CTX_PRIORITY_MEDIUM,
   RADEON_CTX_PRIORITY_HIGH,
   RADEON_CTX_PRIORITY_REALTIME,
};
132
/* Buffer priorities for cs_add_buffer, used to fill
 * radeon_bo_list_item::priority_usage. These are bit values, not indices.
 * Each group of two has the same priority.
 */
#define RADEON_PRIO_FENCE_TRACE (1 << 0)
#define RADEON_PRIO_SO_FILLED_SIZE (1 << 1)

#define RADEON_PRIO_QUERY (1 << 2)
#define RADEON_PRIO_IB (1 << 3)

#define RADEON_PRIO_DRAW_INDIRECT (1 << 4)
#define RADEON_PRIO_INDEX_BUFFER (1 << 5)

#define RADEON_PRIO_CP_DMA (1 << 6)
#define RADEON_PRIO_BORDER_COLORS (1 << 7)

#define RADEON_PRIO_CONST_BUFFER (1 << 8)
#define RADEON_PRIO_DESCRIPTORS (1 << 9)

#define RADEON_PRIO_SAMPLER_BUFFER (1 << 10)
#define RADEON_PRIO_VERTEX_BUFFER (1 << 11)

#define RADEON_PRIO_SHADER_RW_BUFFER (1 << 12)
#define RADEON_PRIO_SAMPLER_TEXTURE (1 << 13)

#define RADEON_PRIO_SHADER_RW_IMAGE (1 << 14)
#define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 15)

#define RADEON_PRIO_COLOR_BUFFER (1 << 16)
#define RADEON_PRIO_DEPTH_BUFFER (1 << 17)

#define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 18)
#define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 19)

#define RADEON_PRIO_SEPARATE_META (1 << 20)
#define RADEON_PRIO_SHADER_BINARY (1 << 21) /* the hw can't hide instruction cache misses */

#define RADEON_PRIO_SHADER_RINGS (1 << 22)
#define RADEON_PRIO_SCRATCH_BUFFER (1 << 23)

/* Mask covering every priority bit (bits 0..27, i.e. everything below
 * RADEON_USAGE_READ).
 */
#define RADEON_ALL_PRIORITIES (RADEON_USAGE_READ - 1)

/* Upper bits of priorities are used by usage flags. */
#define RADEON_USAGE_READ (1 << 28)
#define RADEON_USAGE_WRITE (1 << 29)
#define RADEON_USAGE_READWRITE (RADEON_USAGE_READ | RADEON_USAGE_WRITE)

/* The winsys ensures that the CS submission will be scheduled after
 * previously flushed CSs referencing this BO in a conflicting way.
 */
#define RADEON_USAGE_SYNCHRONIZED (1 << 30)

/* When used, an implicit sync is done to make sure a compute shader
 * will read the written values from a previous draw.
 */
#define RADEON_USAGE_NEEDS_IMPLICIT_SYNC (1u << 31)
186
187struct winsys_handle;
188struct radeon_winsys_ctx;
189
/* One contiguous chunk of a command buffer (IB). */
struct radeon_cmdbuf_chunk {
   unsigned cdw;    /* Number of used dwords. */
   unsigned max_dw; /* Maximum number of dwords. */
   uint32_t *buf;   /* The base pointer of the chunk. */
};
195
/* A command buffer. New commands go to "current"; earlier chunks are kept
 * in "prev" (cs_check_space may chain a new chunk when space runs out).
 */
struct radeon_cmdbuf {
   struct radeon_cmdbuf_chunk current; /* chunk being filled by radeon_emit */
   struct radeon_cmdbuf_chunk *prev;   /* array of completed chunks */
   uint16_t num_prev; /* Number of previous chunks. */
   uint16_t max_prev; /* Space in array pointed to by prev. */
   unsigned prev_dw;  /* Total number of dwords in previous chunks. */

   /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
   uint32_t used_vram_kb;
   uint32_t used_gart_kb;
   uint64_t gpu_address;

   /* Private winsys data. */
   void *priv;
   void *csc; /* amdgpu_cs_context */
};
212
213/* Tiling info for display code, DRI sharing, and other data. */
/* Tiling info for display code, DRI sharing, and other data. */
struct radeon_bo_metadata {
   /* Tiling flags describing the texture layout for display code
    * and DRI sharing.
    */
   union {
      struct {
         enum radeon_bo_layout microtile;
         enum radeon_bo_layout macrotile;
         unsigned pipe_config;
         unsigned bankw;      /* bank width */
         unsigned bankh;      /* bank height */
         unsigned tile_split;
         unsigned mtilea;     /* macrotile aspect ratio */
         unsigned num_banks;
         unsigned stride;     /* row pitch in bytes */
         bool scanout;        /* displayable by the display engine */
      } legacy;
   } u;

   enum radeon_surf_mode mode;   /* Output from buffer_get_metadata */

   /* Additional metadata associated with the buffer, in bytes.
    * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
    * Supported by amdgpu only.
    */
   uint32_t size_metadata;
   uint32_t metadata[64];
};
242
/* Hardware features that must be requested via cs_request_feature. */
enum radeon_feature_id
{
   RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
   RADEON_FID_R300_CMASK_ACCESS,
};
248
/* One entry of the buffer list returned by cs_get_buffer_list. */
struct radeon_bo_list_item {
   uint64_t bo_size;
   uint64_t vm_address;
   uint32_t priority_usage; /* bitmask of RADEON_PRIO_* flags */
};
254
/* The winsys vtable: everything the driver needs from the kernel winsys
 * (buffer management, command submission, fences, surface layout).
 * Implemented by the amdgpu and radeon DRM winsyses.
 */
struct radeon_winsys {
   /**
    * The screen object this winsys was created for
    */
   struct pipe_screen *screen;
   /**
    * Has the application created at least one TMZ buffer.
    */
   const bool uses_secure_bos;

   /**
    * Decrement the winsys reference count.
    *
    * \param ws  The winsys this function is called for.
    * \return    True if the winsys and screen should be destroyed.
    */
   bool (*unref)(struct radeon_winsys *ws);

   /**
    * Destroy this winsys.
    *
    * \param ws        The winsys this function is called from.
    */
   void (*destroy)(struct radeon_winsys *ws);

   /**
    * Query an info structure from winsys.
    *
    * \param ws        The winsys this function is called from.
    * \param info      Return structure
    * \param enable_smart_access_memory   Force SAM on (all VRAM CPU-visible).
    * \param disable_smart_access_memory  Force SAM off.
    */
   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info,
                      bool enable_smart_access_memory,
                      bool disable_smart_access_memory);

   /**
    * A hint for the winsys that it should pin its execution threads to
    * a group of cores sharing a specific L3 cache if the CPU has multiple
    * L3 caches. This is needed for good multithreading performance on
    * AMD Zen CPUs.
    */
   void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);

   /**************************************************************************
    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
    *
    * Remember that gallium gets to choose the interface it needs, and the
    * window systems must then implement that interface (rather than the
    * other way around...).
    *************************************************************************/

   /**
    * Create a buffer object.
    *
    * \param ws        The winsys this function is called from.
    * \param size      The size to allocate.
    * \param alignment An alignment of the buffer in memory.
    * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
    * \param flags     A bitmask of the RADEON_FLAG_* flags.
    * \return          The created buffer object.
    */
   struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
                                      enum radeon_bo_domain domain, enum radeon_bo_flag flags);

   /**
    * Map the entire data store of a buffer object into the client's address
    * space.
    *
    * Callers are expected to unmap buffers again if and only if the
    * RADEON_MAP_TEMPORARY flag is set in \p usage.
    *
    * \param ws        The winsys this function is called from.
    * \param buf       A winsys buffer object to map.
    * \param cs        A command stream to flush if the buffer is referenced by it.
    * \param usage     A bitmask of the PIPE_MAP_* and RADEON_MAP_* flags.
    * \return          The pointer at the beginning of the buffer.
    */
   void *(*buffer_map)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       struct radeon_cmdbuf *cs, enum pipe_map_flags usage);

   /**
    * Unmap a buffer object from the client's address space.
    *
    * \param buf       A winsys buffer object to unmap.
    */
   void (*buffer_unmap)(struct radeon_winsys *ws, struct pb_buffer *buf);

   /**
    * Wait for the buffer and return true if the buffer is not used
    * by the device.
    *
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer
    * is idle.
    */
   bool (*buffer_wait)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       uint64_t timeout, unsigned usage);

   /**
    * Return buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf       A winsys buffer object to get the flags from.
    * \param md        Metadata
    */
   void (*buffer_get_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Set buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf       A winsys buffer object to set the flags for.
    * \param md        Metadata
    */
   void (*buffer_set_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Get a winsys buffer from a winsys handle. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys this function is called from.
    * \param whandle   A winsys handle pointer as was received from a state
    *                  tracker.
    */
   struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle,
                                           unsigned vm_alignment, bool is_prime_linear_buffer);

   /**
    * Get a winsys buffer from a user pointer. The resulting buffer can't
    * be exported. Both pointer and size must be page aligned.
    *
    * \param ws        The winsys this function is called from.
    * \param pointer   User pointer to turn into a buffer object.
    * \param size      Size in bytes for the new buffer.
    */
   struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size, enum radeon_bo_flag flags);

   /**
    * Whether the buffer was created from a user pointer.
    *
    * \param buf       A winsys buffer object
    * \return          whether \p buf was created via buffer_from_ptr
    */
   bool (*buffer_is_user_ptr)(struct pb_buffer *buf);

   /** Whether the buffer was suballocated. */
   bool (*buffer_is_suballocated)(struct pb_buffer *buf);

   /**
    * Get a winsys handle from a winsys buffer. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys instance for which the handle is to be valid
    * \param buf       A winsys buffer object to get the handle from.
    * \param whandle   A winsys handle pointer.
    * \return          true on success.
    */
   bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer *buf,
                             struct winsys_handle *whandle);

   /**
    * Change the commitment of a (64KB-page aligned) region of the given
    * sparse buffer.
    *
    * \warning There is no automatic synchronization with command submission.
    *
    * \note Only implemented by the amdgpu winsys.
    *
    * \return false on out of memory or other failure, true on success.
    */
   bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer *buf,
                         uint64_t offset, uint64_t size, bool commit);

   /**
    * Return the virtual address of a buffer.
    *
    * When virtual memory is not in use, this is the offset relative to the
    * relocation base (non-zero for sub-allocated buffers).
    *
    * \param buf       A winsys buffer object
    * \return          virtual address
    */
   uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);

   /**
    * Return the offset of this buffer relative to the relocation base.
    * This is only non-zero for sub-allocated buffers.
    *
    * This is only supported in the radeon winsys, since amdgpu uses virtual
    * addresses in submissions even for the video engines.
    *
    * \param buf      A winsys buffer object
    * \return         the offset for relocations
    */
   unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf);

   /**
    * Query the initial placement of the buffer from the kernel driver.
    */
   enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf);

   /**
    * Query the flags used for creation of this buffer.
    *
    * Note that for imported buffer this may be lossy since not all flags
    * are passed 1:1.
    */
   enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer *buf);

   /**************************************************************************
    * Command submission.
    *
    * Each pipe context should create its own command stream and submit
    * commands independently of other contexts.
    *************************************************************************/

   /**
    * Create a command submission context.
    * Various command streams can be submitted to the same context.
    */
   struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws,
                                           enum radeon_ctx_priority priority);

   /**
    * Destroy a context.
    */
   void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

   /**
    * Query a GPU reset status.
    */
   enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx,
                                                    bool full_reset_only,
                                                    bool *needs_reset);

   /**
    * Create a command stream.
    *
    * \param cs        The returned structure that is initialized by cs_create.
    * \param ctx       The submission context
    * \param ip_type   The IP type (GFX, DMA, UVD)
    * \param flush     Flush callback function associated with the command stream.
    * \param user      User pointer that will be passed to the flush callback.
    *
    * \return true on success
    */
   bool (*cs_create)(struct radeon_cmdbuf *cs,
                     struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx, bool stop_exec_on_failure);

   /**
    * Set or change the CS preamble, which is a sequence of packets that is executed before
    * the command buffer. If the winsys doesn't support preambles, the packets are inserted
    * into the command buffer.
    *
    * \param cs               Command stream
    * \param preamble_ib      Preamble IB for the context.
    * \param preamble_num_dw  Number of dwords in the preamble IB.
    * \param preamble_changed Whether the preamble changed or is the same as the last one.
    */
   void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                           unsigned preamble_num_dw, bool preamble_changed);

   /**
    * Set up and enable mid command buffer preemption for the command stream.
    *
    * \param cs               Command stream
    * \param preamble_ib      Non-preemptible preamble IB for the context.
    * \param preamble_num_dw  Number of dwords in the preamble IB.
    */
   bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                               unsigned preamble_num_dw);

   /**
    * Destroy a command stream.
    *
    * \param cs        A command stream to destroy.
    */
   void (*cs_destroy)(struct radeon_cmdbuf *cs);

   /**
    * Add a buffer. Each buffer used by a CS must be added using this function.
    *
    * \param cs      Command stream
    * \param buf     Buffer
    * \param usage   Usage
    * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
    * \return Buffer index.
    */
   unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                             unsigned usage, enum radeon_bo_domain domain);

   /**
    * Return the index of an already-added buffer.
    *
    * Not supported on amdgpu. Drivers with GPUVM should not care about
    * buffer indices.
    *
    * \param cs        Command stream
    * \param buf       Buffer
    * \return          The buffer index, or -1 if the buffer has not been added.
    */
   int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf);

   /**
    * Return true if there is enough memory in VRAM and GTT for the buffers
    * added so far. If the validation fails, all buffers which have
    * been added since the last call of cs_validate will be removed and
    * the CS will be flushed (provided there are still any buffers).
    *
    * \param cs        A command stream to validate.
    */
   bool (*cs_validate)(struct radeon_cmdbuf *cs);

   /**
    * Check whether the given number of dwords is available in the IB.
    * Optionally chain a new chunk of the IB if necessary and supported.
    *
    * \param cs        A command stream.
    * \param dw        Number of CS dwords requested by the caller.
    * \return true if there is enough space
    */
   bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);

   /**
    * Return the buffer list.
    *
    * This is the buffer list as passed to the kernel, i.e. it only contains
    * the parent buffers of sub-allocated buffers.
    *
    * \param cs    Command stream
    * \param list  Returned buffer list. Set to NULL to query the count only.
    * \return      The buffer count.
    */
   unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list);

   /**
    * Flush a command stream.
    *
    * \param cs          A command stream to flush.
    * \param flags       PIPE_FLUSH_* and RADEON_FLUSH_* flags.
    * \param fence       Pointer to a fence. If non-NULL, a fence is inserted
    *                    after the CS and is returned through this parameter.
    * \return Negative POSIX error code or 0 for success.
    *         Asynchronous submissions never return an error.
    */
   int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence);

   /**
    * Create a fence before the CS is flushed.
    * The user must flush manually to complete the initialization of the fence.
    *
    * The fence must not be used for anything except \ref cs_add_fence_dependency
    * before the flush.
    */
   struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);

   /**
    * Return true if a buffer is referenced by a command stream.
    *
    * \param cs        A command stream.
    * \param buf       A winsys buffer.
    */
   bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                                   unsigned usage);

   /**
    * Request access to a feature for a command stream.
    *
    * \param cs        A command stream.
    * \param fid       Feature ID, one of RADEON_FID_*
    * \param enable    Whether to enable or disable the feature.
    */
   bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable);
   /**
    * Make sure all asynchronous flush of the cs have completed
    *
    * \param cs        A command stream.
    */
   void (*cs_sync_flush)(struct radeon_cmdbuf *cs);

   /**
    * Add a fence dependency to the CS, so that the CS will wait for
    * the fence before execution.
    *
    * \param dependency_flags  Bitmask of RADEON_DEPENDENCY_*
    */
   void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence,
                                   unsigned dependency_flags);

   /**
    * Signal a syncobj when the CS finishes execution.
    */
   void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence);

   /**
    * Wait for the fence and return true if the fence has been signalled.
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
    * is signalled.
    */
   bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout);

   /**
    * Reference counting for fences.
    */
   void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src);

   /**
    * Create a new fence object corresponding to the given syncobj fd.
    */
   struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd);

   /**
    * Create a new fence object corresponding to the given sync_file.
    */
   struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd);

   /**
    * Return a sync_file FD corresponding to the given fence object.
    */
   int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence);

   /**
    * Return a sync file FD that is already signalled.
    */
   int (*export_signalled_sync_file)(struct radeon_winsys *ws);

   /**
    * Initialize surface
    *
    * \param ws        The winsys this function is called from.
    * \param tex       Input texture description
    * \param flags     Bitmask of RADEON_SURF_* flags
    * \param bpe       Bytes per pixel, it can be different for Z buffers.
    * \param mode      Preferred tile mode. (linear, 1D, or 2D)
    * \param surf      Output structure
    */
   int (*surface_init)(struct radeon_winsys *ws, const struct pipe_resource *tex, uint64_t flags,
                       unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf);

   /** Query one of the RADEON_* counters/values. See enum radeon_value_id. */
   uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);

   /** Read \p num_registers GPU registers starting at \p reg_offset into \p out. */
   bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
                          uint32_t *out);

   /**
    * Secure context
    * (whether the CS is currently in the secure/TMZ submission state)
    */
   bool (*cs_is_secure)(struct radeon_cmdbuf *cs);
};
709
710static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
711{
712   return cs && (cs->prev_dw + cs->current.cdw > num_dw);
713}
714
715static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
716{
717   cs->current.buf[cs->current.cdw++] = value;
718}
719
720static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values,
721                                     unsigned count)
722{
723   memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
724   cs->current.cdw += count;
725}
726
727static inline bool radeon_uses_secure_bos(struct radeon_winsys* ws)
728{
729  return ws->uses_secure_bos;
730}
731
/* Point *dst at src, updating buffer reference counts through the winsys
 * (thin wrapper around pb_reference_with_winsys). src may be NULL to drop
 * the reference held in *dst.
 */
static inline void
radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer **dst, struct pb_buffer *src)
{
   pb_reference_with_winsys(rws, dst, src);
}
737
/* The following bits describe the heaps managed by slab allocators (pb_slab) and
 * the allocation cache (pb_cache).
 *
 * Note that bits 4-5 are intentionally overloaded: their meaning depends on
 * whether RADEON_HEAP_BIT_VRAM is set, which keeps the total heap count small.
 */
#define RADEON_HEAP_BIT_VRAM           (1 << 0) /* if false, it's GTT */
#define RADEON_HEAP_BIT_READ_ONLY      (1 << 1) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_32BIT          (1 << 2) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_ENCRYPTED      (1 << 3) /* both VRAM and GTT */

#define RADEON_HEAP_BIT_NO_CPU_ACCESS  (1 << 4) /* VRAM only */
#define RADEON_HEAP_BIT_MALL_NOALLOC   (1 << 5) /* VRAM only */

#define RADEON_HEAP_BIT_WC             (1 << 4) /* GTT only, VRAM implies this to be true */
#define RADEON_HEAP_BIT_GL2_BYPASS     (1 << 5) /* GTT only */

/* The number of all possible heap descriptions using the bits above. */
#define RADEON_NUM_HEAPS               (1 << 6)
754
755static inline enum radeon_bo_domain radeon_domain_from_heap(int heap)
756{
757   assert(heap >= 0);
758
759   if (heap & RADEON_HEAP_BIT_VRAM)
760      return RADEON_DOMAIN_VRAM;
761   else
762      return RADEON_DOMAIN_GTT;
763}
764
765static inline unsigned radeon_flags_from_heap(int heap)
766{
767   assert(heap >= 0);
768
769   unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
770
771   if (heap & RADEON_HEAP_BIT_READ_ONLY)
772      flags |= RADEON_FLAG_READ_ONLY;
773   if (heap & RADEON_HEAP_BIT_32BIT)
774      flags |= RADEON_FLAG_32BIT;
775   if (heap & RADEON_HEAP_BIT_ENCRYPTED)
776      flags |= RADEON_FLAG_ENCRYPTED;
777
778   if (heap & RADEON_HEAP_BIT_VRAM) {
779      flags |= RADEON_FLAG_GTT_WC;
780      if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
781         flags |= RADEON_FLAG_NO_CPU_ACCESS;
782      if (heap & RADEON_HEAP_BIT_MALL_NOALLOC)
783         flags |= RADEON_FLAG_MALL_NOALLOC;
784   } else {
785      /* GTT only */
786      if (heap & RADEON_HEAP_BIT_WC)
787         flags |= RADEON_FLAG_GTT_WC;
788      if (heap & RADEON_HEAP_BIT_GL2_BYPASS)
789         flags |= RADEON_FLAG_GL2_BYPASS;
790   }
791
792   return flags;
793}
794
795/* This cleans up flags, so that we can comfortably assume that no invalid flag combinations
796 * are set.
797 */
798static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
799                                         enum radeon_bo_flag *_flags)
800{
801   unsigned domain = *_domain;
802   unsigned flags = *_flags;
803
804   /* Only set 1 domain, e.g. ignore GTT if VRAM is set. */
805   if (domain)
806      domain = BITFIELD_BIT(ffs(domain) - 1);
807   else
808      domain = RADEON_DOMAIN_VRAM;
809
810   switch (domain) {
811   case RADEON_DOMAIN_VRAM:
812      flags |= RADEON_FLAG_GTT_WC;
813      flags &= ~RADEON_FLAG_GL2_BYPASS;
814      break;
815   case RADEON_DOMAIN_GTT:
816      flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
817      flags &= ~RADEON_FLAG_MALL_NOALLOC;
818      break;
819   case RADEON_DOMAIN_GDS:
820   case RADEON_DOMAIN_OA:
821      flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS;
822      flags &= ~RADEON_FLAG_SPARSE;
823      break;
824   }
825
826   /* Sparse buffers must have NO_CPU_ACCESS set. */
827   if (flags & RADEON_FLAG_SPARSE)
828      flags |= RADEON_FLAG_NO_CPU_ACCESS;
829
830   *_domain = (enum radeon_bo_domain)domain;
831   *_flags = (enum radeon_bo_flag)flags;
832}
833
834/* Return the heap index for winsys allocators, or -1 on failure. */
835static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
836{
837   radeon_canonicalize_bo_flags(&domain, &flags);
838
839   /* Resources with interprocess sharing don't use any winsys allocators. */
840   if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
841      return -1;
842
843   /* These are unsupported flags. */
844   /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
845   if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
846                RADEON_FLAG_DISCARDABLE))
847      return -1;
848
849   int heap = 0;
850
851   if (flags & RADEON_FLAG_READ_ONLY)
852      heap |= RADEON_HEAP_BIT_READ_ONLY;
853   if (flags & RADEON_FLAG_32BIT)
854      heap |= RADEON_HEAP_BIT_32BIT;
855   if (flags & RADEON_FLAG_ENCRYPTED)
856      heap |= RADEON_HEAP_BIT_ENCRYPTED;
857
858   if (domain == RADEON_DOMAIN_VRAM) {
859      /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */
860      heap |= RADEON_HEAP_BIT_VRAM;
861      if (flags & RADEON_FLAG_NO_CPU_ACCESS)
862         heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
863      if (flags & RADEON_FLAG_MALL_NOALLOC)
864         heap |= RADEON_HEAP_BIT_MALL_NOALLOC;
865      /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
866      /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */
867   } else if (domain == RADEON_DOMAIN_GTT) {
868      /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
869      if (flags & RADEON_FLAG_GTT_WC)
870         heap |= RADEON_HEAP_BIT_WC;
871      if (flags & RADEON_FLAG_GL2_BYPASS)
872         heap |= RADEON_HEAP_BIT_GL2_BYPASS;
873      /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */
874      /* RADEON_FLAG_MALL_NOALLOC is ignored and implied to be false for GTT */
875   } else {
876      return -1; /*  */
877   }
878
879   assert(heap < RADEON_NUM_HEAPS);
880   return heap;
881}
882
883typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *,
884                                                      const struct pipe_screen_config *);
885
886/* These functions create the radeon_winsys instance for the corresponding kernel driver. */
887struct radeon_winsys *
888amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
889		     radeon_screen_create_t screen_create);
890struct radeon_winsys *
891radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
892			 radeon_screen_create_t screen_create);
893
894#endif
895