/*
 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_RINGBUFFER_H_
#define FREEDRENO_RINGBUFFER_H_

#include <stdio.h>
#include "util/u_atomic.h"
#include "util/u_debug.h"
#include "util/u_queue.h"

#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "freedreno_drmif.h"
#include "freedreno_pm4.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_submit;
struct fd_ringbuffer;

enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, i.e. an IB1 level rb
    * for which the kernel must set up RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non-streaming stateobjs should be
    * sub-allocated from a page size buffer, so one long-lived
    * state obj doesn't prevent other pages from being freed.
    * (I.e. it would be no worse than allocating a page sized
    * bo for each small non-streaming stateobj.)
    *
    * But streaming stateobjs could be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* Indicates that "growable" cmdstream can be used,
    * consisting of multiple physical cmdstream buffers
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};

/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

struct fd_submit *fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);

/**
 * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per-
 * pipe (submitqueue) sequence number) and optionally, if requested, an
 * out-fence-fd
 */
struct fd_submit_fence {
   /**
    * The ready fence is signaled once the submit is actually flushed down
    * to the kernel, and fence/fence_fd are populated.  You must wait for
    * this fence to be signaled before reading fence/fence_fd.
    */
   struct util_queue_fence ready;

   struct fd_fence fence;

   /**
    * Optional dma_fence fd, returned by submit if use_fence_fd is true
    */
   int fence_fd;
   bool use_fence_fd;
};

/* in_fence_fd: -1 for no in-fence, else fence fd
 * out_fence can be NULL if no output fence is required
 */
int fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence);
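
/* A minimal usage sketch (hypothetical, for illustration only; assumes a
 * valid fd_pipe, and that cmdstream is emitted into the rb before flushing):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *       fd_submit_new_ringbuffer(submit, 0x1000, FD_RINGBUFFER_PRIMARY);
 *
 *    ... emit cmdstream into ring (see OUT_PKT*()/OUT_RING() below) ...
 *
 *    struct fd_submit_fence out = { .use_fence_fd = true };
 *    fd_submit_flush(submit, -1, &out);
 *    util_queue_fence_wait(&out.ready);  // per the note above: wait before
 *                                        // reading out.fence / out.fence_fd
 *
 *    fd_ringbuffer_del(ring);
 *    fd_submit_del(submit);
 */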

struct fd_ringbuffer;
struct fd_reloc;

struct fd_ringbuffer_funcs {
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   void (*destroy)(struct fd_ringbuffer *ring);
};

/* the ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined.  Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
   uint32_t *cur, *end, *start;
   const struct fd_ringbuffer_funcs *funcs;

   // size or end could probably go away
   int size;
   int32_t refcnt;
   enum fd_ringbuffer_flags flags;
};

/* Allocate a new long-lived state object, not associated with
 * a submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);
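
/* Hypothetical stateobj usage sketch: build long-lived state once and
 * reference it from per-submit cmdstream via OUT_RB() (defined below).
 * The size is an arbitrary placeholder:
 *
 *    struct fd_ringbuffer *stateobj = fd_ringbuffer_new_object(pipe, 0x100);
 *    ... emit state into stateobj ...
 *
 *    // in each submit that needs the state:
 *    OUT_RB(ring, stateobj);
 *
 *    // when the state is no longer needed:
 *    fd_ringbuffer_del(stateobj);
 */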

static inline void
fd_ringbuffer_del(struct fd_ringbuffer *ring)
{
   if (!p_atomic_dec_zero(&ring->refcnt))
      return;

   ring->funcs->destroy(ring);
}

static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer *ring)
{
   p_atomic_inc(&ring->refcnt);
   return ring;
}

static inline void
fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   assert(ring->funcs->grow); /* unsupported on kgsl */

   /* there is an upper bound on IB size, which appears to be 0x0fffff */
   ring->size = MIN2(ring->size << 1, 0x0fffff);

   ring->funcs->grow(ring, ring->size);
}

static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
{
   return ring->funcs->check_size(ring);
}

static inline void
fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
{
   (*ring->cur++) = data;
}

struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint64_t orval;
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004
   uint32_t offset;
   int32_t shift;
};

/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace.  On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have likewise decided that that use-case isn't important enough to
 * justify the tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)

/* NOTE: relocs are 2 dwords on a5xx+ */

static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
{
   ring->funcs->emit_reloc(ring, reloc);
}

static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
{
   if (!ring->funcs->cmd_count)
      return 1;
   return ring->funcs->cmd_count(ring);
}

static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
                                   struct fd_ringbuffer *target,
                                   uint32_t cmd_idx)
{
   return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
}

static inline uint32_t
offset_bytes(void *end, void *start)
{
   return ((char *)end) - ((char *)start);
}

static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer *ring)
{
   /* only really needed for stateobj ringbuffers, and won't really
    * do what you expect for growable rb's.. so let's just restrict
    * this to stateobjs for now:
    */
   assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
   return offset_bytes(ring->cur, ring->start);
}

static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer *ring)
{
   return (fd_ringbuffer_cmd_count(ring) == 1) &&
          (offset_bytes(ring->cur, ring->start) == 0);
}

#define LOG_DWORDS 0

static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x\n", ring,
              (uint32_t)(ring->cur - ring->start), data);
   }
   fd_ringbuffer_emit(ring, data);
}

/*
 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
          uint64_t orval, int32_t shift)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RELOC  %04x:  %p+%u << %d\n", ring,
              (uint32_t)(ring->cur - ring->start), bo, offset, shift);
   }
   assert(offset < fd_bo_size(bo));

   uint64_t iova = fd_bo_get_iova(bo) + offset;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   iova |= orval;

   struct fd_reloc reloc = {
      .bo = bo,
      .iova = iova,
      .orval = orval,
      .offset = offset,
      .shift = shift,
   };

   fd_ringbuffer_reloc(ring, &reloc);
}

static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}

static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   if (unlikely(ring->cur + ndwords > ring->end))
      fd_ringbuffer_grow(ring, ndwords);
}

static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
}

static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
   BEGIN_RING(ring, 1);
   OUT_RING(ring, CP_TYPE2_PKT);
}

static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
}

/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt));
}

static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt));
}
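
/* Hypothetical sketch of the emit pattern for both generations ('reg' and
 * 'value' are placeholders, not real register offsets/values):
 *
 *    // pre-a5xx (type0/type3):
 *    OUT_PKT0(ring, reg, 1);
 *    OUT_RING(ring, value);
 *
 *    // a5xx+ (type4/type7):
 *    OUT_PKT4(ring, reg, 1);
 *    OUT_RING(ring, value);
 *
 * OUT_WFI()/OUT_WFI5() below are in-tree instances of the same pattern.
 */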

static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
   OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
   OUT_RING(ring, 0x00000000);
}

static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}

#ifdef __cplusplus
} /* end of extern "C" */
#endif

#endif /* FREEDRENO_RINGBUFFER_H_ */