/*
 * Copyright 2021 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include "agx_state.h"
#include "magic.h"

/* The structures managed in this file appear to be software defined (either in
 * the macOS kernel driver or in the AGX firmware) */

/* Odd pattern */
static uint64_t
demo_unk6(struct agx_pool *pool)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64);
   uint64_t *buf = ptr.cpu;
   memset(buf, 0, sizeof(*buf));

   for (unsigned i = 1; i < 0x3ff; ++i)
      buf[i] = (i + 1);

   return ptr.gpu;
}

static uint64_t
demo_zero(struct agx_pool *pool, unsigned count)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, count, 64);
   memset(ptr.cpu, 0, count);
   return ptr.gpu;
}

static size_t
asahi_size_resource(struct pipe_resource *prsrc, unsigned level)
{
   struct agx_resource *rsrc = agx_resource(prsrc);
   size_t size = rsrc->slices[level].size;

   if (rsrc->separate_stencil)
      size += asahi_size_resource(&rsrc->separate_stencil->base, level);

   return size;
}

static size_t
asahi_size_surface(struct pipe_surface *surf)
{
   return asahi_size_resource(surf->texture, surf->u.tex.level);
}

static size_t
asahi_size_attachments(struct pipe_framebuffer_state *framebuffer)
{
   size_t sum = 0;

   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i)
      sum += asahi_size_surface(framebuffer->cbufs[i]);

   if (framebuffer->zsbuf)
      sum += asahi_size_surface(framebuffer->zsbuf);

   return sum;
}

static enum agx_iogpu_attachment_type
asahi_classify_attachment(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   if (util_format_has_depth(desc))
      return AGX_IOGPU_ATTACHMENT_TYPE_DEPTH;
   else if (util_format_has_stencil(desc))
      return AGX_IOGPU_ATTACHMENT_TYPE_STENCIL;
   else
      return AGX_IOGPU_ATTACHMENT_TYPE_COLOUR;
}

static uint64_t
agx_map_surface_resource(struct pipe_surface *surf, struct agx_resource *rsrc)
{
   return agx_map_texture_gpu(rsrc, surf->u.tex.level, surf->u.tex.first_layer);
}

static uint64_t
agx_map_surface(struct pipe_surface *surf)
{
   return agx_map_surface_resource(surf, agx_resource(surf->texture));
}
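
/* Attachment descriptors record, for each render target, an attachment type
 * (colour, depth, or stencil, inferred from the pipe format), the GPU address
 * of the backing slice, and its size. The "percent" field appears to be the
 * attachment's share of the total attachment footprint, expressed as an
 * integer percentage.
 */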

static void
asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc,
                            struct pipe_surface *surf,
                            unsigned total_size)
{
   /* We don't support layered rendering yet */
   assert(surf->u.tex.first_layer == surf->u.tex.last_layer);

   agx_pack(out, IOGPU_ATTACHMENT, cfg) {
      cfg.type = asahi_classify_attachment(rsrc->base.format);
      cfg.address = agx_map_surface_resource(surf, rsrc);
      cfg.size = rsrc->slices[surf->u.tex.level].size;
      cfg.percent = (100 * cfg.size) / total_size;
   }
}

static unsigned
asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer)
{
   unsigned total_attachment_size = asahi_size_attachments(framebuffer);
   struct agx_iogpu_attachment_packed *attachments = out;
   unsigned nr = 0;

   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
      asahi_pack_iogpu_attachment(attachments + (nr++),
                                  agx_resource(framebuffer->cbufs[i]->texture),
                                  framebuffer->cbufs[i],
                                  total_attachment_size);
   }

   if (framebuffer->zsbuf) {
      struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);

      asahi_pack_iogpu_attachment(attachments + (nr++),
                                  rsrc, framebuffer->zsbuf,
                                  total_attachment_size);

      if (rsrc->separate_stencil) {
         asahi_pack_iogpu_attachment(attachments + (nr++),
                                     rsrc->separate_stencil,
                                     framebuffer->zsbuf,
                                     total_attachment_size);
      }
   }

   return nr;
}
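
/* demo_cmdbuf lays out the IOGPU command buffer at fixed offsets (in 32-bit
 * words, as packed below): the header at word 0, internal pipelines at word
 * 160, the aux framebuffer at word 228, clear Z/S state at word 292, misc
 * state at word 356, an unknown region at word 484, the attachment count at
 * word 499, and the packed attachment descriptors from word 500 onwards.
 */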

unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
            struct agx_pool *pool,
            struct pipe_framebuffer_state *framebuffer,
            uint64_t encoder_ptr,
            uint64_t encoder_id,
            uint64_t scissor_ptr,
            uint64_t depth_bias_ptr,
            uint32_t pipeline_clear,
            uint32_t pipeline_load,
            uint32_t pipeline_store,
            bool clear_pipeline_textures,
            double clear_depth,
            unsigned clear_stencil)
{
   uint32_t *map = (uint32_t *) buf;
   memset(map, 0, 518 * 4);

   uint64_t deflake_buffer = demo_zero(pool, 0x7e0);
   uint64_t deflake_1 = deflake_buffer + 0x2a0;
   uint64_t deflake_2 = deflake_buffer + 0x20;

   uint64_t unk_buffer_2 = demo_zero(pool, 0x8000);

   uint64_t depth_buffer = 0;
   uint64_t stencil_buffer = 0;

   agx_pack(map + 160, IOGPU_INTERNAL_PIPELINES, cfg) {
      cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
      cfg.clear_pipeline = pipeline_clear;

      /* store pipeline used when entire frame completes */
      cfg.store_pipeline_bind = 0x12;
      cfg.store_pipeline = pipeline_store;
      cfg.scissor_array = scissor_ptr;
      cfg.depth_bias_array = depth_bias_ptr;

      if (framebuffer->zsbuf) {
         struct pipe_surface *zsbuf = framebuffer->zsbuf;
         const struct util_format_description *desc =
            util_format_description(zsbuf->texture->format);

         // note: setting 0x4 bit here breaks partial render with depth
         cfg.depth_flags = 0x80000; // no compression, clear

         cfg.depth_width = framebuffer->width;
         cfg.depth_height = framebuffer->height;

         if (util_format_has_depth(desc)) {
            depth_buffer = agx_map_surface(zsbuf);
         } else {
            stencil_buffer = agx_map_surface(zsbuf);
         }

         if (agx_resource(zsbuf->texture)->separate_stencil) {
            stencil_buffer = agx_map_surface_resource(zsbuf,
                  agx_resource(zsbuf->texture)->separate_stencil);
         }

         cfg.stencil_buffer = stencil_buffer;
         cfg.stencil_buffer_2 = stencil_buffer;

         cfg.depth_buffer = depth_buffer;
         cfg.depth_buffer_if_clearing = depth_buffer;
      }
   }

   agx_pack(map + 228, IOGPU_AUX_FRAMEBUFFER, cfg) {
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.pointer = unk_buffer_2;
   }

   agx_pack(map + 292, IOGPU_CLEAR_Z_S, cfg) {
      cfg.set_when_reloading_z_1 = clear_pipeline_textures;

      cfg.depth_clear_value = fui(clear_depth);
      cfg.stencil_clear_value = clear_stencil;

      cfg.partial_reload_pipeline_bind = 0xffff8212;
      cfg.partial_reload_pipeline = pipeline_load;

      cfg.partial_store_pipeline_bind = 0x12;
      cfg.partial_store_pipeline = pipeline_store;
   }

   agx_pack(map + 356, IOGPU_MISC, cfg) {
      cfg.depth_buffer = depth_buffer;
      cfg.stencil_buffer = stencil_buffer;
      cfg.encoder_id = encoder_id;
      cfg.unknown_buffer = demo_unk6(pool);
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.unk_80 = clear_pipeline_textures ? 0x0 : 0x1;
   }

   unsigned offset_unk = (484 * 4);
   unsigned offset_attachments = (496 * 4);

   unsigned nr_attachments =
      asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4,
                                   framebuffer);

   map[(offset_attachments / 4) + 3] = nr_attachments;

   unsigned total_size = offset_attachments +
                         (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;

   agx_pack(map, IOGPU_HEADER, cfg) {
      cfg.total_size = total_size;
      cfg.attachment_offset = offset_attachments;
      cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH;
      cfg.unknown_offset = offset_unk;
      cfg.encoder = encoder_ptr;

      cfg.deflake_1 = deflake_1;
      cfg.deflake_2 = deflake_2;
      cfg.deflake_3 = deflake_buffer;
   }

   return total_size;
}
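
/* The memmap handed to the kernel is a single agx_map_header immediately
 * followed by one agx_map_entry per BO handle, so a caller needs at least
 * sizeof(struct agx_map_header) + count * sizeof(struct agx_map_entry) bytes.
 * The total length presumably has to fit in 16 bits, hence the assert below.
 */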

static struct agx_map_header
demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count)
{
   /* Structure: header followed by resource groups. For now, we use a single
    * resource group for every resource. This could be optimized.
    */
   unsigned length = sizeof(struct agx_map_header);
   length += count * sizeof(struct agx_map_entry);
   assert(length < 0x10000);

   return (struct agx_map_header) {
      .cmdbuf_id = cmdbuf_id,
      .segment_count = 1,
      .length = length,
      .encoder_id = encoder_id,
      .kernel_commands_start_offset = 0,
      .kernel_commands_end_offset = cmdbuf_size,
      .total_resources = count,
      .resource_group_count = count,
      .unk = 0x8000,
   };
}

void
demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
             uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size)
{
   struct agx_map_header *header = map;
   struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header));
   struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size);

   /* Header precedes the entries */
   *header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count);

   /* Add an entry for each BO mapped */
   for (unsigned i = 0; i < count; ++i) {
      assert((entries + i) < end);
      entries[i] = (struct agx_map_entry) {
         .resource_id = { handles[i] },
         .resource_unk = { 0x20 },
         .resource_flags = { 0x1 },
         .resource_count = 1
      };
   }
}
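
/* Illustrative sketch of how the two entry points fit together; the caller,
 * buffer names, and sizes here are hypothetical and not part of this file.
 * cmdbuf_cpu and memmap_cpu are assumed to be CPU mappings of kernel-visible
 * BOs, and handles[] the GEM handles of every BO referenced by the render:
 *
 *    unsigned cmdbuf_size = demo_cmdbuf(cmdbuf_cpu, cmdbuf_bo_size, &pool,
 *                                       &framebuffer, encoder_ptr, encoder_id,
 *                                       scissor_ptr, depth_bias_ptr,
 *                                       pipeline_clear, pipeline_load,
 *                                       pipeline_store, clear_pipeline_textures,
 *                                       clear_depth, clear_stencil);
 *
 *    demo_mem_map(memmap_cpu, memmap_bo_size, handles, handle_count,
 *                 cmdbuf_id, encoder_id, cmdbuf_size);
 */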