1/*
2 * Copyright 2021 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#
24#include <stdint.h>
25#include "agx_state.h"
26#include "magic.h"
27
28/* The structures managed in this file appear to be software defined (either in
29 * the macOS kernel driver or in the AGX firmware) */
30
31/* Odd pattern */
32static uint64_t
33demo_unk6(struct agx_pool *pool)
34{
35   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64);
36   uint64_t *buf = ptr.cpu;
37   memset(buf, 0, sizeof(*buf));
38
39   for (unsigned i = 1; i < 0x3ff; ++i)
40      buf[i] = (i + 1);
41
42   return ptr.gpu;
43}
44
45static uint64_t
46demo_zero(struct agx_pool *pool, unsigned count)
47{
48   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, count, 64);
49   memset(ptr.cpu, 0, count);
50   return ptr.gpu;
51}
52
53static size_t
54asahi_size_resource(struct pipe_resource *prsrc, unsigned level)
55{
56   struct agx_resource *rsrc = agx_resource(prsrc);
57   size_t size = rsrc->slices[level].size;
58
59   if (rsrc->separate_stencil)
60      size += asahi_size_resource(&rsrc->separate_stencil->base, level);
61
62   return size;
63}
64
65static size_t
66asahi_size_surface(struct pipe_surface *surf)
67{
68   return asahi_size_resource(surf->texture, surf->u.tex.level);
69}
70
71static size_t
72asahi_size_attachments(struct pipe_framebuffer_state *framebuffer)
73{
74   size_t sum = 0;
75
76   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i)
77      sum += asahi_size_surface(framebuffer->cbufs[i]);
78
79   if (framebuffer->zsbuf)
80      sum += asahi_size_surface(framebuffer->zsbuf);
81
82   return sum;
83}
84
85static enum agx_iogpu_attachment_type
86asahi_classify_attachment(enum pipe_format format)
87{
88   const struct util_format_description *desc = util_format_description(format);
89
90   if (util_format_has_depth(desc))
91      return AGX_IOGPU_ATTACHMENT_TYPE_DEPTH;
92   else if (util_format_has_stencil(desc))
93      return AGX_IOGPU_ATTACHMENT_TYPE_STENCIL;
94   else
95      return AGX_IOGPU_ATTACHMENT_TYPE_COLOUR;
96}
97
98static uint64_t
99agx_map_surface_resource(struct pipe_surface *surf, struct agx_resource *rsrc)
100{
101   return agx_map_texture_gpu(rsrc, surf->u.tex.level, surf->u.tex.first_layer);
102}
103
104static uint64_t
105agx_map_surface(struct pipe_surface *surf)
106{
107   return agx_map_surface_resource(surf, agx_resource(surf->texture));
108}
109
/* Pack a single IOGPU attachment descriptor into `out`. `total_size` is the
 * combined byte size of all attachments in the framebuffer, used to express
 * this attachment's share of memory as an integer percentage. Caller must
 * ensure total_size is nonzero (it is, whenever there is an attachment to
 * pack).
 */
static void
asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc,
                            struct pipe_surface *surf,
                            unsigned total_size)
{
   /* We don't support layered rendering yet */
   assert(surf->u.tex.first_layer == surf->u.tex.last_layer);

   agx_pack(out, IOGPU_ATTACHMENT, cfg) {
      cfg.type = asahi_classify_attachment(rsrc->base.format);
      cfg.address = agx_map_surface_resource(surf, rsrc);
      cfg.size = rsrc->slices[surf->u.tex.level].size;
      /* Integer division: percentages across attachments need not sum to 100 */
      cfg.percent = (100 * cfg.size) / total_size;
   }
}
125
126static unsigned
127asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer)
128{
129   unsigned total_attachment_size = asahi_size_attachments(framebuffer);
130   struct agx_iogpu_attachment_packed *attachments = out;
131   unsigned nr = 0;
132
133   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
134      asahi_pack_iogpu_attachment(attachments + (nr++),
135                                  agx_resource(framebuffer->cbufs[i]->texture),
136                                  framebuffer->cbufs[i],
137                                  total_attachment_size);
138   }
139
140   if (framebuffer->zsbuf) {
141         struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
142
143         asahi_pack_iogpu_attachment(attachments + (nr++),
144                                     rsrc, framebuffer->zsbuf,
145                                     total_attachment_size);
146
147         if (rsrc->separate_stencil) {
148            asahi_pack_iogpu_attachment(attachments + (nr++),
149                                        rsrc->separate_stencil,
150                                        framebuffer->zsbuf,
151                                        total_attachment_size);
152         }
153   }
154
155   return nr;
156}
157
/* Assemble the software-defined IOGPU command buffer for a render pass and
 * return its total size in bytes. Field offsets are 32-bit word indices into
 * the buffer, recovered by reverse engineering; the agx_pack() descriptors
 * document the known field layouts.
 *
 * NOTE(review): the `size` parameter is unused — the function assumes the
 * buffer holds at least the fixed 518-word prefix plus the attachment list;
 * confirm callers always allocate enough.
 */
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
            struct agx_pool *pool,
            struct pipe_framebuffer_state *framebuffer,
            uint64_t encoder_ptr,
            uint64_t encoder_id,
            uint64_t scissor_ptr,
            uint64_t depth_bias_ptr,
            uint32_t pipeline_clear,
            uint32_t pipeline_load,
            uint32_t pipeline_store,
            bool clear_pipeline_textures,
            double clear_depth,
            unsigned clear_stencil)
{
   uint32_t *map = (uint32_t *) buf;

   /* Zero the fixed-size prefix before packing individual sections */
   memset(map, 0, 518 * 4);

   /* Zeroed scratch buffers handed to the GPU/firmware; exact purpose
    * unknown. deflake_1/2 are fixed offsets within the same allocation. */
   uint64_t deflake_buffer = demo_zero(pool, 0x7e0);
   uint64_t deflake_1 = deflake_buffer + 0x2a0;
   uint64_t deflake_2 = deflake_buffer + 0x20;

   uint64_t unk_buffer_2 = demo_zero(pool, 0x8000);

   /* Remain 0 unless a Z/S buffer is bound; also packed into IOGPU_MISC */
   uint64_t depth_buffer = 0;
   uint64_t stencil_buffer = 0;

   agx_pack(map + 160, IOGPU_INTERNAL_PIPELINES, cfg) {
      cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
      cfg.clear_pipeline = pipeline_clear;

      /* store pipeline used when entire frame completes */
      cfg.store_pipeline_bind = 0x12;
      cfg.store_pipeline = pipeline_store;
      cfg.scissor_array = scissor_ptr;
      cfg.depth_bias_array = depth_bias_ptr;

      if (framebuffer->zsbuf) {
         struct pipe_surface *zsbuf = framebuffer->zsbuf;
         const struct util_format_description *desc =
            util_format_description(zsbuf->texture->format);

         // note: setting 0x4 bit here breaks partial render with depth
         cfg.depth_flags = 0x80000; // no compression, clear

         cfg.depth_width = framebuffer->width;
         cfg.depth_height = framebuffer->height;

         /* A format with depth maps as the depth buffer; a pure stencil
          * format maps as the stencil buffer */
         if (util_format_has_depth(desc)) {
            depth_buffer = agx_map_surface(zsbuf);
         } else {
            stencil_buffer = agx_map_surface(zsbuf);
         }

         /* A separate stencil resource overrides the stencil address */
         if (agx_resource(zsbuf->texture)->separate_stencil) {
            stencil_buffer = agx_map_surface_resource(zsbuf,
                  agx_resource(zsbuf->texture)->separate_stencil);
         }

         cfg.stencil_buffer = stencil_buffer;
         cfg.stencil_buffer_2 = stencil_buffer;

         cfg.depth_buffer = depth_buffer;
         cfg.depth_buffer_if_clearing = depth_buffer;
      }
   }

   agx_pack(map + 228, IOGPU_AUX_FRAMEBUFFER, cfg) {
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.pointer = unk_buffer_2;
   }

   agx_pack(map + 292, IOGPU_CLEAR_Z_S, cfg) {
      cfg.set_when_reloading_z_1 = clear_pipeline_textures;

      /* Depth clear value is stored as the float's raw bit pattern */
      cfg.depth_clear_value = fui(clear_depth);
      cfg.stencil_clear_value = clear_stencil;

      cfg.partial_reload_pipeline_bind = 0xffff8212;
      cfg.partial_reload_pipeline = pipeline_load;

      cfg.partial_store_pipeline_bind = 0x12;
      cfg.partial_store_pipeline = pipeline_store;
   }

   agx_pack(map + 356, IOGPU_MISC, cfg) {
      cfg.depth_buffer = depth_buffer;
      cfg.stencil_buffer = stencil_buffer;
      cfg.encoder_id = encoder_id;
      cfg.unknown_buffer = demo_unk6(pool);
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.unk_80 = clear_pipeline_textures ? 0x0 : 0x1;
   }

   /* Byte offsets of the trailing sections of the command buffer */
   unsigned offset_unk = (484 * 4);
   unsigned offset_attachments = (496 * 4);

   /* Attachment descriptors start 4 words past offset_attachments; the word
    * at offset + 3 holds the attachment count */
   unsigned nr_attachments =
      asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4,
                                   framebuffer);

   map[(offset_attachments / 4) + 3] = nr_attachments;

   unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;

   /* The header sits at the start of the buffer but is packed last, since
    * it needs the final total size */
   agx_pack(map, IOGPU_HEADER, cfg) {
      cfg.total_size = total_size;
      cfg.attachment_offset = offset_attachments;
      cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH;
      cfg.unknown_offset = offset_unk;
      cfg.encoder = encoder_ptr;

      cfg.deflake_1 = deflake_1;
      cfg.deflake_2 = deflake_2;
      cfg.deflake_3 = deflake_buffer;
   }

   return total_size;
}
279
280static struct agx_map_header
281demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count)
282{
283   /* Structure: header followed by resource groups. For now, we use a single
284    * resource group for every resource. This could be optimized.
285    */
286   unsigned length = sizeof(struct agx_map_header);
287   length += count * sizeof(struct agx_map_entry);
288   assert(length < 0x10000);
289
290   return (struct agx_map_header) {
291      .cmdbuf_id = cmdbuf_id,
292      .segment_count = 1,
293      .length = length,
294      .encoder_id = encoder_id,
295      .kernel_commands_start_offset = 0,
296      .kernel_commands_end_offset = cmdbuf_size,
297      .total_resources = count,
298      .resource_group_count = count,
299      .unk = 0x8000,
300   };
301}
302
303void
304demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
305             uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size)
306{
307   struct agx_map_header *header = map;
308   struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header));
309   struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size);
310
311   /* Header precedes the entry */
312   *header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count);
313
314   /* Add an entry for each BO mapped */
315   for (unsigned i = 0; i < count; ++i) {
316	   assert((entries + i) < end);
317      entries[i] = (struct agx_map_entry) {
318         .resource_id = { handles[i] },
319         .resource_unk = { 0x20 },
320         .resource_flags = { 0x1 },
321         .resource_count = 1
322      };
323   }
324}
325