1 /*
2  * Copyright (C) 2021-2022 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
#include <assert.h>
#include <dlfcn.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <mach/mach.h>
#include <IOKit/IOKitLib.h>

#include "util/compiler.h"
#include "io.h"
#include "decode.h"
#include "util.h"
#include "hexdump.h"
#include "dyld_interpose.h"
39 
40 /*
41  * Wrap IOKit entrypoints to intercept communication between the AGX kernel
42  * extension and userspace clients. IOKit prototypes are public from the IOKit
43  * source release.
44  */
45 
46 mach_port_t metal_connection = 0;
47 
48 kern_return_t
wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input, uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt, void *outputStruct, size_t *outputStructCntP)49 wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
50             uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt,
51             uint64_t *output, uint32_t *outputCnt, void *outputStruct,
52             size_t *outputStructCntP)
53 {
54    /* Heuristic guess which connection is Metal, skip over I/O from everything
55     * else. This is technically wrong but it works in practice, and reduces the
56     * surface area we need to wrap.
57     */
58    if (selector == AGX_SELECTOR_SET_API) {
59       metal_connection = connection;
60    } else if (metal_connection != connection) {
61       return IOConnectCallMethod(connection, selector, input, inputCnt,
62                                  inputStruct, inputStructCnt, output, outputCnt,
63                                  outputStruct, outputStructCntP);
64    }
65 
66    printf("Selector %u, %X, %X\n", selector, connection, metal_connection);
67 
68    /* Check the arguments make sense */
69    assert((input != NULL) == (inputCnt != 0));
70    assert((inputStruct != NULL) == (inputStructCnt != 0));
71    assert((output != NULL) == (outputCnt != 0));
72    assert((outputStruct != NULL) == (outputStructCntP != 0));
73 
74    /* Dump inputs */
75    switch (selector) {
76    case AGX_SELECTOR_SET_API:
77       assert(input == NULL && output == NULL && outputStruct == NULL);
78       assert(inputStruct != NULL && inputStructCnt == 16);
79       assert(((uint8_t *) inputStruct)[15] == 0x0);
80 
81       printf("%X: SET_API(%s)\n", connection, (const char *) inputStruct);
82       break;
83 
84    case AGX_SELECTOR_ALLOCATE_MEM: {
85       const struct agx_allocate_resource_req *req = inputStruct;
86       struct agx_allocate_resource_req *req2 = (void *) inputStruct;
87       req2->mode = (req->mode & 0x800) | 0x430;
88 
89       bool suballocated = req->mode & 0x800;
90 
91       printf("Resource allocation:\n");
92       printf("  Mode: 0x%X%s\n", req->mode & ~0x800,
93             suballocated ? " (suballocated) " : "");
94       printf("  CPU fixed: 0x%" PRIx64 "\n", req->cpu_fixed);
95       printf("  CPU fixed (parent): 0x%" PRIx64 "\n", req->cpu_fixed_parent);
96       printf("  Size: 0x%X\n", req->size);
97       printf("  Flags: 0x%X\n", req->flags);
98 
99       if (suballocated) {
100          printf("  Parent: %u\n", req->parent);
101       } else {
102          assert(req->parent == 0);
103       }
104 
105       for (unsigned i = 0; i < ARRAY_SIZE(req->unk0); ++i) {
106          if (req->unk0[i])
107             printf("  UNK%u: 0x%X\n", 0 + i, req->unk0[i]);
108       }
109 
110       for (unsigned i = 0; i < ARRAY_SIZE(req->unk6); ++i) {
111          if (req->unk6[i])
112             printf("  UNK%u: 0x%X\n", 6 + i, req->unk6[i]);
113       }
114 
115       if (req->unk17)
116          printf("  UNK17: 0x%X\n", req->unk17);
117 
118       if (req->unk19)
119          printf("  UNK19: 0x%X\n", req->unk19);
120 
121       for (unsigned i = 0; i < ARRAY_SIZE(req->unk21); ++i) {
122          if (req->unk21[i])
123             printf("  UNK%u: 0x%X\n", 21 + i, req->unk21[i]);
124       }
125 
126       break;
127    }
128 
129    case AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS:
130       assert(output == NULL && outputStruct == NULL);
131       assert(inputStructCnt == sizeof(struct agx_submit_cmdbuf_req));
132       assert(inputCnt == 1);
133 
134       printf("%X: SUBMIT_COMMAND_BUFFERS command queue id:%llx %p\n",
135              connection, input[0], inputStruct);
136 
137       const struct agx_submit_cmdbuf_req *req = inputStruct;
138 
139       agxdecode_cmdstream(req->command_buffer_shmem_id,
140             req->segment_list_shmem_id, true);
141 
142       if (getenv("ASAHI_DUMP"))
143          agxdecode_dump_mappings(req->segment_list_shmem_id);
144 
145       agxdecode_next_frame();
146       FALLTHROUGH;
147 
148    default:
149       printf("%X: call %s (out %p, %zu)", connection,
150              wrap_selector_name(selector), outputStructCntP,
151              outputStructCntP ? *outputStructCntP : 0);
152 
153       for (uint64_t u = 0; u < inputCnt; ++u)
154          printf(" %llx", input[u]);
155 
156       if(inputStructCnt) {
157          printf(", struct:\n");
158          hexdump(stdout, inputStruct, inputStructCnt, true);
159       } else {
160          printf("\n");
161       }
162 
163       break;
164    }
165 
166    /* Invoke the real method */
167    kern_return_t ret =
168       IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct,
169                           inputStructCnt, output, outputCnt, outputStruct,
170                           outputStructCntP);
171 
172    if (ret != 0)
173       printf("return %u\n", ret);
174 
175    /* Track allocations for later analysis (dumping, disassembly, etc) */
176    switch (selector) {
177       case AGX_SELECTOR_CREATE_SHMEM: {
178          assert(inputCnt == 2);
179          assert((*outputStructCntP) == 0x10);
180          uint64_t *inp = (uint64_t *) input;
181 
182          uint8_t type = inp[1];
183 
184          assert(type <= 2);
185          if (type == 2)
186             printf("(cmdbuf with error reporting)\n");
187 
188          uint64_t *ptr = (uint64_t *) outputStruct;
189          uint32_t *words = (uint32_t *) (ptr + 1);
190 
191          agxdecode_track_alloc(&(struct agx_bo) {
192                .handle = words[1],
193                .ptr.cpu = (void *) *ptr,
194                .size = words[0],
195                .type = inp[1] ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP
196          });
197 
198          break;
199       }
200 
201       case AGX_SELECTOR_ALLOCATE_MEM: {
202          assert((*outputStructCntP) == 0x50);
203          const struct agx_allocate_resource_req *req = inputStruct;
204          struct agx_allocate_resource_resp *resp = outputStruct;
205          if (resp->cpu && req->cpu_fixed)
206             assert(resp->cpu == req->cpu_fixed);
207          printf("Response:\n");
208          printf("  GPU VA: 0x%" PRIx64 "\n", resp->gpu_va);
209          printf("  CPU VA: 0x%" PRIx64 "\n", resp->cpu);
210          printf("  Handle: %u\n", resp->handle);
211          printf("  Root size: 0x%" PRIx64 "\n", resp->root_size);
212          printf("  Suballocation size: 0x%" PRIx64 "\n", resp->sub_size);
213          printf("  GUID: 0x%X\n", resp->guid);
214          for (unsigned i = 0; i < ARRAY_SIZE(resp->unk4); ++i) {
215             if (resp->unk4[i])
216                printf("  UNK%u: 0x%X\n", 4 + i, resp->unk4[i]);
217          }
218          for (unsigned i = 0; i < ARRAY_SIZE(resp->unk11); ++i) {
219             if (resp->unk11[i])
220                printf("  UNK%u: 0x%X\n", 11 + i, resp->unk11[i]);
221          }
222 
223          if (req->parent)
224             assert(resp->sub_size <= resp->root_size);
225          else
226             assert(resp->sub_size == resp->root_size);
227 
228          agxdecode_track_alloc(&(struct agx_bo) {
229                .type = AGX_ALLOC_REGULAR,
230                .size = resp->sub_size,
231                .handle = resp->handle,
232                .ptr.gpu = resp->gpu_va,
233                .ptr.cpu = (void *) resp->cpu,
234          });
235 
236          break;
237       }
238 
239       case AGX_SELECTOR_FREE_MEM: {
240          assert(inputCnt == 1);
241          assert(inputStruct == NULL);
242          assert(output == NULL);
243          assert(outputStruct == NULL);
244 
245          agxdecode_track_free(&(struct agx_bo) {
246                .type = AGX_ALLOC_REGULAR,
247                .handle = input[0]
248          });
249 
250          break;
251       }
252 
253       default:
254          /* Dump the outputs */
255          if(outputCnt) {
256             printf("%u scalars: ", *outputCnt);
257 
258             for (uint64_t u = 0; u < *outputCnt; ++u)
259                printf("%llx ", output[u]);
260 
261             printf("\n");
262          }
263 
264          if(outputStructCntP) {
265             printf(" struct\n");
266             hexdump(stdout, outputStruct, *outputStructCntP, true);
267 
268             if (selector == 2) {
269                /* Dump linked buffer as well */
270                void **o = outputStruct;
271                hexdump(stdout, *o, 64, true);
272             }
273          }
274 
275          printf("\n");
276          break;
277    }
278 
279    return ret;
280 }
281 
282 kern_return_t
wrap_AsyncMethod(mach_port_t connection, uint32_t selector, mach_port_t wakePort, uint64_t *reference, uint32_t referenceCnt, const uint64_t *input, uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt, void *outputStruct, size_t *outputStructCntP)283 wrap_AsyncMethod(mach_port_t connection, uint32_t selector,
284                  mach_port_t wakePort, uint64_t *reference,
285                  uint32_t referenceCnt, const uint64_t *input,
286                  uint32_t inputCnt, const void *inputStruct,
287                  size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt,
288                  void *outputStruct, size_t *outputStructCntP)
289 {
290 	/* Check the arguments make sense */
291 	assert((input != NULL) == (inputCnt != 0));
292 	assert((inputStruct != NULL) == (inputStructCnt != 0));
293 	assert((output != NULL) == (outputCnt != 0));
294 	assert((outputStruct != NULL) == (outputStructCntP != 0));
295 
296 	printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector,
297           wakePort, outputStructCntP, outputStructCntP ? *outputStructCntP : 0);
298 
299 	for (uint64_t u = 0; u < inputCnt; ++u)
300 		printf(" %llx", input[u]);
301 
302 	if(inputStructCnt) {
303 		printf(", struct:\n");
304 		hexdump(stdout, inputStruct, inputStructCnt, true);
305 	} else {
306 		printf("\n");
307 	}
308 
309 	printf(", references: ");
310 	for (unsigned i = 0; i < referenceCnt; ++i)
311 		printf(" %llx", reference[i]);
312 	printf("\n");
313 
314    kern_return_t ret = IOConnectCallAsyncMethod(connection, selector, wakePort,
315          reference, referenceCnt, input, inputCnt, inputStruct, inputStructCnt,
316          output, outputCnt, outputStruct, outputStructCntP);
317 
318 	printf("return %u", ret);
319 
320  	if(outputCnt) {
321 		printf("%u scalars: ", *outputCnt);
322 
323 		for (uint64_t u = 0; u < *outputCnt; ++u)
324 			printf("%llx ", output[u]);
325 
326 		printf("\n");
327 	}
328 
329 	if(outputStructCntP) {
330 		printf(" struct\n");
331 		hexdump(stdout, outputStruct, *outputStructCntP, true);
332 
333 		if (selector == 2) {
334 			/* Dump linked buffer as well */
335 			void **o = outputStruct;
336 			hexdump(stdout, *o, 64, true);
337 		}
338 	}
339 
340 	printf("\n");
341 	return ret;
342 }
343 
344 kern_return_t
wrap_StructMethod(mach_port_t connection, uint32_t selector, const void *inputStruct, size_t inputStructCnt, void *outputStruct, size_t *outputStructCntP)345 wrap_StructMethod(mach_port_t connection, uint32_t selector,
346                   const void *inputStruct, size_t inputStructCnt,
347                   void *outputStruct, size_t *outputStructCntP)
348 {
349    return wrap_Method(connection, selector, NULL, 0, inputStruct,
350                       inputStructCnt, NULL, NULL, outputStruct,
351                       outputStructCntP);
352 }
353 
354 kern_return_t
wrap_AsyncStructMethod(mach_port_t connection, uint32_t selector, mach_port_t wakePort, uint64_t *reference, uint32_t referenceCnt, const void *inputStruct, size_t inputStructCnt, void *outputStruct, size_t *outputStructCnt)355 wrap_AsyncStructMethod(mach_port_t connection, uint32_t selector,
356                        mach_port_t wakePort, uint64_t *reference,
357                        uint32_t referenceCnt, const void *inputStruct,
358                        size_t inputStructCnt, void *outputStruct,
359                        size_t *outputStructCnt)
360 {
361     return wrap_AsyncMethod(connection, selector, wakePort, reference,
362                             referenceCnt, NULL, 0, inputStruct, inputStructCnt,
363                             NULL, NULL, outputStruct, outputStructCnt);
364 }
365 
366 kern_return_t
wrap_ScalarMethod(mach_port_t connection, uint32_t selector, const uint64_t *input, uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)367 wrap_ScalarMethod(mach_port_t connection, uint32_t selector,
368                   const uint64_t *input, uint32_t inputCnt, uint64_t *output,
369                   uint32_t *outputCnt)
370 {
371     return wrap_Method(connection, selector, input, inputCnt, NULL, 0, output,
372                        outputCnt, NULL, NULL);
373 }
374 
375 kern_return_t
wrap_AsyncScalarMethod(mach_port_t connection, uint32_t selector, mach_port_t wakePort, uint64_t *reference, uint32_t referenceCnt, const uint64_t *input, uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)376 wrap_AsyncScalarMethod(mach_port_t connection, uint32_t selector,
377                        mach_port_t wakePort, uint64_t *reference,
378                        uint32_t referenceCnt, const uint64_t *input,
379                        uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)
380 {
381     return wrap_AsyncMethod(connection, selector, wakePort, reference,
382                             referenceCnt, input, inputCnt, NULL, 0, output,
383                             outputCnt, NULL, NULL);
384 }
385 
386 mach_port_t
wrap_DataQueueAllocateNotificationPortnull387 wrap_DataQueueAllocateNotificationPort()
388 {
389    mach_port_t ret = IODataQueueAllocateNotificationPort();
390    printf("Allocated notif port %X\n", ret);
391    return ret;
392 }
393 
394 kern_return_t
wrap_SetNotificationPort(io_connect_t connect, uint32_t type, mach_port_t port, uintptr_t reference)395 wrap_SetNotificationPort(io_connect_t connect, uint32_t type,
396                              mach_port_t port, uintptr_t reference)
397 {
398 	printf("Set noficiation port connect=%X, type=%X, port=%X, reference=%"
399          PRIx64"\n", connect, type, port, (uint64_t) reference);
400 
401    return IOConnectSetNotificationPort(connect, type, port, reference);
402 }
403 
404 IOReturn
wrap_DataQueueWaitForAvailableData(IODataQueueMemory *dataQueue, mach_port_t notificationPort)405 wrap_DataQueueWaitForAvailableData(IODataQueueMemory *dataQueue,
406                               mach_port_t notificationPort)
407 {
408 	printf("Waiting for data queue at notif port %X\n", notificationPort);
409    IOReturn ret = IODataQueueWaitForAvailableData(dataQueue, notificationPort);
410 	printf("ret=%X\n", ret);
411    return ret;
412 }
413 
414 IODataQueueEntry *
wrap_DataQueuePeek(IODataQueueMemory *dataQueue)415 wrap_DataQueuePeek(IODataQueueMemory *dataQueue)
416 {
417    printf("Peeking data queue\n");
418    return IODataQueuePeek(dataQueue);
419 }
420 
421 IOReturn
wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data, uint32_t *dataSize)422 wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data, uint32_t *dataSize)
423 {
424    printf("Dequeueing (dataQueue=%p, data=%p, buffer %u)\n", dataQueue, data, *dataSize);
425    IOReturn ret = IODataQueueDequeue(dataQueue, data, dataSize);
426    printf("Return \"%s\", got %u bytes\n", mach_error_string(ret), *dataSize);
427 
428    uint8_t *data8 = data;
429    for (unsigned i = 0; i < *dataSize; ++i) {
430       printf("%02X ", data8[i]);
431    }
432    printf("\n");
433 
434    return ret;
435 }
436 
/*
 * Interposition table: each entry pairs one wrapper defined above with the
 * IOKit function it stands in for.
 */

/* Synchronous IOConnectCall* entry points */
DYLD_INTERPOSE(wrap_Method, IOConnectCallMethod);
DYLD_INTERPOSE(wrap_StructMethod, IOConnectCallStructMethod);
DYLD_INTERPOSE(wrap_ScalarMethod, IOConnectCallScalarMethod);

/* Asynchronous IOConnectCall* entry points */
DYLD_INTERPOSE(wrap_AsyncMethod, IOConnectCallAsyncMethod);
DYLD_INTERPOSE(wrap_AsyncStructMethod, IOConnectCallAsyncStructMethod);
DYLD_INTERPOSE(wrap_AsyncScalarMethod, IOConnectCallAsyncScalarMethod);

/* Notification ports */
DYLD_INTERPOSE(wrap_SetNotificationPort, IOConnectSetNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueAllocateNotificationPort, IODataQueueAllocateNotificationPort);

/* Data queue access */
DYLD_INTERPOSE(wrap_DataQueueWaitForAvailableData, IODataQueueWaitForAvailableData);
DYLD_INTERPOSE(wrap_DataQueuePeek, IODataQueuePeek);
DYLD_INTERPOSE(wrap_DataQueueDequeue, IODataQueueDequeue);
448