1/* 2 * Copyright © 2016 Broadcom 3 * Copyright © 2020 Google LLC 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25/* Unit test for disassembly of instructions. 26 * 27 * The goal is to take instructions we've seen the blob produce, and test that 28 * we can disassemble them correctly. For the next person investigating the 29 * behavior of this instruction, please include the testcase it was generated 30 * from, and the qcom disassembly as a comment if it differs from what we 31 * produce. 32 */ 33 34#include <stdio.h> 35#include <stdlib.h> 36#include <string.h> 37#include "util/macros.h" 38 39#include "ir3.h" 40#include "ir3_assembler.h" 41#include "ir3_shader.h" 42 43#include "isa/isa.h" 44 45/* clang-format off */ 46/* Note: @anholt's 4xx disasm was done on an a418 Nexus 5x */ 47#define INSTR_4XX(i, d, ...) { .gpu_id = 420, .instr = #i, .expected = d, __VA_ARGS__ } 48#define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ } 49#define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ } 50/* clang-format on */ 51 52static const struct test { 53 int gpu_id; 54 const char *instr; 55 const char *expected; 56 /** 57 * Do we expect asm parse fail (ie. for things not (yet) supported by 58 * ir3_parser.y) 59 */ 60 bool parse_fail; 61} tests[] = { 62 /* clang-format off */ 63 /* cat0 */ 64 INSTR_6XX(00000000_00000000, "nop"), 65 INSTR_6XX(00000200_00000000, "(rpt2)nop"), 66 INSTR_6XX(03000000_00000000, "end"), 67 INSTR_6XX(00800000_00000004, "br p0.x, #4"), 68 INSTR_6XX(00800000_fffffffc, "br p0.x, #-4"), 69 INSTR_6XX(00900000_00000003, "br !p0.x, #3"), 70 INSTR_6XX(03820000_00000015, "shps #21"), /* emit */ 71 INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */ 72 INSTR_6XX(02220000_00000004, "getlast.w8 #4"), 73 INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */ 74 INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"), 75 INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"), 76 INSTR_6XX(07820000_00000000, "prede"), 77 INSTR_6XX(00800063_0000001e, "brac.3 #30"), 78 INSTR_6XX(06820000_00000000, "predt p0.x"), 79 INSTR_6XX(07020000_00000000, "predf p0.x"), 80 INSTR_6XX(07820000_00000000, "prede"), 81 82 /* cat1 */ 83 INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"), 84 INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"), 85 INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"), 86 INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"), 87 INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"), 88 INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"), 89 INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"), 90 INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"), 91 INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"), 92 /* dEQP-VK.subgroups.ballot.compute.compute */ 93 INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */ 94 95 INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"), 96 INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"), 97 INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"), 98 INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"), 99 100 INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"), 101 INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"), 102 INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"), 103 INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"), 104 105 /* cat2 */ 106 INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"), 107 INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"), 108 INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"), 109 INSTR_6XX(47348000_00002000, "flat.b (ei)r0.x, 0, r0.x"), 110 INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"), 111 INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"), 112 INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"), 113 INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"), 114 INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"), 115 INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"), 116 INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"), 117 INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"), 118 INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"), 119 INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"), 120 INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"), 121 122 /* cat3 */ 123 INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"), 124 INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"), 125 INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"), 126 INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"), 127 INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"), 128 INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"), 129 INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"), 130 INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */ 131 INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */ 132 INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */ 133 INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */ 134 INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"), 135 INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"), 136 INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"), 137 INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"), 138 INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */ 139 INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"), 140 INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */ 141 /* custom test with qcom_dot8 function from cl_qcom_dot_product8 */ 142 INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */ 143 INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */ 144 145 /* cat4 */ 146 INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"), 147 148 /* cat5 */ 149 /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */ 150 INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */ 151 /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */ 152 INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */ 153 /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */ 154 INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ 155 INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */ 156 157 INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"), 158 INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */ 159 INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */ 160 INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"), 161 INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"), 162 INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"), 163 INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"), 164 INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"), 165 /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */ 166 INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"), 167 INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"), 168 INSTR_6XX(a0c81108_e2000001, "sam.base0 (f32)(x)r2.x, r0.x, s#16, a1.x"), 169 INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"), 170 171 172 /* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */ 173 INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */ 174 /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */ 175 INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */ 176 /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */ 177 INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */ 178 179 /* cat6 */ 180 181 INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */ 182 INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. r1.x is offset in ibo, r0.x is value*/ 183 /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */ 184 INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */ 185 /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */ 186 INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */ 187 /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */ 188 INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */ 189 /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */ 190 INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */ 191 /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */ 192 INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */ 193 194 // TODO is this a real instruction? Or float -6.0 ? 195 // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true), 196 /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */ 197 INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */ 198 INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */ 199 INSTR_6XX(c0dc052e_01800042, "stg.a.u8 g[r0.z+(r11.z)<<2], hr8.y, 1"), 200 INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"), 201 INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"), 202 INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"), 203 INSTR_5XX(c0ce0100_02800000, "stg.s8 g[r0.x], hr0.x, 2"), 204 INSTR_5XX(c0c00100_02800000, "stg.f16 g[r0.x], hr0.x, 2"), 205 206 /* Customely crafted */ 207 INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"), 208 INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"), 209 210 INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ 211 INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ 212 INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), 213 INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), 214 INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"), 215 INSTR_6XX(c0040003_0180c269, "ldg.u16 hr0.w, g[r0.w+308], 1"), 216 217 /* Found in TCS/TES shaders of GTA V */ 218 INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */ 219 220 /* Customely crafted */ 221 INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"), 222 223 INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ 224 INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ 225 INSTR_6XX(c0000006_01c18017, "ldg.a.f16 hr1.z, g[r1.z+(r2.w)<<2], 1"), 226 INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), 227 INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), 228 229 /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */ 230 INSTR_6XX(c7020020_01800000, "stc.f32 c[32], r0.x, 1"), /* stc c[32], r0.x, 1 */ 231 /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ 232 INSTR_6XX(c7060020_03800000, "stc.u32 c[32], r0.x, 3"), /* stc c[32], r0.x, 3 */ 233 234 /* custom */ 235 INSTR_6XX(c7060100_03800000, "stc.u32 c[a1.x], r0.x, 3"), /* stc c[a1.x], r0.x, 3 */ 236 INSTR_6XX(c7060120_03800000, "stc.u32 c[a1.x+32], r0.x, 3"), /* stc c[a1.x+32], r0.x, 3 */ 237 238 /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ 239 INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */ 240 241 INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 hr0.z, r0.x, 2"), 242#if 0 243 /* TODO blob sometimes/frequently sets b0, although there does not seem 244 * to be an obvious pattern and our encoding never sets it. AFAICT it 245 * is a dontcare bit 246 */ 247 /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */ 248 INSTR_6XX(c0220200_0361b801, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */ 249#else 250 /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */ 251 INSTR_6XX(c0220200_0361b800, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */ 252#endif 253 254 /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */ 255 INSTR_6XX(c2c21100_04800006, "stlw.f32 l[r2.x], r0.w, 4"), 256 INSTR_6XX(c2c20f00_01800004, "stlw.f32 l[r1.w], r0.z, 1"), 257 INSTR_6XX(c2860003_02808011, "ldlw.u32 r0.w, l[r0.z+8], 2"), 258 259 /* dEQP-VK.compute.basic.shared_var_single_group */ 260 INSTR_6XX(c1060500_01800008, "stl.u32 l[r0.z], r1.x, 1"), 261 INSTR_6XX(c0460001_01804001, "ldl.u32 r0.y, l[r0.y], 1"), 262 263 INSTR_6XX(c0860018_03820001, "ldp.u32 r6.x, p[r2.x], 3"), 264 INSTR_6XX(c0420002_01808019, "ldl.f32 r0.z, l[r0.z+12], 1"), 265 INSTR_6XX(c1021710_04800000, "stl.f32 l[r2.w+16], r0.x, 4"), 266 INSTR_6XX(d7c60011_03c00000, "(sy)ldlv.u32 r4.y, l[0], 3"), 267 268 /* resinfo */ 269 INSTR_6XX(c0260000_0063c200, "resinfo.b.untyped.2d.u32.1.imm r0.x, 0"), /* resinfo.u32.2d.mode0.base0 r0.x, 0 */ 270 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_7.txt */ 271 INSTR_6XX(c0260000_0063c000, "resinfo.b.untyped.1d.u32.1.imm r0.x, 0"), /* resinfo.u32.1d.mode0.base0 r0.x, 0 */ 272 /* dEQP-VK.image.image_size.2d.readonly_12x34.txt */ 273 INSTR_6XX(c0260000_0063c300, "resinfo.b.untyped.2d.u32.1.imm.base0 r0.x, 0"), /* resinfo.u32.2d.mode4.base0 r0.x, 0 */ 274 /* Custom test */ 275 INSTR_6XX(c0260000_0063c382, "resinfo.b.untyped.2d.u32.1.nonuniform.base1 r0.x, r0.x"), /* resinfo.u32.2d.mode6.base1 r0.x, r0.x */ 276 277 /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */ 278 INSTR_5XX(c3e60000_00000200, "resinfo.u32.2d r0.x, g[0]"), /* resinfo.u32.2d r0.x, 0 */ 279#if 0 280 /* TODO our encoding differs in b11 ('typed'), which seems to be a dontcare bit */ 281 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */ 282 INSTR_5XX(c3e60000_00000e00, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */ 283 /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */ 284 INSTR_5XX(c3e60000_00000c00, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */ 285#else 286 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */ 287 INSTR_5XX(c3e60000_00000600, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */ 288 /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */ 289 INSTR_5XX(c3e60000_00000400, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */ 290#endif 291 292 /* ldgb */ 293 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_vec4 */ 294 INSTR_5XX(c6e20000_06003600, "ldgb.untyped.4d.f32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.f32.4 r0.x, g[r0.x], r1.z, 0 */ 295 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_ivec4 */ 296 INSTR_5XX(c6ea0000_06003600, "ldgb.untyped.4d.s32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.s32.4 r0.x, g[r0.x], r1.z, 0 */ 297 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_float */ 298 INSTR_5XX(c6e20000_02000600, "ldgb.untyped.4d.f32.1 r0.x, g[0], r0.x, r0.z"), /* ldgb.a.untyped.1dtype.f32.1 r0.x, g[r0.x], r0.z, 0 */ 299 /* dEQP-GLES31.functional.ssbo.layout.random.vector_types.0 */ 300 INSTR_5XX(c6ea0008_14002600, "ldgb.untyped.4d.s32.3 r2.x, g[0], r0.x, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r2.x, g[r0.x], r5.x, 0 */ 301 INSTR_5XX(c6ea0204_1401a600, "ldgb.untyped.4d.s32.3 r1.x, g[1], r1.z, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r1.x, g[r1.z], r5.x, 1 */ 302 303 /* stgb */ 304 INSTR_5XX(c7220028_0480000d, "stgb.untyped.1d.f32.1 g[0], r1.z, 4, r10.x"), /* stgb.untyped.1d.1 g[r10.x], r1.z, 4, r0.x */ 305 INSTR_5XX(c7260023_02800009, "stgb.untyped.1d.u32.1 g[0], r1.x, 2, r8.w"), /* stgb.untyped.1d.1 g[r8.w], r1.x, 2, r0.x */ 306 307 /* discard stuff */ 308 INSTR_6XX(42b400f8_20010004, "cmps.s.eq p0.x, r1.x, 1"), 309 INSTR_6XX(02800000_00000000, "kill p0.x"), 310 311 /* Immediates */ 312 INSTR_6XX(40100007_68000008, "add.f r1.w, r2.x, (neg)(0.0)"), 313 INSTR_6XX(40100007_68010008, "add.f r1.w, r2.x, (neg)(0.5)"), 314 INSTR_6XX(40100007_68020008, "add.f r1.w, r2.x, (neg)(1.0)"), 315 INSTR_6XX(40100007_68030008, "add.f r1.w, r2.x, (neg)(2.0)"), 316 INSTR_6XX(40100007_68040008, "add.f r1.w, r2.x, (neg)(e)"), 317 INSTR_6XX(40100007_68050008, "add.f r1.w, r2.x, (neg)(pi)"), 318 INSTR_6XX(40100007_68060008, "add.f r1.w, r2.x, (neg)(1/pi)"), 319 INSTR_6XX(40100007_68070008, "add.f r1.w, r2.x, (neg)(1/log2(e))"), 320 INSTR_6XX(40100007_68080008, "add.f r1.w, r2.x, (neg)(log2(e))"), 321 INSTR_6XX(40100007_68090008, "add.f r1.w, r2.x, (neg)(1/log2(10))"), 322 INSTR_6XX(40100007_680a0008, "add.f r1.w, r2.x, (neg)(log2(10))"), 323 INSTR_6XX(40100007_680b0008, "add.f r1.w, r2.x, (neg)(4.0)"), 324 325 /* LDC. Our disasm differs greatly from qcom here, and we've got some 326 * important info they lack(?!), but same goes the other way. 327 */ 328 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */ 329 INSTR_6XX(c0260000_00c78040, "ldc.offset0.1.uniform r0.x, 0, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */ 330 INSTR_6XX(c0260201_00c78040, "ldc.offset0.1.uniform r0.y, 0, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */ 331 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */ 332 INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */ 333 INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */ 334 335 /* a4xx-a5xx has the exact same instrs in 336 * dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.(dynamically_)uniform_fragment 337 * with no change based on the mode. Note that we can't decode this yet. 338 */ 339 /* INSTR_4XX(c7860000_00810001), */ /* ldc.1 r0.x, g[r1.x], 0, r0.x */ 340 /* INSTR_5XX(c7860000_00800000), */ /* ldc.a.1 r0.x, g[r0.x], 0, r0.x */ 341 342 /* custom */ 343 INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */ 344 345 /* custom shaders, loading .x, .y, .z, .w from an array of vec4 in block 0 */ 346 INSTR_6XX(c0260000_00478000, "ldc.offset0.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ 347 INSTR_6XX(c0260000_00478200, "ldc.offset1.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ 348 INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ 349 INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ 350 351 /* dEQP-VK.glsl.conditionals.if.if_else_vertex */ 352 INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */ 353 /* custom */ 354 INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */ 355 356 /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */ 357 INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"), 358 INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"), 359 INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"), 360 INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"), 361 /* Custom stp based on above to catch a disasm bug. */ 362 INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"), 363 364 /* Atomic: */ 365#if 0 366 /* TODO our encoding differs in b53 for these two */ 367 INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), 368 INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), 369#else 370 INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), 371 INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), 372#endif 373 INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"), 374 375 /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */ 376 INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"), 377 378 /* Bindless atomic: */ 379 INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ 380 INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ 381 INSTR_6XX(c0360000_0365c800, "atomic.b.max.typed.1d.u32.1.imm r0.x, r0.w, 0"), /* atomic.b.max.g.u32.1d.mode0.base0 r0.x,r0.w,0 */ 382 383 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */ 384 INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"), 385 386 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d */ 387 INSTR_4XX(a0c81f02_00800001, "sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.mode0 (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */ 388 INSTR_6XX(a0c81f07_0100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */ 389 390 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */ 391 INSTR_4XX(a0c81f02_80800001, "sam.s2en.nonuniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */ 392 INSTR_6XX(a0c81f07_8100000b, "sam.s2en.nonuniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */ 393 394 /* NonUniform: */ 395 /* dEQP-VK.descriptor_indexing.storage_buffer */ 396 INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"), 397 INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"), 398 /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */ 399 INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"), 400 /* dEQP-VK.descriptor_indexing.storage_image */ 401 INSTR_6XX(d0360c04_02640b80, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"), 402 /* dEQP-VK.descriptor_indexing.sampler */ 403 INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"), 404 405 /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */ 406 INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"), 407 408 /* Custom test since we've never seen the blob emit these. */ 409 INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"), 410 INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"), 411 412 /* cat7 */ 413 414 /* dEQP-VK.compute.basic.ssbo_local_barrier_single_invocation */ 415 INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"), 416 INSTR_6XX(e09a0000_00000000, "fence.r.w"), 417 INSTR_6XX(f0420000_00000000, "(sy)bar.g"), 418 /* clang-format on */ 419}; 420 421static void 422trim(char *string) 423{ 424 for (int len = strlen(string); len > 0 && string[len - 1] == '\n'; len--) 425 string[len - 1] = 0; 426} 427 428int 429main(int argc, char **argv) 430{ 431 int retval = 0; 432 int decode_fails = 0, asm_fails = 0, encode_fails = 0; 433 const int output_size = 4096; 434 char *disasm_output = malloc(output_size); 435 FILE *fdisasm = fmemopen(disasm_output, output_size, "w+"); 436 if (!fdisasm) { 437 fprintf(stderr, "failed to fmemopen\n"); 438 return 1; 439 } 440 441 struct ir3_compiler *compilers[10] = {}; 442 struct fd_dev_id dev_ids[ARRAY_SIZE(compilers)]; 443 444 for (int i = 0; i < ARRAY_SIZE(tests); i++) { 445 const struct test *test = &tests[i]; 446 printf("Testing a%d %s: \"%s\"...\n", test->gpu_id, test->instr, 447 test->expected); 448 449 rewind(fdisasm); 450 memset(disasm_output, 0, output_size); 451 452 /* 453 * Test disassembly: 454 */ 455 456 uint32_t code[2] = { 457 strtoll(&test->instr[9], NULL, 16), 458 strtoll(&test->instr[0], NULL, 16), 459 }; 460 isa_decode(code, 8, fdisasm, 461 &(struct isa_decode_options){ 462 .gpu_id = test->gpu_id, 463 .show_errors = true, 464 }); 465 fflush(fdisasm); 466 467 trim(disasm_output); 468 469 if (strcmp(disasm_output, test->expected) != 0) { 470 printf("FAIL: disasm\n"); 471 printf(" Expected: \"%s\"\n", test->expected); 472 printf(" Got: \"%s\"\n", disasm_output); 473 retval = 1; 474 decode_fails++; 475 } 476 477 /* 478 * Test assembly, which should result in the identical binary: 479 */ 480 481 unsigned gen = test->gpu_id / 100; 482 if (!compilers[gen]) { 483 dev_ids[gen].gpu_id = test->gpu_id; 484 compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen], 485 &(struct ir3_compiler_options){}); 486 } 487 488 FILE *fasm = 489 fmemopen((void *)test->expected, strlen(test->expected), "r"); 490 491 struct ir3_kernel_info info = {}; 492 struct ir3_shader *shader = ir3_parse_asm(compilers[gen], &info, fasm); 493 fclose(fasm); 494 if (!shader) { 495 printf("FAIL: %sexpected assembler fail\n", 496 test->parse_fail ? "" : "un"); 497 asm_fails++; 498 /* If this is an instruction that the asm parser is not expected 499 * to handle, don't count it as a fail. 500 */ 501 if (!test->parse_fail) 502 retval = 1; 503 continue; 504 } else if (test->parse_fail) { 505 /* If asm parse starts passing, and we don't expect that, flag 506 * it as a fail so we don't forget to update the test vector: 507 */ 508 printf( 509 "FAIL: unexpected parse success, please remove '.parse_fail=true'\n"); 510 retval = 1; 511 } 512 513 struct ir3_shader_variant *v = shader->variants; 514 if (memcmp(v->bin, code, sizeof(code))) { 515 printf("FAIL: assembler\n"); 516 printf(" Expected: %08x_%08x\n", code[1], code[0]); 517 printf(" Got: %08x_%08x\n", v->bin[1], v->bin[0]); 518 retval = 1; 519 encode_fails++; 520 } 521 522 ir3_shader_destroy(shader); 523 } 524 525 if (decode_fails) 526 printf("%d/%d decode fails\n", decode_fails, (int)ARRAY_SIZE(tests)); 527 if (asm_fails) 528 printf("%d/%d assembler fails\n", asm_fails, (int)ARRAY_SIZE(tests)); 529 if (encode_fails) 530 printf("%d/%d encode fails\n", encode_fails, (int)ARRAY_SIZE(tests)); 531 532 if (retval) { 533 printf("FAILED!\n"); 534 } else { 535 printf("PASSED!\n"); 536 } 537 538 for (unsigned i = 0; i < ARRAY_SIZE(compilers); i++) { 539 if (!compilers[i]) 540 continue; 541 ir3_compiler_destroy(compilers[i]); 542 } 543 544 fclose(fdisasm); 545 free(disasm_output); 546 547 return retval; 548} 549