/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <stdio.h>
#include "pan_bo.h"
#include "pan_shader.h"
#include "pan_scoreboard.h"
#include "pan_encoder.h"
#include "pan_indirect_dispatch.h"
#include "pan_pool.h"
#include "pan_util.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_memory.h"
#include "util/macros.h"

/* Indirect dispatch is implemented with a small compute shader that reads
 * the indirect buffer on the GPU and patches the target compute job header
 * in place before that job executes. The shader inputs are passed as push
 * constants; this macro loads one field of the input struct, deriving the
 * load size and offset from the struct definition. */
#define get_input_field(b, name) \
        nir_load_push_constant(b, \
                               1, sizeof(((struct pan_indirect_dispatch_info *)0)->name) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct pan_indirect_dispatch_info, name))

static mali_ptr
get_rsd(const struct panfrost_device *dev)
{
        return dev->indirect_dispatch.descs->ptr.gpu;
}

static mali_ptr
get_tls(const struct panfrost_device *dev)
{
        return dev->indirect_dispatch.descs->ptr.gpu +
               pan_size(RENDERER_STATE);
}

unsigned
GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_indirect_dispatch_info *inputs)
{
        struct panfrost_device *dev = pool->dev;
        struct panfrost_ptr job =
                pan_pool_alloc_desc(pool, COMPUTE_JOB);
        void *invocation =
                pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);

        /* The patching shader runs as a single workgroup of one thread. */
        panfrost_pack_work_groups_compute(invocation,
                                          1, 1, 1, 1, 1, 1,
                                          false, false);

        pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
                cfg.job_task_split = 2;
        }

        pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
                cfg.state = get_rsd(dev);
                cfg.thread_storage = get_tls(pool->dev);
                cfg.push_uniforms =
                        pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
        }

        return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                                false, true, 0, 0, &job, false);
}

void
GENX(pan_indirect_dispatch_init)(struct panfrost_device *dev)
{
        nir_builder b =
                nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                               GENX(pan_shader_get_compiler_options)(),
                                               "%s", "indirect_dispatch");
        nir_ssa_def *zero = nir_imm_int(&b, 0);
        nir_ssa_def *one = nir_imm_int(&b, 1);

        /* Read the three 32-bit workgroup counts from the indirect buffer. */
        nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);
        nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);
        nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);
        nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);

        nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);
        nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x,
                                            nir_imul(&b, num_wg_y, num_wg_z));

        /* An indirect dispatch with a zero count in any dimension is a no-op:
         * rewrite the target job type to NULL so the GPU skips it. Otherwise,
         * patch the INVOCATION words of the target job with the real counts. */
        nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));
        {
                nir_ssa_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));
                nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8);
                nir_store_global(&b, type_ptr, 1, ntype, 1);
        }
        nir_push_else(&b, NULL);
        {
                nir_ssa_def *job_dim_ptr = nir_iadd(&b, job_hdr_ptr,
                        nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
                nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one);
                nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one);
                nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one);
                nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32);
                nir_ssa_def *dims = nir_channel(&b, job_dim, 0);
                nir_ssa_def *split = nir_channel(&b, job_dim, 1);

                /* The CPU already packed the local size; only the workgroup
                 * count fields (shifts at bits 10, 16 and 22 of the split
                 * word) need filling in, mirroring what
                 * panfrost_pack_work_groups_compute() does. Counts are stored
                 * biased by -1 and each field is just wide enough to hold the
                 * previous one, hence the uclz-based shift computation. */
                nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);
                nir_ssa_def *num_wg_y_split = nir_iadd(&b, num_wg_x_split,
                        nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1)));
                nir_ssa_def *num_wg_z_split = nir_iadd(&b, num_wg_y_split,
                        nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1)));
                split = nir_ior(&b, split,
                                nir_ior(&b,
                                        nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
                                        nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
                dims = nir_ior(&b, dims,
                               nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split),
                                       nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split),
                                               nir_ishl(&b, num_wg_z_m1, num_wg_z_split))));

                nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);

                /* If the caller provided addresses for the workgroup count
                 * sysvals, mirror the counts there too; a NULL first pointer
                 * means they are not needed. */
                nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);

                nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0)));
                {
                        nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1);
                        nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, num_wg_y, 1);
                        nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, num_wg_z, 1);
                }
                nir_pop_if(&b, NULL);
        }
        nir_pop_if(&b, NULL);

        struct panfrost_compile_inputs inputs = {
                .gpu_id = dev->gpu_id,
                .fixed_sysval_ubo = -1,
                .no_ubo_to_push = true,
        };
        struct pan_shader_info shader_info;
        struct util_dynarray binary;

        util_dynarray_init(&binary, NULL);
        GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info);

        ralloc_free(b.shader);

        /* The patching shader is expected to be trivial: no thread-local or
         * workgroup storage and no driver sysvals. */
        assert(!shader_info.tls_size);
        assert(!shader_info.wls_size);
        assert(!shader_info.sysvals.sysval_count);

        /* The whole input struct is passed through push constants. */
        shader_info.push.count =
                DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4);

        dev->indirect_dispatch.bin =
                panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE,
                                   "Indirect dispatch shader");

        memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
        util_dynarray_fini(&binary);

        /* The renderer state and local storage descriptors never change, so
         * build them once here and share them across all indirect dispatches. */
        dev->indirect_dispatch.descs =
                panfrost_bo_create(dev,
                                   pan_size(RENDERER_STATE) +
                                   pan_size(LOCAL_STORAGE),
                                   0, "Indirect dispatch descriptors");

        mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;

        void *rsd = dev->indirect_dispatch.descs->ptr.cpu;
        pan_pack(rsd, RENDERER_STATE, cfg) {
                pan_shader_prepare_rsd(&shader_info, address, &cfg);
        }

        void *tsd = dev->indirect_dispatch.descs->ptr.cpu +
                    pan_size(RENDERER_STATE);
        pan_pack(tsd, LOCAL_STORAGE, ls) {
                ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
        }
}

void
GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev)
{
        panfrost_bo_unreference(dev->indirect_dispatch.bin);
        panfrost_bo_unreference(dev->indirect_dispatch.descs);
}