/*
 * Copyright 2016 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "sp_context.h"
#include "sp_screen.h"
#include "sp_state.h"
#include "sp_texture.h"
#include "sp_tex_sample.h"
#include "sp_tex_tile_cache.h"
#include "tgsi/tgsi_parse.h"
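
/*
 * Prepare one TGSI interpreter instance for a quad of compute threads:
 * bind the shader tokens plus sampler/image/buffer state, then seed the
 * THREAD_ID, GRID_SIZE and BLOCK_SIZE system values if the shader reads
 * them.  Only THREAD_ID.x differs per quad lane (local_x + j).
 */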
static void
cs_prepare(const struct sp_compute_shader *cs,
           struct tgsi_exec_machine *machine,
           int local_x, int local_y, int local_z,
           int g_w, int g_h, int g_d,
           int b_w, int b_h, int b_d,
           struct tgsi_sampler *sampler,
           struct tgsi_image *image,
           struct tgsi_buffer *buffer)
{
   int j;
   /*
    * Bind tokens/shader to the interpreter's machine state.
    */
   tgsi_exec_machine_bind_shader(machine,
                                 cs->tokens,
                                 sampler, image, buffer);

   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         machine->SystemValue[i].xyzw[0].i[j] = local_x + j;
         machine->SystemValue[i].xyzw[1].i[j] = local_y;
         machine->SystemValue[i].xyzw[2].i[j] = local_z;
      }
   }

   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         machine->SystemValue[i].xyzw[0].i[j] = g_w;
         machine->SystemValue[i].xyzw[1].i[j] = g_h;
         machine->SystemValue[i].xyzw[2].i[j] = g_d;
      }
   }

   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         machine->SystemValue[i].xyzw[0].i[j] = b_w;
         machine->SystemValue[i].xyzw[1].i[j] = b_h;
         machine->SystemValue[i].xyzw[2].i[j] = b_d;
      }
   }
}

/*
 * Run (or resume) the interpreter for one thread-quad.  On the first run
 * BLOCK_ID is seeded with this workgroup's grid coordinates; on a restart
 * execution resumes from the saved program counter instead.  Returns true
 * if the shader paused at a barrier (pc != -1).
 */
static bool
cs_run(const struct sp_compute_shader *cs,
       int g_w, int g_h, int g_d,
       struct tgsi_exec_machine *machine, bool restart)
{
   if (!restart) {
      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
         unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
         int j;
         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
            machine->SystemValue[i].xyzw[0].i[j] = g_w;
            machine->SystemValue[i].xyzw[1].i[j] = g_h;
            machine->SystemValue[i].xyzw[2].i[j] = g_d;
         }
      }
   }

   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);

   if (machine->pc != -1)
      return true;
   return false;
}

/*
 * Run every thread-quad of one workgroup.  If any machine pauses at a
 * barrier, restart the whole group from its saved state and repeat until
 * all machines have run to completion.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   int i;
   bool grp_hit_barrier, restart_threads = false;

   do {
      grp_hit_barrier = false;
      for (i = 0; i < num_threads; i++) {
         grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);
      }
      restart_threads = false;
      if (grp_hit_barrier) {
         grp_hit_barrier = false;
         restart_threads = true;
      }
   } while (restart_threads);
}

/*
 * Unbind the shader from a machine if its tokens are still the ones bound.
 */
static void
cs_delete(const struct sp_compute_shader *cs,
          struct tgsi_exec_machine *machine)
{
   if (machine->Tokens == cs->tokens) {
      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
   }
}

/*
 * Fetch the grid dimensions: directly from info->grid for a normal
 * dispatch, or by mapping the indirect parameter buffer for an indirect
 * one.
 */
static void
fill_grid_size(struct pipe_context *context,
               const struct pipe_grid_info *info,
               uint32_t grid_size[3])
{
   struct pipe_transfer *transfer;
   uint32_t *params;
   if (!info->indirect) {
      grid_size[0] = info->grid[0];
      grid_size[1] = info->grid[1];
      grid_size[2] = info->grid[2];
      return;
   }
   params = pipe_buffer_map_range(context, info->indirect,
                                  info->indirect_offset,
                                  3 * sizeof(uint32_t),
                                  PIPE_MAP_READ,
                                  &transfer);

   if (!transfer)
      return;

   grid_size[0] = params[0];
   grid_size[1] = params[1];
   grid_size[2] = params[2];
   pipe_buffer_unmap(context, transfer);
}
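
/*
 * pipe_context::launch_grid entry point.  Creates one interpreter per
 * thread-quad of the fixed-size block, shares a single local-memory
 * allocation between them, then runs every workgroup of the grid
 * sequentially on that set of machines.
 */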
void
softpipe_launch_grid(struct pipe_context *context,
                     const struct pipe_grid_info *info)
{
   struct softpipe_context *softpipe = softpipe_context(context);
   struct sp_compute_shader *cs = softpipe->cs;
   int num_threads_in_group;
   struct tgsi_exec_machine **machines;
   int bwidth, bheight, bdepth;
   int local_x, local_y, local_z, i;
   int g_w, g_h, g_d;
   uint32_t grid_size[3] = {0};
   void *local_mem = NULL;

   softpipe_update_compute_samplers(softpipe);
   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
   /* One machine runs a quad of threads, so each x-row of the block needs
    * ceil(bwidth / TGSI_QUAD_SIZE) machines. */
   num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth;

   fill_grid_size(context, info, grid_size);

   /* Shared local memory, visible to every machine in the workgroup. */
   if (cs->shader.req_local_mem) {
      local_mem = CALLOC(1, cs->shader.req_local_mem);
   }

   machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
   if (!machines) {
      FREE(local_mem);
      return;
   }

   /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
   int idx = 0;
   for (local_z = 0; local_z < bdepth; local_z++) {
      for (local_y = 0; local_y < bheight; local_y++) {
         for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) {
            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);

            machines[idx]->LocalMem = local_mem;
            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
            /* Mask off quad lanes that fall outside the block width. */
            machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1;
            cs_prepare(cs, machines[idx],
                       local_x, local_y, local_z,
                       grid_size[0], grid_size[1], grid_size[2],
                       bwidth, bheight, bdepth,
                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
            idx++;
         }
      }
   }

   /* Execute every workgroup of the grid with the same set of machines. */
   for (g_d = 0; g_d < grid_size[2]; g_d++) {
      for (g_h = 0; g_h < grid_size[1]; g_h++) {
         for (g_w = 0; g_w < grid_size[0]; g_w++) {
            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
         }
      }
   }

   if (softpipe->active_statistics_queries) {
      softpipe->pipeline_statistics.cs_invocations +=
         grid_size[0] * grid_size[1] * grid_size[2];
   }

   for (i = 0; i < num_threads_in_group; i++) {
      cs_delete(cs, machines[i]);
      tgsi_exec_machine_destroy(machines[i]);
   }

   FREE(local_mem);
   FREE(machines);
}