1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2016 Red Hat. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci#include "util/u_inlines.h" 24bf215546Sopenharmony_ci#include "util/u_math.h" 25bf215546Sopenharmony_ci#include "util/u_memory.h" 26bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h" 27bf215546Sopenharmony_ci#include "draw/draw_context.h" 28bf215546Sopenharmony_ci#include "draw/draw_vertex.h" 29bf215546Sopenharmony_ci#include "sp_context.h" 30bf215546Sopenharmony_ci#include "sp_screen.h" 31bf215546Sopenharmony_ci#include "sp_state.h" 32bf215546Sopenharmony_ci#include "sp_texture.h" 33bf215546Sopenharmony_ci#include "sp_tex_sample.h" 34bf215546Sopenharmony_ci#include "sp_tex_tile_cache.h" 35bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistatic void 38bf215546Sopenharmony_cics_prepare(const struct sp_compute_shader *cs, 39bf215546Sopenharmony_ci struct tgsi_exec_machine *machine, 40bf215546Sopenharmony_ci int local_x, int local_y, int local_z, 41bf215546Sopenharmony_ci int g_w, int g_h, int g_d, 42bf215546Sopenharmony_ci int b_w, int b_h, int b_d, 43bf215546Sopenharmony_ci struct tgsi_sampler *sampler, 44bf215546Sopenharmony_ci struct tgsi_image *image, 45bf215546Sopenharmony_ci struct tgsi_buffer *buffer ) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci int j; 48bf215546Sopenharmony_ci /* 49bf215546Sopenharmony_ci * Bind tokens/shader to the interpreter's machine state. 50bf215546Sopenharmony_ci */ 51bf215546Sopenharmony_ci tgsi_exec_machine_bind_shader(machine, 52bf215546Sopenharmony_ci cs->tokens, 53bf215546Sopenharmony_ci sampler, image, buffer); 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) { 56bf215546Sopenharmony_ci unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID]; 57bf215546Sopenharmony_ci for (j = 0; j < TGSI_QUAD_SIZE; j++) { 58bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[0].i[j] = local_x + j; 59bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[1].i[j] = local_y; 60bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[2].i[j] = local_z; 61bf215546Sopenharmony_ci } 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) { 65bf215546Sopenharmony_ci unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE]; 66bf215546Sopenharmony_ci for (j = 0; j < TGSI_QUAD_SIZE; j++) { 67bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[0].i[j] = g_w; 68bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[1].i[j] = g_h; 69bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[2].i[j] = g_d; 70bf215546Sopenharmony_ci } 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) { 74bf215546Sopenharmony_ci unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE]; 75bf215546Sopenharmony_ci for (j = 0; j < TGSI_QUAD_SIZE; j++) { 76bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[0].i[j] = b_w; 77bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[1].i[j] = b_h; 78bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[2].i[j] = b_d; 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci } 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic bool 84bf215546Sopenharmony_cics_run(const struct sp_compute_shader *cs, 85bf215546Sopenharmony_ci int g_w, int g_h, int g_d, 86bf215546Sopenharmony_ci struct tgsi_exec_machine *machine, bool restart) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci if (!restart) { 89bf215546Sopenharmony_ci if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) { 90bf215546Sopenharmony_ci unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID]; 91bf215546Sopenharmony_ci int j; 92bf215546Sopenharmony_ci for (j = 0; j < TGSI_QUAD_SIZE; j++) { 93bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[0].i[j] = g_w; 94bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[1].i[j] = g_h; 95bf215546Sopenharmony_ci machine->SystemValue[i].xyzw[2].i[j] = g_d; 96bf215546Sopenharmony_ci } 97bf215546Sopenharmony_ci } 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci tgsi_exec_machine_run(machine, restart ? machine->pc : 0); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci if (machine->pc != -1) 103bf215546Sopenharmony_ci return true; 104bf215546Sopenharmony_ci return false; 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic void 108bf215546Sopenharmony_cirun_workgroup(const struct sp_compute_shader *cs, 109bf215546Sopenharmony_ci int g_w, int g_h, int g_d, int num_threads, 110bf215546Sopenharmony_ci struct tgsi_exec_machine **machines) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci int i; 113bf215546Sopenharmony_ci bool grp_hit_barrier, restart_threads = false; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci do { 116bf215546Sopenharmony_ci grp_hit_barrier = false; 117bf215546Sopenharmony_ci for (i = 0; i < num_threads; i++) { 118bf215546Sopenharmony_ci grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads); 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci restart_threads = false; 121bf215546Sopenharmony_ci if (grp_hit_barrier) { 122bf215546Sopenharmony_ci grp_hit_barrier = false; 123bf215546Sopenharmony_ci restart_threads = true; 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci } while (restart_threads); 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic void 129bf215546Sopenharmony_cics_delete(const struct sp_compute_shader *cs, 130bf215546Sopenharmony_ci struct tgsi_exec_machine *machine) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci if (machine->Tokens == cs->tokens) { 133bf215546Sopenharmony_ci tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL); 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_cistatic void 138bf215546Sopenharmony_cifill_grid_size(struct pipe_context *context, 139bf215546Sopenharmony_ci const struct pipe_grid_info *info, 140bf215546Sopenharmony_ci uint32_t grid_size[3]) 141bf215546Sopenharmony_ci{ 142bf215546Sopenharmony_ci struct pipe_transfer *transfer; 143bf215546Sopenharmony_ci uint32_t *params; 144bf215546Sopenharmony_ci if (!info->indirect) { 145bf215546Sopenharmony_ci grid_size[0] = info->grid[0]; 146bf215546Sopenharmony_ci grid_size[1] = info->grid[1]; 147bf215546Sopenharmony_ci grid_size[2] = info->grid[2]; 148bf215546Sopenharmony_ci return; 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci params = pipe_buffer_map_range(context, info->indirect, 151bf215546Sopenharmony_ci info->indirect_offset, 152bf215546Sopenharmony_ci 3 * sizeof(uint32_t), 153bf215546Sopenharmony_ci PIPE_MAP_READ, 154bf215546Sopenharmony_ci &transfer); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci if (!transfer) 157bf215546Sopenharmony_ci return; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci grid_size[0] = params[0]; 160bf215546Sopenharmony_ci grid_size[1] = params[1]; 161bf215546Sopenharmony_ci grid_size[2] = params[2]; 162bf215546Sopenharmony_ci pipe_buffer_unmap(context, transfer); 163bf215546Sopenharmony_ci} 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_civoid 166bf215546Sopenharmony_cisoftpipe_launch_grid(struct pipe_context *context, 167bf215546Sopenharmony_ci const struct pipe_grid_info *info) 168bf215546Sopenharmony_ci{ 169bf215546Sopenharmony_ci struct softpipe_context *softpipe = softpipe_context(context); 170bf215546Sopenharmony_ci struct sp_compute_shader *cs = softpipe->cs; 171bf215546Sopenharmony_ci int num_threads_in_group; 172bf215546Sopenharmony_ci struct tgsi_exec_machine **machines; 173bf215546Sopenharmony_ci int bwidth, bheight, bdepth; 174bf215546Sopenharmony_ci int local_x, local_y, local_z, i; 175bf215546Sopenharmony_ci int g_w, g_h, g_d; 176bf215546Sopenharmony_ci uint32_t grid_size[3] = {0}; 177bf215546Sopenharmony_ci void *local_mem = NULL; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci softpipe_update_compute_samplers(softpipe); 180bf215546Sopenharmony_ci bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH]; 181bf215546Sopenharmony_ci bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT]; 182bf215546Sopenharmony_ci bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; 183bf215546Sopenharmony_ci num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth; 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci fill_grid_size(context, info, grid_size); 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci if (cs->shader.req_local_mem) { 188bf215546Sopenharmony_ci local_mem = CALLOC(1, cs->shader.req_local_mem); 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group); 192bf215546Sopenharmony_ci if (!machines) { 193bf215546Sopenharmony_ci FREE(local_mem); 194bf215546Sopenharmony_ci return; 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */ 198bf215546Sopenharmony_ci int idx = 0; 199bf215546Sopenharmony_ci for (local_z = 0; local_z < bdepth; local_z++) { 200bf215546Sopenharmony_ci for (local_y = 0; local_y < bheight; local_y++) { 201bf215546Sopenharmony_ci for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) { 202bf215546Sopenharmony_ci machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE); 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci machines[idx]->LocalMem = local_mem; 205bf215546Sopenharmony_ci machines[idx]->LocalMemSize = cs->shader.req_local_mem; 206bf215546Sopenharmony_ci machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1; 207bf215546Sopenharmony_ci cs_prepare(cs, machines[idx], 208bf215546Sopenharmony_ci local_x, local_y, local_z, 209bf215546Sopenharmony_ci grid_size[0], grid_size[1], grid_size[2], 210bf215546Sopenharmony_ci bwidth, bheight, bdepth, 211bf215546Sopenharmony_ci (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE], 212bf215546Sopenharmony_ci (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE], 213bf215546Sopenharmony_ci (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]); 214bf215546Sopenharmony_ci tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS, 215bf215546Sopenharmony_ci softpipe->mapped_constants[PIPE_SHADER_COMPUTE], 216bf215546Sopenharmony_ci softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]); 217bf215546Sopenharmony_ci idx++; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci } 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci for (g_d = 0; g_d < grid_size[2]; g_d++) { 223bf215546Sopenharmony_ci for (g_h = 0; g_h < grid_size[1]; g_h++) { 224bf215546Sopenharmony_ci for (g_w = 0; g_w < grid_size[0]; g_w++) { 225bf215546Sopenharmony_ci run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci } 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (softpipe->active_statistics_queries) { 231bf215546Sopenharmony_ci softpipe->pipeline_statistics.cs_invocations += 232bf215546Sopenharmony_ci grid_size[0] * grid_size[1] * grid_size[2]; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci for (i = 0; i < num_threads_in_group; i++) { 236bf215546Sopenharmony_ci cs_delete(cs, machines[i]); 237bf215546Sopenharmony_ci tgsi_exec_machine_destroy(machines[i]); 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci FREE(local_mem); 241bf215546Sopenharmony_ci FREE(machines); 242bf215546Sopenharmony_ci} 243