1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "brw_private.h" 25bf215546Sopenharmony_ci#include "compiler/shader_info.h" 26bf215546Sopenharmony_ci#include "intel/dev/intel_debug.h" 27bf215546Sopenharmony_ci#include "intel/dev/intel_device_info.h" 28bf215546Sopenharmony_ci#include "util/ralloc.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ciunsigned 31bf215546Sopenharmony_cibrw_required_dispatch_width(const struct shader_info *info) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) { 34bf215546Sopenharmony_ci assert(gl_shader_stage_uses_workgroup(info->stage)); 35bf215546Sopenharmony_ci /* These enum values are expressly chosen to be equal to the subgroup 36bf215546Sopenharmony_ci * size that they require. 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ci return (unsigned)info->subgroup_size; 39bf215546Sopenharmony_ci } else { 40bf215546Sopenharmony_ci return 0; 41bf215546Sopenharmony_ci } 42bf215546Sopenharmony_ci} 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cistatic inline bool 45bf215546Sopenharmony_citest_bit(unsigned mask, unsigned bit) { 46bf215546Sopenharmony_ci return mask & (1u << bit); 47bf215546Sopenharmony_ci} 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_cibool 50bf215546Sopenharmony_cibrw_simd_should_compile(void *mem_ctx, 51bf215546Sopenharmony_ci unsigned simd, 52bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 53bf215546Sopenharmony_ci struct brw_cs_prog_data *prog_data, 54bf215546Sopenharmony_ci unsigned required, 55bf215546Sopenharmony_ci const char **error) 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci assert(!test_bit(prog_data->prog_mask, simd)); 59bf215546Sopenharmony_ci assert(error); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci const unsigned width = 8u << simd; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci /* For shaders with variable size workgroup, we will always compile all the 64bf215546Sopenharmony_ci * variants, since the choice will happen only at dispatch time. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci const bool workgroup_size_variable = prog_data->local_size[0] == 0; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci if (!workgroup_size_variable) { 69bf215546Sopenharmony_ci if (test_bit(prog_data->prog_spilled, simd)) { 70bf215546Sopenharmony_ci *error = ralloc_asprintf( 71bf215546Sopenharmony_ci mem_ctx, "SIMD%u skipped because would spill", width); 72bf215546Sopenharmony_ci return false; 73bf215546Sopenharmony_ci } 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci const unsigned workgroup_size = prog_data->local_size[0] * 76bf215546Sopenharmony_ci prog_data->local_size[1] * 77bf215546Sopenharmony_ci prog_data->local_size[2]; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci unsigned max_threads = devinfo->max_cs_workgroup_threads; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci if (required && required != width) { 82bf215546Sopenharmony_ci *error = ralloc_asprintf( 83bf215546Sopenharmony_ci mem_ctx, "SIMD%u skipped because required dispatch width is %u", 84bf215546Sopenharmony_ci width, required); 85bf215546Sopenharmony_ci return false; 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci if (simd > 0 && test_bit(prog_data->prog_mask, simd - 1) && 89bf215546Sopenharmony_ci workgroup_size <= (width / 2)) { 90bf215546Sopenharmony_ci *error = ralloc_asprintf( 91bf215546Sopenharmony_ci mem_ctx, "SIMD%u skipped because workgroup size %u already fits in SIMD%u", 92bf215546Sopenharmony_ci width, workgroup_size, width / 2); 93bf215546Sopenharmony_ci return false; 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci if (DIV_ROUND_UP(workgroup_size, width) > max_threads) { 97bf215546Sopenharmony_ci *error = ralloc_asprintf( 98bf215546Sopenharmony_ci mem_ctx, "SIMD%u can't fit all %u invocations in %u threads", 99bf215546Sopenharmony_ci width, workgroup_size, max_threads); 100bf215546Sopenharmony_ci return false; 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci /* The SIMD32 is only enabled for cases it is needed unless forced. 104bf215546Sopenharmony_ci * 105bf215546Sopenharmony_ci * TODO: Use performance_analysis and drop this rule. 106bf215546Sopenharmony_ci */ 107bf215546Sopenharmony_ci if (width == 32) { 108bf215546Sopenharmony_ci if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) { 109bf215546Sopenharmony_ci *error = ralloc_strdup( 110bf215546Sopenharmony_ci mem_ctx, "SIMD32 skipped because not required"); 111bf215546Sopenharmony_ci return false; 112bf215546Sopenharmony_ci } 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci const bool env_skip[3] = { 117bf215546Sopenharmony_ci INTEL_DEBUG(DEBUG_NO8), 118bf215546Sopenharmony_ci INTEL_DEBUG(DEBUG_NO16), 119bf215546Sopenharmony_ci INTEL_DEBUG(DEBUG_NO32), 120bf215546Sopenharmony_ci }; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci if (unlikely(env_skip[simd])) { 123bf215546Sopenharmony_ci *error = ralloc_asprintf( 124bf215546Sopenharmony_ci mem_ctx, "SIMD%u skipped because INTEL_DEBUG=no%u", 125bf215546Sopenharmony_ci width, width); 126bf215546Sopenharmony_ci return false; 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci return true; 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_civoid 133bf215546Sopenharmony_cibrw_simd_mark_compiled(unsigned simd, struct brw_cs_prog_data *prog_data, bool spilled) 134bf215546Sopenharmony_ci{ 135bf215546Sopenharmony_ci assert(!test_bit(prog_data->prog_mask, simd)); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci prog_data->prog_mask |= 1u << simd; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci /* If a SIMD spilled, all the larger ones would spill too. */ 140bf215546Sopenharmony_ci if (spilled) { 141bf215546Sopenharmony_ci for (unsigned i = simd; i < 3; i++) 142bf215546Sopenharmony_ci prog_data->prog_spilled |= 1u << i; 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci} 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ciint 147bf215546Sopenharmony_cibrw_simd_select(const struct brw_cs_prog_data *prog_data) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci assert((prog_data->prog_mask & ~0x7u) == 0); 150bf215546Sopenharmony_ci const unsigned not_spilled_mask = 151bf215546Sopenharmony_ci prog_data->prog_mask & ~prog_data->prog_spilled; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci /* Util functions index bits from 1 instead of 0, adjust before return. */ 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci if (not_spilled_mask) 156bf215546Sopenharmony_ci return util_last_bit(not_spilled_mask) - 1; 157bf215546Sopenharmony_ci else if (prog_data->prog_mask) 158bf215546Sopenharmony_ci return ffs(prog_data->prog_mask) - 1; 159bf215546Sopenharmony_ci else 160bf215546Sopenharmony_ci return -1; 161bf215546Sopenharmony_ci} 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ciint 164bf215546Sopenharmony_cibrw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo, 165bf215546Sopenharmony_ci const struct brw_cs_prog_data *prog_data, 166bf215546Sopenharmony_ci const unsigned *sizes) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci assert(sizes); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci if (prog_data->local_size[0] == sizes[0] && 171bf215546Sopenharmony_ci prog_data->local_size[1] == sizes[1] && 172bf215546Sopenharmony_ci prog_data->local_size[2] == sizes[2]) 173bf215546Sopenharmony_ci return brw_simd_select(prog_data); 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci struct brw_cs_prog_data cloned = *prog_data; 178bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) 179bf215546Sopenharmony_ci cloned.local_size[i] = sizes[i]; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci cloned.prog_mask = 0; 182bf215546Sopenharmony_ci cloned.prog_spilled = 0; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci const char *error[3] = {0}; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci for (unsigned simd = 0; simd < 3; simd++) { 187bf215546Sopenharmony_ci /* We are not recompiling, so use original results of prog_mask and 188bf215546Sopenharmony_ci * prog_spilled as they will already contain all possible compilations. 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_ci if (brw_simd_should_compile(mem_ctx, simd, devinfo, &cloned, 191bf215546Sopenharmony_ci 0 /* required_dispatch_width */, &error[simd]) && 192bf215546Sopenharmony_ci test_bit(prog_data->prog_mask, simd)) { 193bf215546Sopenharmony_ci brw_simd_mark_compiled(simd, &cloned, test_bit(prog_data->prog_spilled, simd)); 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci ralloc_free(mem_ctx); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci return brw_simd_select(&cloned); 200bf215546Sopenharmony_ci} 201