1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_deref.h" 27 28struct var_info { 29 nir_variable *var; 30 31 bool is_constant; 32 bool found_read; 33 bool duplicate; 34 35 /* Block that has all the variable stores. All the blocks with reads 36 * should be dominated by this block. 37 */ 38 nir_block *block; 39 40 /* If is_constant, hold the collected constant data for this var. */ 41 uint32_t constant_data_size; 42 void *constant_data; 43}; 44 45static int 46var_info_cmp(const void *_a, const void *_b) 47{ 48 const struct var_info *a = _a; 49 const struct var_info *b = _b; 50 uint32_t a_size = a->constant_data_size; 51 uint32_t b_size = b->constant_data_size; 52 53 if (a->is_constant != b->is_constant) { 54 return (int)a->is_constant - (int)b->is_constant; 55 } else if (a_size < b_size) { 56 return -1; 57 } else if (a_size > b_size) { 58 return 1; 59 } else if (a_size == 0) { 60 /* Don't call memcmp with invalid pointers. */ 61 return 0; 62 } else { 63 return memcmp(a->constant_data, b->constant_data, a_size); 64 } 65} 66 67static nir_ssa_def * 68build_constant_load(nir_builder *b, nir_deref_instr *deref, 69 glsl_type_size_align_func size_align) 70{ 71 nir_variable *var = nir_deref_instr_get_variable(deref); 72 73 const unsigned bit_size = glsl_get_bit_size(deref->type); 74 const unsigned num_components = glsl_get_vector_elements(deref->type); 75 76 UNUSED unsigned var_size, var_align; 77 size_align(var->type, &var_size, &var_align); 78 assert(var->data.location % var_align == 0); 79 80 UNUSED unsigned deref_size, deref_align; 81 size_align(deref->type, &deref_size, &deref_align); 82 83 nir_ssa_def *src = nir_build_deref_offset(b, deref, size_align); 84 nir_ssa_def *load = 85 nir_load_constant(b, num_components, bit_size, src, 86 .base = var->data.location, 87 .range = var_size, 88 .align_mul = deref_align, 89 .align_offset = 0); 90 91 if (load->bit_size < 8) { 92 /* Booleans are special-cased to be 32-bit */ 93 assert(glsl_type_is_boolean(deref->type)); 94 assert(deref_size == num_components * 4); 95 load->bit_size = 32; 96 return nir_b2b1(b, load); 97 } else { 98 assert(deref_size == num_components * bit_size / 8); 99 return load; 100 } 101} 102 103static void 104handle_constant_store(void *mem_ctx, struct var_info *info, 105 nir_deref_instr *deref, nir_const_value *val, 106 unsigned writemask, 107 glsl_type_size_align_func size_align) 108{ 109 assert(!nir_deref_instr_has_indirect(deref)); 110 const unsigned bit_size = glsl_get_bit_size(deref->type); 111 const unsigned num_components = glsl_get_vector_elements(deref->type); 112 113 if (info->constant_data_size == 0) { 114 unsigned var_size, var_align; 115 size_align(info->var->type, &var_size, &var_align); 116 info->constant_data_size = var_size; 117 info->constant_data = rzalloc_size(mem_ctx, var_size); 118 } 119 120 const unsigned offset = nir_deref_instr_get_const_offset(deref, size_align); 121 if (offset >= info->constant_data_size) 122 return; 123 124 char *dst = (char *)info->constant_data + offset; 125 126 for (unsigned i = 0; i < num_components; i++) { 127 if (!(writemask & (1 << i))) 128 continue; 129 130 switch (bit_size) { 131 case 1: 132 /* Booleans are special-cased to be 32-bit */ 133 ((int32_t *)dst)[i] = -(int)val[i].b; 134 break; 135 136 case 8: 137 ((uint8_t *)dst)[i] = val[i].u8; 138 break; 139 140 case 16: 141 ((uint16_t *)dst)[i] = val[i].u16; 142 break; 143 144 case 32: 145 ((uint32_t *)dst)[i] = val[i].u32; 146 break; 147 148 case 64: 149 ((uint64_t *)dst)[i] = val[i].u64; 150 break; 151 152 default: 153 unreachable("Invalid bit size"); 154 } 155 } 156} 157 158/** Lower large constant variables to shader constant data 159 * 160 * This pass looks for large (type_size(var->type) > threshold) variables 161 * which are statically constant and moves them into shader constant data. 162 * This is especially useful when large tables are baked into the shader 163 * source code because they can be moved into a UBO by the driver to reduce 164 * register pressure and make indirect access cheaper. 165 */ 166bool 167nir_opt_large_constants(nir_shader *shader, 168 glsl_type_size_align_func size_align, 169 unsigned threshold) 170{ 171 /* Default to a natural alignment if none is provided */ 172 if (size_align == NULL) 173 size_align = glsl_get_natural_size_align_bytes; 174 175 /* This only works with a single entrypoint */ 176 nir_function_impl *impl = nir_shader_get_entrypoint(shader); 177 178 unsigned num_locals = nir_function_impl_index_vars(impl); 179 180 if (num_locals == 0) { 181 nir_shader_preserve_all_metadata(shader); 182 return false; 183 } 184 185 struct var_info *var_infos = ralloc_array(NULL, struct var_info, num_locals); 186 nir_foreach_function_temp_variable(var, impl) { 187 var_infos[var->index] = (struct var_info) { 188 .var = var, 189 .is_constant = true, 190 .found_read = false, 191 }; 192 } 193 194 nir_metadata_require(impl, nir_metadata_dominance); 195 196 /* First, walk through the shader and figure out what variables we can 197 * lower to the constant blob. 198 */ 199 nir_foreach_block(block, impl) { 200 nir_foreach_instr(instr, block) { 201 if (instr->type == nir_instr_type_deref) { 202 /* If we ever see a complex use of a deref_var, we have to assume 203 * that variable is non-constant because we can't guarantee we 204 * will find all of the writers of that variable. 205 */ 206 nir_deref_instr *deref = nir_instr_as_deref(instr); 207 if (deref->deref_type == nir_deref_type_var && 208 deref->var->data.mode == nir_var_function_temp && 209 nir_deref_instr_has_complex_use(deref, 0)) 210 var_infos[deref->var->index].is_constant = false; 211 continue; 212 } 213 214 if (instr->type != nir_instr_type_intrinsic) 215 continue; 216 217 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 218 219 bool src_is_const = false; 220 nir_deref_instr *src_deref = NULL, *dst_deref = NULL; 221 unsigned writemask = 0; 222 switch (intrin->intrinsic) { 223 case nir_intrinsic_store_deref: 224 dst_deref = nir_src_as_deref(intrin->src[0]); 225 src_is_const = nir_src_is_const(intrin->src[1]); 226 writemask = nir_intrinsic_write_mask(intrin); 227 break; 228 229 case nir_intrinsic_load_deref: 230 src_deref = nir_src_as_deref(intrin->src[0]); 231 break; 232 233 case nir_intrinsic_copy_deref: 234 assert(!"Lowering of copy_deref with large constants is prohibited"); 235 break; 236 237 default: 238 continue; 239 } 240 241 if (dst_deref && nir_deref_mode_must_be(dst_deref, nir_var_function_temp)) { 242 nir_variable *var = nir_deref_instr_get_variable(dst_deref); 243 if (var == NULL) 244 continue; 245 246 assert(var->data.mode == nir_var_function_temp); 247 248 struct var_info *info = &var_infos[var->index]; 249 if (!info->is_constant) 250 continue; 251 252 if (!info->block) 253 info->block = block; 254 255 /* We only consider variables constant if they only have constant 256 * stores, all the stores come before any reads, and all stores 257 * come from the same block. We also can't handle indirect stores. 258 */ 259 if (!src_is_const || info->found_read || block != info->block || 260 nir_deref_instr_has_indirect(dst_deref)) { 261 info->is_constant = false; 262 } else { 263 nir_const_value *val = nir_src_as_const_value(intrin->src[1]); 264 handle_constant_store(var_infos, info, dst_deref, val, writemask, 265 size_align); 266 } 267 } 268 269 if (src_deref && nir_deref_mode_must_be(src_deref, nir_var_function_temp)) { 270 nir_variable *var = nir_deref_instr_get_variable(src_deref); 271 if (var == NULL) 272 continue; 273 274 assert(var->data.mode == nir_var_function_temp); 275 276 /* We only consider variables constant if all the reads are 277 * dominated by the block that writes to it. 278 */ 279 struct var_info *info = &var_infos[var->index]; 280 if (!info->is_constant) 281 continue; 282 283 if (!info->block || !nir_block_dominates(info->block, block)) 284 info->is_constant = false; 285 286 info->found_read = true; 287 } 288 } 289 } 290 291 /* Allocate constant data space for each variable that just has constant 292 * data. We sort them by size and content so we can easily find 293 * duplicates. 294 */ 295 const unsigned old_constant_data_size = shader->constant_data_size; 296 qsort(var_infos, num_locals, sizeof(struct var_info), var_info_cmp); 297 for (int i = 0; i < num_locals; i++) { 298 struct var_info *info = &var_infos[i]; 299 300 /* Fix up indices after we sorted. */ 301 info->var->index = i; 302 303 if (!info->is_constant) 304 continue; 305 306 unsigned var_size, var_align; 307 size_align(info->var->type, &var_size, &var_align); 308 if (var_size <= threshold || !info->found_read) { 309 /* Don't bother lowering small stuff or data that's never read */ 310 info->is_constant = false; 311 continue; 312 } 313 314 if (i > 0 && var_info_cmp(info, &var_infos[i - 1]) == 0) { 315 info->var->data.location = var_infos[i - 1].var->data.location; 316 info->duplicate = true; 317 } else { 318 info->var->data.location = ALIGN_POT(shader->constant_data_size, var_align); 319 shader->constant_data_size = info->var->data.location + var_size; 320 } 321 } 322 323 if (shader->constant_data_size == old_constant_data_size) { 324 nir_shader_preserve_all_metadata(shader); 325 ralloc_free(var_infos); 326 return false; 327 } 328 329 assert(shader->constant_data_size > old_constant_data_size); 330 shader->constant_data = rerzalloc_size(shader, shader->constant_data, 331 old_constant_data_size, 332 shader->constant_data_size); 333 for (int i = 0; i < num_locals; i++) { 334 struct var_info *info = &var_infos[i]; 335 if (!info->duplicate && info->is_constant) { 336 memcpy((char *)shader->constant_data + info->var->data.location, 337 info->constant_data, info->constant_data_size); 338 } 339 } 340 341 nir_builder b; 342 nir_builder_init(&b, impl); 343 344 nir_foreach_block(block, impl) { 345 nir_foreach_instr_safe(instr, block) { 346 if (instr->type != nir_instr_type_intrinsic) 347 continue; 348 349 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 350 351 switch (intrin->intrinsic) { 352 case nir_intrinsic_load_deref: { 353 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 354 if (!nir_deref_mode_is(deref, nir_var_function_temp)) 355 continue; 356 357 nir_variable *var = nir_deref_instr_get_variable(deref); 358 if (var == NULL) 359 continue; 360 361 struct var_info *info = &var_infos[var->index]; 362 if (info->is_constant) { 363 b.cursor = nir_after_instr(&intrin->instr); 364 nir_ssa_def *val = build_constant_load(&b, deref, size_align); 365 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 366 val); 367 nir_instr_remove(&intrin->instr); 368 nir_deref_instr_remove_if_unused(deref); 369 } 370 break; 371 } 372 373 case nir_intrinsic_store_deref: { 374 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 375 if (!nir_deref_mode_is(deref, nir_var_function_temp)) 376 continue; 377 378 nir_variable *var = nir_deref_instr_get_variable(deref); 379 if (var == NULL) 380 continue; 381 382 struct var_info *info = &var_infos[var->index]; 383 if (info->is_constant) { 384 nir_instr_remove(&intrin->instr); 385 nir_deref_instr_remove_if_unused(deref); 386 } 387 break; 388 } 389 case nir_intrinsic_copy_deref: 390 default: 391 continue; 392 } 393 } 394 } 395 396 /* Clean up the now unused variables */ 397 for (int i = 0; i < num_locals; i++) { 398 struct var_info *info = &var_infos[i]; 399 if (info->is_constant) 400 exec_node_remove(&info->var->node); 401 } 402 403 ralloc_free(var_infos); 404 405 nir_metadata_preserve(impl, nir_metadata_block_index | 406 nir_metadata_dominance); 407 return true; 408} 409