1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2020 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_builder.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include <string.h> 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** Returns the type to use for a copy of the given size. 29bf215546Sopenharmony_ci * 30bf215546Sopenharmony_ci * The actual type doesn't matter here all that much as we're just going to do 31bf215546Sopenharmony_ci * a load/store on it and never any arithmetic. 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_cistatic const struct glsl_type * 34bf215546Sopenharmony_cicopy_type_for_byte_size(unsigned size) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci switch (size) { 37bf215546Sopenharmony_ci case 1: return glsl_vector_type(GLSL_TYPE_UINT8, 1); 38bf215546Sopenharmony_ci case 2: return glsl_vector_type(GLSL_TYPE_UINT16, 1); 39bf215546Sopenharmony_ci case 4: return glsl_vector_type(GLSL_TYPE_UINT, 1); 40bf215546Sopenharmony_ci case 8: return glsl_vector_type(GLSL_TYPE_UINT, 2); 41bf215546Sopenharmony_ci case 16: return glsl_vector_type(GLSL_TYPE_UINT, 4); 42bf215546Sopenharmony_ci default: 43bf215546Sopenharmony_ci unreachable("Unsupported size"); 44bf215546Sopenharmony_ci } 45bf215546Sopenharmony_ci} 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_cistatic nir_ssa_def * 48bf215546Sopenharmony_cimemcpy_load_deref_elem(nir_builder *b, nir_deref_instr *parent, 49bf215546Sopenharmony_ci nir_ssa_def *index) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci nir_deref_instr *deref; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci index = nir_i2i(b, index, nir_dest_bit_size(parent->dest)); 54bf215546Sopenharmony_ci assert(parent->deref_type == nir_deref_type_cast); 55bf215546Sopenharmony_ci deref = nir_build_deref_ptr_as_array(b, parent, index); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci return nir_load_deref(b, deref); 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic nir_ssa_def * 61bf215546Sopenharmony_cimemcpy_load_deref_elem_imm(nir_builder *b, nir_deref_instr *parent, 62bf215546Sopenharmony_ci uint64_t index) 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci nir_ssa_def *idx = nir_imm_intN_t(b, index, parent->dest.ssa.bit_size); 65bf215546Sopenharmony_ci return memcpy_load_deref_elem(b, parent, idx); 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic void 69bf215546Sopenharmony_cimemcpy_store_deref_elem(nir_builder *b, nir_deref_instr *parent, 70bf215546Sopenharmony_ci nir_ssa_def *index, nir_ssa_def *value) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci nir_deref_instr *deref; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci index = nir_i2i(b, index, nir_dest_bit_size(parent->dest)); 75bf215546Sopenharmony_ci assert(parent->deref_type == nir_deref_type_cast); 76bf215546Sopenharmony_ci deref = nir_build_deref_ptr_as_array(b, parent, index); 77bf215546Sopenharmony_ci nir_store_deref(b, deref, value, ~0); 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistatic void 81bf215546Sopenharmony_cimemcpy_store_deref_elem_imm(nir_builder *b, nir_deref_instr *parent, 82bf215546Sopenharmony_ci uint64_t index, nir_ssa_def *value) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci nir_ssa_def *idx = nir_imm_intN_t(b, index, parent->dest.ssa.bit_size); 85bf215546Sopenharmony_ci memcpy_store_deref_elem(b, parent, idx, value); 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic bool 89bf215546Sopenharmony_cilower_memcpy_impl(nir_function_impl *impl) 90bf215546Sopenharmony_ci{ 91bf215546Sopenharmony_ci nir_builder b; 92bf215546Sopenharmony_ci nir_builder_init(&b, impl); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci bool found_const_memcpy = false, found_non_const_memcpy = false; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci nir_foreach_block_safe(block, impl) { 97bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 98bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 99bf215546Sopenharmony_ci continue; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci nir_intrinsic_instr *cpy = nir_instr_as_intrinsic(instr); 102bf215546Sopenharmony_ci if (cpy->intrinsic != nir_intrinsic_memcpy_deref) 103bf215546Sopenharmony_ci continue; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci b.cursor = nir_instr_remove(&cpy->instr); 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci nir_deref_instr *dst = nir_src_as_deref(cpy->src[0]); 108bf215546Sopenharmony_ci nir_deref_instr *src = nir_src_as_deref(cpy->src[1]); 109bf215546Sopenharmony_ci if (nir_src_is_const(cpy->src[2])) { 110bf215546Sopenharmony_ci found_const_memcpy = true; 111bf215546Sopenharmony_ci uint64_t size = nir_src_as_uint(cpy->src[2]); 112bf215546Sopenharmony_ci uint64_t offset = 0; 113bf215546Sopenharmony_ci while (offset < size) { 114bf215546Sopenharmony_ci uint64_t remaining = size - offset; 115bf215546Sopenharmony_ci /* Find the largest chunk size power-of-two (MSB in remaining) 116bf215546Sopenharmony_ci * and limit our chunk to 16B (a vec4). It's important to do as 117bf215546Sopenharmony_ci * many 16B chunks as possible first so that the index 118bf215546Sopenharmony_ci * computation is correct for 119bf215546Sopenharmony_ci * memcpy_(load|store)_deref_elem_imm. 120bf215546Sopenharmony_ci */ 121bf215546Sopenharmony_ci unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4); 122bf215546Sopenharmony_ci const struct glsl_type *copy_type = 123bf215546Sopenharmony_ci copy_type_for_byte_size(copy_size); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci nir_deref_instr *copy_dst = 126bf215546Sopenharmony_ci nir_build_deref_cast(&b, &dst->dest.ssa, dst->modes, 127bf215546Sopenharmony_ci copy_type, copy_size); 128bf215546Sopenharmony_ci nir_deref_instr *copy_src = 129bf215546Sopenharmony_ci nir_build_deref_cast(&b, &src->dest.ssa, src->modes, 130bf215546Sopenharmony_ci copy_type, copy_size); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci uint64_t index = offset / copy_size; 133bf215546Sopenharmony_ci nir_ssa_def *value = 134bf215546Sopenharmony_ci memcpy_load_deref_elem_imm(&b, copy_src, index); 135bf215546Sopenharmony_ci memcpy_store_deref_elem_imm(&b, copy_dst, index, value); 136bf215546Sopenharmony_ci offset += copy_size; 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci } else { 139bf215546Sopenharmony_ci found_non_const_memcpy = true; 140bf215546Sopenharmony_ci assert(cpy->src[2].is_ssa); 141bf215546Sopenharmony_ci nir_ssa_def *size = cpy->src[2].ssa; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci /* In this case, we don't have any idea what the size is so we 144bf215546Sopenharmony_ci * emit a loop which copies one byte at a time. 145bf215546Sopenharmony_ci */ 146bf215546Sopenharmony_ci nir_deref_instr *copy_dst = 147bf215546Sopenharmony_ci nir_build_deref_cast(&b, &dst->dest.ssa, dst->modes, 148bf215546Sopenharmony_ci glsl_uint8_t_type(), 1); 149bf215546Sopenharmony_ci nir_deref_instr *copy_src = 150bf215546Sopenharmony_ci nir_build_deref_cast(&b, &src->dest.ssa, src->modes, 151bf215546Sopenharmony_ci glsl_uint8_t_type(), 1); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci nir_variable *i = nir_local_variable_create(impl, 154bf215546Sopenharmony_ci glsl_uintN_t_type(size->bit_size), NULL); 155bf215546Sopenharmony_ci nir_store_var(&b, i, nir_imm_intN_t(&b, 0, size->bit_size), ~0); 156bf215546Sopenharmony_ci nir_push_loop(&b); 157bf215546Sopenharmony_ci { 158bf215546Sopenharmony_ci nir_ssa_def *index = nir_load_var(&b, i); 159bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, index, size)); 160bf215546Sopenharmony_ci { 161bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 162bf215546Sopenharmony_ci } 163bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci nir_ssa_def *value = 166bf215546Sopenharmony_ci memcpy_load_deref_elem(&b, copy_src, index); 167bf215546Sopenharmony_ci memcpy_store_deref_elem(&b, copy_dst, index, value); 168bf215546Sopenharmony_ci nir_store_var(&b, i, nir_iadd_imm(&b, index, 1), ~0); 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci if (found_non_const_memcpy) { 176bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 177bf215546Sopenharmony_ci } else if (found_const_memcpy) { 178bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 179bf215546Sopenharmony_ci nir_metadata_dominance); 180bf215546Sopenharmony_ci } else { 181bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci return found_const_memcpy || found_non_const_memcpy; 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_cibool 188bf215546Sopenharmony_cinir_lower_memcpy(nir_shader *shader) 189bf215546Sopenharmony_ci{ 190bf215546Sopenharmony_ci bool progress = false; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 193bf215546Sopenharmony_ci if (function->impl && lower_memcpy_impl(function->impl)) 194bf215546Sopenharmony_ci progress = true; 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return progress; 198bf215546Sopenharmony_ci} 199