/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_builder.h"

#include <string.h>

/** Returns the type to use for a copy of the given size.
 *
 * The actual type doesn't matter here all that much as we're just going to do
 * a load/store on it and never any arithmetic.
 */
static const struct glsl_type *
copy_type_for_byte_size(unsigned size)
{
   switch (size) {
   case 1:  return glsl_vector_type(GLSL_TYPE_UINT8, 1);
   case 2:  return glsl_vector_type(GLSL_TYPE_UINT16, 1);
   case 4:  return glsl_vector_type(GLSL_TYPE_UINT, 1);
   case 8:  return glsl_vector_type(GLSL_TYPE_UINT, 2);
   case 16: return glsl_vector_type(GLSL_TYPE_UINT, 4);
   default:
      unreachable("Unsupported size");
   }
}

static nir_ssa_def *
memcpy_load_deref_elem(nir_builder *b, nir_deref_instr *parent,
                       nir_ssa_def *index)
{
   nir_deref_instr *deref;

   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
   assert(parent->deref_type == nir_deref_type_cast);
   deref = nir_build_deref_ptr_as_array(b, parent, index);

   return nir_load_deref(b, deref);
}

static nir_ssa_def *
memcpy_load_deref_elem_imm(nir_builder *b, nir_deref_instr *parent,
                           uint64_t index)
{
   nir_ssa_def *idx = nir_imm_intN_t(b, index, parent->dest.ssa.bit_size);
   return memcpy_load_deref_elem(b, parent, idx);
}

static void
memcpy_store_deref_elem(nir_builder *b, nir_deref_instr *parent,
                        nir_ssa_def *index, nir_ssa_def *value)
{
   nir_deref_instr *deref;

   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
   assert(parent->deref_type == nir_deref_type_cast);
   deref = nir_build_deref_ptr_as_array(b, parent, index);
   nir_store_deref(b, deref, value, ~0);
}

static void
memcpy_store_deref_elem_imm(nir_builder *b, nir_deref_instr *parent,
                            uint64_t index, nir_ssa_def *value)
{
   nir_ssa_def *idx = nir_imm_intN_t(b, index, parent->dest.ssa.bit_size);
   memcpy_store_deref_elem(b, parent, idx, value);
}

static bool
lower_memcpy_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool found_const_memcpy = false, found_non_const_memcpy = false;

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *cpy = nir_instr_as_intrinsic(instr);
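         /* We only care about nir_intrinsic_memcpy_deref, whose sources are
          * the destination deref, the source deref, and the number of bytes
          * to copy.
          */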
         if (cpy->intrinsic != nir_intrinsic_memcpy_deref)
            continue;

         b.cursor = nir_instr_remove(&cpy->instr);

         nir_deref_instr *dst = nir_src_as_deref(cpy->src[0]);
         nir_deref_instr *src = nir_src_as_deref(cpy->src[1]);
         if (nir_src_is_const(cpy->src[2])) {
            found_const_memcpy = true;
            uint64_t size = nir_src_as_uint(cpy->src[2]);
            uint64_t offset = 0;
            while (offset < size) {
               uint64_t remaining = size - offset;
               /* Pick the largest power-of-two chunk size that fits in the
                * remaining bytes (the MSB of remaining), capped at 16B (a
                * vec4).  It's important to do as many 16B chunks as possible
                * first: because the chunk size never grows, offset stays a
                * multiple of copy_size, which keeps the index computation
                * correct for memcpy_(load|store)_deref_elem_imm.
                */
               unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4);
               const struct glsl_type *copy_type =
                  copy_type_for_byte_size(copy_size);

               nir_deref_instr *copy_dst =
                  nir_build_deref_cast(&b, &dst->dest.ssa, dst->modes,
                                       copy_type, copy_size);
               nir_deref_instr *copy_src =
                  nir_build_deref_cast(&b, &src->dest.ssa, src->modes,
                                       copy_type, copy_size);

               uint64_t index = offset / copy_size;
               nir_ssa_def *value =
                  memcpy_load_deref_elem_imm(&b, copy_src, index);
               memcpy_store_deref_elem_imm(&b, copy_dst, index, value);
               offset += copy_size;
            }
         } else {
            found_non_const_memcpy = true;
            assert(cpy->src[2].is_ssa);
            nir_ssa_def *size = cpy->src[2].ssa;

            /* In this case, we don't have any idea what the size is so we
             * emit a loop which copies one byte at a time.
             */
            nir_deref_instr *copy_dst =
               nir_build_deref_cast(&b, &dst->dest.ssa, dst->modes,
                                    glsl_uint8_t_type(), 1);
            nir_deref_instr *copy_src =
               nir_build_deref_cast(&b, &src->dest.ssa, src->modes,
                                    glsl_uint8_t_type(), 1);

            nir_variable *i = nir_local_variable_create(impl,
               glsl_uintN_t_type(size->bit_size), NULL);
            nir_store_var(&b, i, nir_imm_intN_t(&b, 0, size->bit_size), ~0);
            nir_push_loop(&b);
            {
               nir_ssa_def *index = nir_load_var(&b, i);
               nir_push_if(&b, nir_uge(&b, index, size));
               {
                  nir_jump(&b, nir_jump_break);
               }
               nir_pop_if(&b, NULL);

               nir_ssa_def *value =
                  memcpy_load_deref_elem(&b, copy_src, index);
               memcpy_store_deref_elem(&b, copy_dst, index, value);
               nir_store_var(&b, i, nir_iadd_imm(&b, index, 1), ~0);
            }
            nir_pop_loop(&b, NULL);
         }
      }
   }

   if (found_non_const_memcpy) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else if (found_const_memcpy) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return found_const_memcpy || found_non_const_memcpy;
}

bool
nir_lower_memcpy(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && lower_memcpy_impl(function->impl))
         progress = true;
   }

   return progress;
}