/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
 * to a uvec2. For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results. An operation like
 *
 *    uint64_t(x) / uint64_t(y)
 *
 * becomes
 *
 *    packUint2x32(__builtin_udiv64(unpackUint2x32(x), unpackUint2x32(y)));
 */
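/* The entry point for this pass is lower_64bit_integer_instructions(),
 * defined below. As a rough sketch of how a caller might request the
 * lowering (a hypothetical call site; the real one lives outside this file,
 * and the capability check is made up for illustration):
 *
 *    if (!hw_supports_int64_division)
 *       progress |= lower_64bit_integer_instructions(ir, DIV64 | MOD64);
 *
 * DIV64 and MOD64 are the lowering flags handled by this file.
 */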
#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"

typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);

using namespace ir_builder;

namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
};

using namespace lower_64bit;

namespace {

class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_hash_string,
                                          _mesa_key_string_equal);

      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   exec_list function_list;

private:
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};

} /* anonymous namespace */

/**
 * Determine if a particular type of lowering should occur
 */
#define lowering(x) (this->lower & x)

bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the
       * incoming instruction list.
       */
      exec_node *const after = &instructions->head_sentinel;
      exec_node *const before = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}


/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms.
 *
 *    vector op vector
 *    vector op scalar
 *    scalar op vector
 *    scalar op scalar
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is a special case of the 'vector op
 * vector' form.
 *
 * This method generates a new set of uvec2 values for each element of a
 * single operand. If the operand is a scalar, the uvec2 is replicated
 * multiple times. A value like
 *
 *    u64vec3(a) + u64vec3(b)
 *
 * becomes
 *
 *    u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *    uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *    uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *    uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *    { tmp1, tmp2, tmp3, tmp1 }
 */
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   assert(val->type->is_integer_64());

   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   unsigned i;
   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));
   }

   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
}
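/* compact_destination() below performs the inverse of the expansion above:
 * it packs a set of uvec2 partial results back into a single 64-bit vector.
 * Continuing the u64vec3 example, the generated IR corresponds roughly to
 * the following GLSL (a sketch; the pass actually emits one component-masked
 * assignment per element):
 *
 *    u64vec3 compacted_64bit_result;
 *    compacted_64bit_result.x = packUint2x32(result[0]);
 *    compacted_64bit_result.y = packUint2x32(result[1]);
 *    compacted_64bit_result.z = packUint2x32(result[2]);
 */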
/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),
                       1U << i));
   }

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
}

/**
 * Lower a 64-bit expression to a sequence of per-component function calls
 *
 * Each operand is expanded to uvec2 temporaries, \c callee is called once
 * per result component, and the uvec2 results are packed back into a value
 * of the expression's original type.
 */
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->num_operands;
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Move all of the nodes from \c instructions to just before \c base_ir
    * in the main instruction stream.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
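/**
 * Lower a single 64-bit expression to a call to a __builtin_* function
 *
 * Expressions whose operands are not all 64-bit integers are returned
 * unchanged. Otherwise the function named \c function_name is looked up
 * among the functions already known to the visitor; if it does not exist
 * yet, its signature is generated on the fly by \c generator and remembered
 * so that later expressions can reuse it.
 */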
ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
   for (unsigned i = 0; i < ir->num_operands; i++)
      if (!ir->operands[i]->type->is_integer_64())
         return ir;

   /* Get a handle to the correct ir_function_signature for the core
    * operation.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);

   if (f != NULL) {
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
   } else {
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

      add_function(f);
   }

   this->progress = true;
   return lower_op_to_function_call(this->base_ir, ir, callee);
}

void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

   default:
      break;
   }
}