1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file lower_int64.cpp
26 *
27 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
28 * to a uvec2.  For each operation that can be lowered, there is a function
29 * called __builtin_foo with the same number of parameters that takes uvec2
30 * sources and produces uvec2 results.  An operation like
31 *
32 *     uint64_t(x) / uint64_t(y)
33 *
34 * becomes
35 *
36 *     packUint2x32(__builtin_udiv64(unpackUint2x32(x), unpackUint2x32(y)));
37 */
38
39#include "main/macros.h"
40#include "compiler/glsl_types.h"
41#include "ir.h"
42#include "ir_rvalue_visitor.h"
43#include "ir_builder.h"
44#include "ir_optimization.h"
45#include "util/hash_table.h"
46#include "builtin_functions.h"
47
/** Callback that builds the ir_function_signature for one 64-bit builtin
 * (e.g. generate_ir::udiv64), allocated out of \c mem_ctx.
 */
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);
50
51using namespace ir_builder;
52
53namespace lower_64bit {
54void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
55
56ir_dereference_variable *compact_destination(ir_factory &,
57                                             const glsl_type *type,
58                                             ir_variable *result[4]);
59
60ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
61                                     ir_expression *ir,
62                                     ir_function_signature *callee);
63};
64
65using namespace lower_64bit;
66
67namespace {
68
/**
 * Visitor that rewrites 64-bit integer expressions as builtin function calls
 *
 * Known __builtin_ functions are tracked in a name-keyed hash table; any
 * function generated during lowering is also collected in \c function_list
 * so that the caller can splice it into the instruction stream.
 */
class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_hash_string,
                                          _mesa_key_string_equal);

      /* Seed the lookup table with the __builtin_ functions already present
       * in the IR so they are reused instead of being generated again.
       */
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      /* The hash table itself is ralloc'ed from mem_ctx; only the table's
       * internal storage is released here (NULL delete callback).
       */
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   /** Record \c f in the name -> ir_function lookup table */
   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   /** Look up a previously recorded function by name; NULL if unknown */
   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   /** True if any expression was lowered */
   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   /** Functions generated by this pass, to be spliced into the IR by the caller */
   exec_list function_list;

private:
   /** Factory whose emit() appends newly generated functions to function_list */
   ir_factory added_functions;

   /** Lower one 64-bit expression to a call to \c function_name */
   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};
126
127} /* anonymous namespace */
128
129/**
130 * Determine if a particular type of lowering should occur
131 */
132#define lowering(x) (this->lower & x)
133
134bool
135lower_64bit_integer_instructions(exec_list *instructions,
136                                 unsigned what_to_lower)
137{
138   if (instructions->is_empty())
139      return false;
140
141   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
142   void *const mem_ctx = ralloc_parent(first_inst);
143   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);
144
145   visit_list_elements(&v, instructions);
146
147   if (v.progress && !v.function_list.is_empty()) {
148      /* Move all of the nodes from function_list to the head if the incoming
149       * instruction list.
150       */
151      exec_node *const after = &instructions->head_sentinel;
152      exec_node *const before = instructions->head_sentinel.next;
153      exec_node *const head = v.function_list.head_sentinel.next;
154      exec_node *const tail = v.function_list.tail_sentinel.prev;
155
156      before->next = head;
157      head->prev = before;
158
159      after->prev = tail;
160      tail->next = after;
161   }
162
163   return v.progress;
164}
165
166
167/**
168 * Expand individual 64-bit values to uvec2 values
169 *
170 * Each operation is in one of a few forms.
171 *
172 *     vector op vector
173 *     vector op scalar
174 *     scalar op vector
175 *     scalar op scalar
176 *
177 * In the 'vector op vector' case, the two vectors must have the same size.
178 * In a way, the 'scalar op scalar' form is special case of the 'vector op
179 * vector' form.
180 *
181 * This method generates a new set of uvec2 values for each element of a
182 * single operand.  If the operand is a scalar, the uvec2 is replicated
183 * multiple times.  A value like
184 *
185 *     u64vec3(a) + u64vec3(b)
186 *
187 * becomes
188 *
189 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
190 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
191 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
192 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
193 *
194 * and the returned operands array contains ir_variable pointers to
195 *
196 *     { tmp1, tmp2, tmp3, tmp1 }
197 */
198void
199lower_64bit::expand_source(ir_factory &body,
200                           ir_rvalue *val,
201                           ir_variable **expanded_src)
202{
203   assert(val->type->is_integer_64());
204
205   ir_variable *const temp = body.make_temp(val->type, "tmp");
206
207   body.emit(assign(temp, val));
208
209   const ir_expression_operation unpack_opcode =
210      val->type->base_type == GLSL_TYPE_UINT64
211      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
212
213   const glsl_type *const type =
214      val->type->base_type == GLSL_TYPE_UINT64
215      ? glsl_type::uvec2_type : glsl_type::ivec2_type;
216
217   unsigned i;
218   for (i = 0; i < val->type->vector_elements; i++) {
219      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
220
221      body.emit(assign(expanded_src[i],
222                       expr(unpack_opcode, swizzle(temp, i, 1))));
223   }
224
225   for (/* empty */; i < 4; i++)
226      expanded_src[i] = expanded_src[0];
227}
228
229/**
230 * Convert a series of uvec2 results into a single 64-bit integer vector
231 */
232ir_dereference_variable *
233lower_64bit::compact_destination(ir_factory &body,
234                                 const glsl_type *type,
235                                 ir_variable *result[4])
236{
237   const ir_expression_operation pack_opcode =
238      type->base_type == GLSL_TYPE_UINT64
239      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
240
241   ir_variable *const compacted_result =
242      body.make_temp(type, "compacted_64bit_result");
243
244   for (unsigned i = 0; i < type->vector_elements; i++) {
245      body.emit(assign(compacted_result,
246                       expr(pack_opcode, result[i]),
247                       1U << i));
248   }
249
250   void *const mem_ctx = ralloc_parent(compacted_result);
251   return new(mem_ctx) ir_dereference_variable(compacted_result);
252}
253
/**
 * Replace a 64-bit expression with per-component calls to \c callee
 *
 * Each operand is expanded to uvec2/ivec2 temporaries, \c callee is invoked
 * once per result component, the uvec2 results are compacted back into a
 * 64-bit vector, and all generated instructions are spliced into the IR
 * immediately before \c base_ir.
 *
 * \returns a dereference of the compacted 64-bit result.
 */
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->num_operands;
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   /* Expand every operand; scalar operands are replicated by expand_source,
    * so src[i][j] is always valid for j < source_components.
    */
   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      /* The widest operand determines how many component-wise calls are
       * needed.
       */
      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   /* Emit one call to the builtin per result component. */
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Move all of the nodes from instructions between base_ir and the
    * instruction before it.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
314
315ir_rvalue *
316lower_64bit_visitor::handle_op(ir_expression *ir,
317                               const char *function_name,
318                               function_generator generator)
319{
320   for (unsigned i = 0; i < ir->num_operands; i++)
321      if (!ir->operands[i]->type->is_integer_64())
322         return ir;
323
324   /* Get a handle to the correct ir_function_signature for the core
325    * operation.
326    */
327   ir_function_signature *callee = NULL;
328   ir_function *f = find_function(function_name);
329
330   if (f != NULL) {
331      callee = (ir_function_signature *) f->signatures.get_head();
332      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
333   } else {
334      f = new(base_ir) ir_function(function_name);
335      callee = generator(base_ir, NULL);
336
337      f->add_signature(callee);
338
339      add_function(f);
340   }
341
342   this->progress = true;
343   return lower_op_to_function_call(this->base_ir, ir, callee);
344}
345
346void
347lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
348{
349   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
350      return;
351
352   ir_expression *const ir = (*rvalue)->as_expression();
353   assert(ir != NULL);
354
355   switch (ir->operation) {
356   case ir_binop_div:
357      if (lowering(DIV64)) {
358         if (ir->type->base_type == GLSL_TYPE_UINT64) {
359            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
360         } else {
361            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
362         }
363      }
364      break;
365
366   case ir_binop_mod:
367      if (lowering(MOD64)) {
368         if (ir->type->base_type == GLSL_TYPE_UINT64) {
369            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
370         } else {
371            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
372         }
373      }
374      break;
375
376   default:
377      break;
378   }
379}
380