1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci/** @file brw_fs_combine_constants.cpp
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci * This file contains the opt_combine_constants() pass that runs after the
27bf215546Sopenharmony_ci * regular optimization loop. It passes over the instruction list and
28bf215546Sopenharmony_ci * selectively promotes immediate values to registers by emitting a mov(1)
29bf215546Sopenharmony_ci * instruction.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * This is useful on Gen 7 particularly, because a few instructions can be
32bf215546Sopenharmony_ci * coissued (i.e., issued in the same cycle as another thread on the same EU
33bf215546Sopenharmony_ci * issues an instruction) under some circumstances, one of which is that they
34bf215546Sopenharmony_ci * cannot use immediate values.
35bf215546Sopenharmony_ci */
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "brw_fs.h"
38bf215546Sopenharmony_ci#include "brw_cfg.h"
39bf215546Sopenharmony_ci#include "util/half_float.h"
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ciusing namespace brw;
42bf215546Sopenharmony_ci
/* Compile-time switch: when true, opt_combine_constants() prints one line per
 * promoted constant after rewriting the instruction sources.
 */
static const bool debug = false;
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci/* Returns whether an instruction could co-issue if its immediate source were
46bf215546Sopenharmony_ci * replaced with a GRF source.
47bf215546Sopenharmony_ci */
48bf215546Sopenharmony_cistatic bool
49bf215546Sopenharmony_cicould_coissue(const struct intel_device_info *devinfo, const fs_inst *inst)
50bf215546Sopenharmony_ci{
51bf215546Sopenharmony_ci   if (devinfo->ver != 7)
52bf215546Sopenharmony_ci      return false;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   switch (inst->opcode) {
55bf215546Sopenharmony_ci   case BRW_OPCODE_MOV:
56bf215546Sopenharmony_ci   case BRW_OPCODE_CMP:
57bf215546Sopenharmony_ci   case BRW_OPCODE_ADD:
58bf215546Sopenharmony_ci   case BRW_OPCODE_MUL:
59bf215546Sopenharmony_ci      /* Only float instructions can coissue.  We don't have a great
60bf215546Sopenharmony_ci       * understanding of whether or not something like float(int(a) + int(b))
61bf215546Sopenharmony_ci       * would be considered float (based on the destination type) or integer
62bf215546Sopenharmony_ci       * (based on the source types), so we take the conservative choice of
63bf215546Sopenharmony_ci       * only promoting when both destination and source are float.
64bf215546Sopenharmony_ci       */
65bf215546Sopenharmony_ci      return inst->dst.type == BRW_REGISTER_TYPE_F &&
66bf215546Sopenharmony_ci             inst->src[0].type == BRW_REGISTER_TYPE_F;
67bf215546Sopenharmony_ci   default:
68bf215546Sopenharmony_ci      return false;
69bf215546Sopenharmony_ci   }
70bf215546Sopenharmony_ci}
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci/**
73bf215546Sopenharmony_ci * Returns true for instructions that don't support immediate sources.
74bf215546Sopenharmony_ci */
75bf215546Sopenharmony_cistatic bool
76bf215546Sopenharmony_cimust_promote_imm(const struct intel_device_info *devinfo, const fs_inst *inst)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   switch (inst->opcode) {
79bf215546Sopenharmony_ci   case SHADER_OPCODE_POW:
80bf215546Sopenharmony_ci      return devinfo->ver < 8;
81bf215546Sopenharmony_ci   case BRW_OPCODE_MAD:
82bf215546Sopenharmony_ci   case BRW_OPCODE_ADD3:
83bf215546Sopenharmony_ci   case BRW_OPCODE_LRP:
84bf215546Sopenharmony_ci      return true;
85bf215546Sopenharmony_ci   default:
86bf215546Sopenharmony_ci      return false;
87bf215546Sopenharmony_ci   }
88bf215546Sopenharmony_ci}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci/** A box for putting fs_regs in a linked list. */
91bf215546Sopenharmony_cistruct reg_link {
92bf215546Sopenharmony_ci   DECLARE_RALLOC_CXX_OPERATORS(reg_link)
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   reg_link(fs_reg *reg) : reg(reg) {}
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   struct exec_node link;
97bf215546Sopenharmony_ci   fs_reg *reg;
98bf215546Sopenharmony_ci};
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic struct exec_node *
101bf215546Sopenharmony_cilink(void *mem_ctx, fs_reg *reg)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci   reg_link *l = new(mem_ctx) reg_link(reg);
104bf215546Sopenharmony_ci   return &l->link;
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci/**
108bf215546Sopenharmony_ci * Information about an immediate value.
109bf215546Sopenharmony_ci */
110bf215546Sopenharmony_cistruct imm {
111bf215546Sopenharmony_ci   /** The common ancestor of all blocks using this immediate value. */
112bf215546Sopenharmony_ci   bblock_t *block;
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   /**
115bf215546Sopenharmony_ci    * The instruction generating the immediate value, if all uses are contained
116bf215546Sopenharmony_ci    * within a single basic block. Otherwise, NULL.
117bf215546Sopenharmony_ci    */
118bf215546Sopenharmony_ci   fs_inst *inst;
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   /**
121bf215546Sopenharmony_ci    * A list of fs_regs that refer to this immediate.  If we promote it, we'll
122bf215546Sopenharmony_ci    * have to patch these up to refer to the new GRF.
123bf215546Sopenharmony_ci    */
124bf215546Sopenharmony_ci   exec_list *uses;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   /** The immediate value */
127bf215546Sopenharmony_ci   union {
128bf215546Sopenharmony_ci      char bytes[8];
129bf215546Sopenharmony_ci      double df;
130bf215546Sopenharmony_ci      int64_t d64;
131bf215546Sopenharmony_ci      float f;
132bf215546Sopenharmony_ci      int32_t d;
133bf215546Sopenharmony_ci      int16_t w;
134bf215546Sopenharmony_ci   };
135bf215546Sopenharmony_ci   uint8_t size;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   /** When promoting half-float we need to account for certain restrictions */
138bf215546Sopenharmony_ci   bool is_half_float;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   /**
141bf215546Sopenharmony_ci    * The GRF register and subregister number where we've decided to store the
142bf215546Sopenharmony_ci    * constant value.
143bf215546Sopenharmony_ci    */
144bf215546Sopenharmony_ci   uint8_t subreg_offset;
145bf215546Sopenharmony_ci   uint16_t nr;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   /** The number of coissuable instructions using this immediate. */
148bf215546Sopenharmony_ci   uint16_t uses_by_coissue;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   /**
151bf215546Sopenharmony_ci    * Whether this constant is used by an instruction that can't handle an
152bf215546Sopenharmony_ci    * immediate source (and already has to be promoted to a GRF).
153bf215546Sopenharmony_ci    */
154bf215546Sopenharmony_ci   bool must_promote;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   uint16_t first_use_ip;
157bf215546Sopenharmony_ci   uint16_t last_use_ip;
158bf215546Sopenharmony_ci};
159bf215546Sopenharmony_ci
/**
 * Growable array holding every immediate the pass is tracking.
 */
struct table {
   /** Backing storage (ralloc'ed, regrown by new_imm()). */
   struct imm *imm;

   /** Number of entries the storage can hold. */
   int size;

   /** Number of entries currently in use. */
   int len;
};
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_cistatic struct imm *
168bf215546Sopenharmony_cifind_imm(struct table *table, void *data, uint8_t size)
169bf215546Sopenharmony_ci{
170bf215546Sopenharmony_ci   for (int i = 0; i < table->len; i++) {
171bf215546Sopenharmony_ci      if (table->imm[i].size == size &&
172bf215546Sopenharmony_ci          !memcmp(table->imm[i].bytes, data, size)) {
173bf215546Sopenharmony_ci         return &table->imm[i];
174bf215546Sopenharmony_ci      }
175bf215546Sopenharmony_ci   }
176bf215546Sopenharmony_ci   return NULL;
177bf215546Sopenharmony_ci}
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_cistatic struct imm *
180bf215546Sopenharmony_cinew_imm(struct table *table, void *mem_ctx)
181bf215546Sopenharmony_ci{
182bf215546Sopenharmony_ci   if (table->len == table->size) {
183bf215546Sopenharmony_ci      table->size *= 2;
184bf215546Sopenharmony_ci      table->imm = reralloc(mem_ctx, table->imm, struct imm, table->size);
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci   return &table->imm[table->len++];
187bf215546Sopenharmony_ci}
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci/**
190bf215546Sopenharmony_ci * Comparator used for sorting an array of imm structures.
191bf215546Sopenharmony_ci *
192bf215546Sopenharmony_ci * We sort by basic block number, then last use IP, then first use IP (least
193bf215546Sopenharmony_ci * to greatest). This sorting causes immediates live in the same area to be
194bf215546Sopenharmony_ci * allocated to the same register in the hopes that all values will be dead
195bf215546Sopenharmony_ci * about the same time and the register can be reused.
196bf215546Sopenharmony_ci */
197bf215546Sopenharmony_cistatic int
198bf215546Sopenharmony_cicompare(const void *_a, const void *_b)
199bf215546Sopenharmony_ci{
200bf215546Sopenharmony_ci   const struct imm *a = (const struct imm *)_a,
201bf215546Sopenharmony_ci                    *b = (const struct imm *)_b;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   int block_diff = a->block->num - b->block->num;
204bf215546Sopenharmony_ci   if (block_diff)
205bf215546Sopenharmony_ci      return block_diff;
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   int end_diff = a->last_use_ip - b->last_use_ip;
208bf215546Sopenharmony_ci   if (end_diff)
209bf215546Sopenharmony_ci      return end_diff;
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci   return a->first_use_ip - b->first_use_ip;
212bf215546Sopenharmony_ci}
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_cistatic bool
215bf215546Sopenharmony_ciget_constant_value(const struct intel_device_info *devinfo,
216bf215546Sopenharmony_ci                   const fs_inst *inst, uint32_t src_idx,
217bf215546Sopenharmony_ci                   void *out, brw_reg_type *out_type)
218bf215546Sopenharmony_ci{
219bf215546Sopenharmony_ci   const bool can_do_source_mods = inst->can_do_source_mods(devinfo);
220bf215546Sopenharmony_ci   const fs_reg *src = &inst->src[src_idx];
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   *out_type = src->type;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   switch (*out_type) {
225bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_DF: {
226bf215546Sopenharmony_ci      double val = !can_do_source_mods ? src->df : fabs(src->df);
227bf215546Sopenharmony_ci      memcpy(out, &val, 8);
228bf215546Sopenharmony_ci      break;
229bf215546Sopenharmony_ci   }
230bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_F: {
231bf215546Sopenharmony_ci      float val = !can_do_source_mods ? src->f : fabsf(src->f);
232bf215546Sopenharmony_ci      memcpy(out, &val, 4);
233bf215546Sopenharmony_ci      break;
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_HF: {
236bf215546Sopenharmony_ci      uint16_t val = src->d & 0xffffu;
237bf215546Sopenharmony_ci      if (can_do_source_mods)
238bf215546Sopenharmony_ci         val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));
239bf215546Sopenharmony_ci      memcpy(out, &val, 2);
240bf215546Sopenharmony_ci      break;
241bf215546Sopenharmony_ci   }
242bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_Q: {
243bf215546Sopenharmony_ci      int64_t val = !can_do_source_mods ? src->d64 : llabs(src->d64);
244bf215546Sopenharmony_ci      memcpy(out, &val, 8);
245bf215546Sopenharmony_ci      break;
246bf215546Sopenharmony_ci   }
247bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UQ:
248bf215546Sopenharmony_ci      memcpy(out, &src->u64, 8);
249bf215546Sopenharmony_ci      break;
250bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_D: {
251bf215546Sopenharmony_ci      int32_t val = !can_do_source_mods ? src->d : abs(src->d);
252bf215546Sopenharmony_ci      memcpy(out, &val, 4);
253bf215546Sopenharmony_ci      break;
254bf215546Sopenharmony_ci   }
255bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UD:
256bf215546Sopenharmony_ci      memcpy(out, &src->ud, 4);
257bf215546Sopenharmony_ci      break;
258bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_W: {
259bf215546Sopenharmony_ci      int16_t val = src->d & 0xffffu;
260bf215546Sopenharmony_ci      if (can_do_source_mods)
261bf215546Sopenharmony_ci         val = abs(val);
262bf215546Sopenharmony_ci      memcpy(out, &val, 2);
263bf215546Sopenharmony_ci      break;
264bf215546Sopenharmony_ci   }
265bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UW:
266bf215546Sopenharmony_ci      memcpy(out, &src->ud, 2);
267bf215546Sopenharmony_ci      break;
268bf215546Sopenharmony_ci   default:
269bf215546Sopenharmony_ci      return false;
270bf215546Sopenharmony_ci   };
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci   return true;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_cistatic struct brw_reg
276bf215546Sopenharmony_cibuild_imm_reg_for_copy(struct imm *imm)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   switch (imm->size) {
279bf215546Sopenharmony_ci   case 8:
280bf215546Sopenharmony_ci      return brw_imm_d(imm->d64);
281bf215546Sopenharmony_ci   case 4:
282bf215546Sopenharmony_ci      return brw_imm_d(imm->d);
283bf215546Sopenharmony_ci   case 2:
284bf215546Sopenharmony_ci      return brw_imm_w(imm->w);
285bf215546Sopenharmony_ci   default:
286bf215546Sopenharmony_ci      unreachable("not implemented");
287bf215546Sopenharmony_ci   }
288bf215546Sopenharmony_ci}
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_cistatic inline uint32_t
291bf215546Sopenharmony_ciget_alignment_for_imm(const struct imm *imm)
292bf215546Sopenharmony_ci{
293bf215546Sopenharmony_ci   if (imm->is_half_float)
294bf215546Sopenharmony_ci      return 4; /* At least MAD seems to require this */
295bf215546Sopenharmony_ci   else
296bf215546Sopenharmony_ci      return imm->size;
297bf215546Sopenharmony_ci}
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_cistatic bool
300bf215546Sopenharmony_cineeds_negate(const fs_reg *reg, const struct imm *imm)
301bf215546Sopenharmony_ci{
302bf215546Sopenharmony_ci   switch (reg->type) {
303bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_DF:
304bf215546Sopenharmony_ci      return signbit(reg->df) != signbit(imm->df);
305bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_F:
306bf215546Sopenharmony_ci      return signbit(reg->f) != signbit(imm->f);
307bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_Q:
308bf215546Sopenharmony_ci      return (reg->d64 < 0) != (imm->d64 < 0);
309bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_D:
310bf215546Sopenharmony_ci      return (reg->d < 0) != (imm->d < 0);
311bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_HF:
312bf215546Sopenharmony_ci      return (reg->d & 0x8000u) != (imm->w & 0x8000u);
313bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_W:
314bf215546Sopenharmony_ci      return ((int16_t)reg->d < 0) != (imm->w < 0);
315bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UQ:
316bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UD:
317bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UW:
318bf215546Sopenharmony_ci      return false;
319bf215546Sopenharmony_ci   default:
320bf215546Sopenharmony_ci      unreachable("not implemented");
321bf215546Sopenharmony_ci   };
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_cistatic bool
325bf215546Sopenharmony_cirepresentable_as_hf(float f, uint16_t *hf)
326bf215546Sopenharmony_ci{
327bf215546Sopenharmony_ci   union fi u;
328bf215546Sopenharmony_ci   uint16_t h = _mesa_float_to_half(f);
329bf215546Sopenharmony_ci   u.f = _mesa_half_to_float(h);
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   if (u.f == f) {
332bf215546Sopenharmony_ci      *hf = h;
333bf215546Sopenharmony_ci      return true;
334bf215546Sopenharmony_ci   }
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   return false;
337bf215546Sopenharmony_ci}
338bf215546Sopenharmony_ci
/**
 * Checks whether \p d fits in a signed 16-bit word.
 *
 * \param w  receives the narrowed value on success; untouched otherwise
 * \return true when \p d lies in [INT16_MIN, INT16_MAX].
 */
static bool
representable_as_w(int d, int16_t *w)
{
   if (d < INT16_MIN || d > INT16_MAX)
      return false;

   *w = (int16_t)d;
   return true;
}
350bf215546Sopenharmony_ci
/**
 * Checks whether \p ud fits in an unsigned 16-bit word.
 *
 * \param uw  receives the narrowed value on success; untouched otherwise
 * \return true when none of the upper 16 bits of \p ud are set.
 */
static bool
representable_as_uw(unsigned ud, uint16_t *uw)
{
   if ((ud >> 16) != 0)
      return false;

   *uw = (uint16_t)ud;
   return true;
}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_cistatic bool
363bf215546Sopenharmony_cisupports_src_as_imm(const struct intel_device_info *devinfo, enum opcode op)
364bf215546Sopenharmony_ci{
365bf215546Sopenharmony_ci   switch (op) {
366bf215546Sopenharmony_ci   case BRW_OPCODE_ADD3:
367bf215546Sopenharmony_ci      return devinfo->verx10 >= 125;
368bf215546Sopenharmony_ci   case BRW_OPCODE_MAD:
369bf215546Sopenharmony_ci      return devinfo->ver == 12 && devinfo->verx10 < 125;
370bf215546Sopenharmony_ci   default:
371bf215546Sopenharmony_ci      return false;
372bf215546Sopenharmony_ci   }
373bf215546Sopenharmony_ci}
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_cistatic bool
376bf215546Sopenharmony_cican_promote_src_as_imm(const struct intel_device_info *devinfo, fs_inst *inst,
377bf215546Sopenharmony_ci                       unsigned src_idx)
378bf215546Sopenharmony_ci{
379bf215546Sopenharmony_ci   bool can_promote = false;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   /* Experiment shows that we can only support src0 as immediate */
382bf215546Sopenharmony_ci   if (src_idx != 0)
383bf215546Sopenharmony_ci      return false;
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   if (!supports_src_as_imm(devinfo, inst->opcode))
386bf215546Sopenharmony_ci      return false;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   /* TODO - Fix the codepath below to use a bfloat16 immediate on XeHP,
389bf215546Sopenharmony_ci    *        since HF/F mixed mode has been removed from the hardware.
390bf215546Sopenharmony_ci    */
391bf215546Sopenharmony_ci   switch (inst->src[src_idx].type) {
392bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_F: {
393bf215546Sopenharmony_ci      uint16_t hf;
394bf215546Sopenharmony_ci      if (representable_as_hf(inst->src[src_idx].f, &hf)) {
395bf215546Sopenharmony_ci         inst->src[src_idx] = retype(brw_imm_uw(hf), BRW_REGISTER_TYPE_HF);
396bf215546Sopenharmony_ci         can_promote = true;
397bf215546Sopenharmony_ci      }
398bf215546Sopenharmony_ci      break;
399bf215546Sopenharmony_ci   }
400bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_W: {
401bf215546Sopenharmony_ci      int16_t w;
402bf215546Sopenharmony_ci      if (representable_as_w(inst->src[src_idx].d, &w)) {
403bf215546Sopenharmony_ci         inst->src[src_idx] = brw_imm_w(w);
404bf215546Sopenharmony_ci         can_promote = true;
405bf215546Sopenharmony_ci      }
406bf215546Sopenharmony_ci      break;
407bf215546Sopenharmony_ci   }
408bf215546Sopenharmony_ci   case BRW_REGISTER_TYPE_UW: {
409bf215546Sopenharmony_ci      uint16_t uw;
410bf215546Sopenharmony_ci      if (representable_as_uw(inst->src[src_idx].ud, &uw)) {
411bf215546Sopenharmony_ci         inst->src[src_idx] = brw_imm_uw(uw);
412bf215546Sopenharmony_ci         can_promote = true;
413bf215546Sopenharmony_ci      }
414bf215546Sopenharmony_ci      break;
415bf215546Sopenharmony_ci   }
416bf215546Sopenharmony_ci   default:
417bf215546Sopenharmony_ci      break;
418bf215546Sopenharmony_ci   }
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   return can_promote;
421bf215546Sopenharmony_ci}
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_cibool
424bf215546Sopenharmony_cifs_visitor::opt_combine_constants()
425bf215546Sopenharmony_ci{
426bf215546Sopenharmony_ci   void *const_ctx = ralloc_context(NULL);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   struct table table;
429bf215546Sopenharmony_ci   table.size = 8;
430bf215546Sopenharmony_ci   table.len = 0;
431bf215546Sopenharmony_ci   table.imm = ralloc_array(const_ctx, struct imm, table.size);
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci   const brw::idom_tree &idom = idom_analysis.require();
434bf215546Sopenharmony_ci   unsigned ip = -1;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   /* Make a pass through all instructions and count the number of times each
437bf215546Sopenharmony_ci    * constant is used by coissueable instructions or instructions that cannot
438bf215546Sopenharmony_ci    * take immediate arguments.
439bf215546Sopenharmony_ci    */
440bf215546Sopenharmony_ci   foreach_block_and_inst(block, fs_inst, inst, cfg) {
441bf215546Sopenharmony_ci      ip++;
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci      if (!could_coissue(devinfo, inst) && !must_promote_imm(devinfo, inst))
444bf215546Sopenharmony_ci         continue;
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci      for (int i = 0; i < inst->sources; i++) {
447bf215546Sopenharmony_ci         if (inst->src[i].file != IMM)
448bf215546Sopenharmony_ci            continue;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci         if (can_promote_src_as_imm(devinfo, inst, i))
451bf215546Sopenharmony_ci            continue;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci         char data[8];
454bf215546Sopenharmony_ci         brw_reg_type type;
455bf215546Sopenharmony_ci         if (!get_constant_value(devinfo, inst, i, data, &type))
456bf215546Sopenharmony_ci            continue;
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_ci         uint8_t size = type_sz(type);
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci         struct imm *imm = find_imm(&table, data, size);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci         if (imm) {
463bf215546Sopenharmony_ci            bblock_t *intersection = idom.intersect(block, imm->block);
464bf215546Sopenharmony_ci            if (intersection != imm->block)
465bf215546Sopenharmony_ci               imm->inst = NULL;
466bf215546Sopenharmony_ci            imm->block = intersection;
467bf215546Sopenharmony_ci            imm->uses->push_tail(link(const_ctx, &inst->src[i]));
468bf215546Sopenharmony_ci            imm->uses_by_coissue += could_coissue(devinfo, inst);
469bf215546Sopenharmony_ci            imm->must_promote = imm->must_promote || must_promote_imm(devinfo, inst);
470bf215546Sopenharmony_ci            imm->last_use_ip = ip;
471bf215546Sopenharmony_ci            if (type == BRW_REGISTER_TYPE_HF)
472bf215546Sopenharmony_ci               imm->is_half_float = true;
473bf215546Sopenharmony_ci         } else {
474bf215546Sopenharmony_ci            imm = new_imm(&table, const_ctx);
475bf215546Sopenharmony_ci            imm->block = block;
476bf215546Sopenharmony_ci            imm->inst = inst;
477bf215546Sopenharmony_ci            imm->uses = new(const_ctx) exec_list();
478bf215546Sopenharmony_ci            imm->uses->push_tail(link(const_ctx, &inst->src[i]));
479bf215546Sopenharmony_ci            memcpy(imm->bytes, data, size);
480bf215546Sopenharmony_ci            imm->size = size;
481bf215546Sopenharmony_ci            imm->is_half_float = type == BRW_REGISTER_TYPE_HF;
482bf215546Sopenharmony_ci            imm->uses_by_coissue = could_coissue(devinfo, inst);
483bf215546Sopenharmony_ci            imm->must_promote = must_promote_imm(devinfo, inst);
484bf215546Sopenharmony_ci            imm->first_use_ip = ip;
485bf215546Sopenharmony_ci            imm->last_use_ip = ip;
486bf215546Sopenharmony_ci         }
487bf215546Sopenharmony_ci      }
488bf215546Sopenharmony_ci   }
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   /* Remove constants from the table that don't have enough uses to make them
491bf215546Sopenharmony_ci    * profitable to store in a register.
492bf215546Sopenharmony_ci    */
493bf215546Sopenharmony_ci   for (int i = 0; i < table.len;) {
494bf215546Sopenharmony_ci      struct imm *imm = &table.imm[i];
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci      if (!imm->must_promote && imm->uses_by_coissue < 4) {
497bf215546Sopenharmony_ci         table.imm[i] = table.imm[table.len - 1];
498bf215546Sopenharmony_ci         table.len--;
499bf215546Sopenharmony_ci         continue;
500bf215546Sopenharmony_ci      }
501bf215546Sopenharmony_ci      i++;
502bf215546Sopenharmony_ci   }
503bf215546Sopenharmony_ci   if (table.len == 0) {
504bf215546Sopenharmony_ci      ralloc_free(const_ctx);
505bf215546Sopenharmony_ci      return false;
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci   if (cfg->num_blocks != 1)
508bf215546Sopenharmony_ci      qsort(table.imm, table.len, sizeof(struct imm), compare);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci   /* Insert MOVs to load the constant values into GRFs. */
511bf215546Sopenharmony_ci   fs_reg reg(VGRF, alloc.allocate(1));
512bf215546Sopenharmony_ci   reg.stride = 0;
513bf215546Sopenharmony_ci   for (int i = 0; i < table.len; i++) {
514bf215546Sopenharmony_ci      struct imm *imm = &table.imm[i];
515bf215546Sopenharmony_ci      /* Insert it either before the instruction that generated the immediate
516bf215546Sopenharmony_ci       * or after the last non-control flow instruction of the common ancestor.
517bf215546Sopenharmony_ci       */
518bf215546Sopenharmony_ci      exec_node *n = (imm->inst ? imm->inst :
519bf215546Sopenharmony_ci                      imm->block->last_non_control_flow_inst()->next);
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci      /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
522bf215546Sopenharmony_ci       *
523bf215546Sopenharmony_ci       *   "In Align16 mode, the channel selects and channel enables apply to a
524bf215546Sopenharmony_ci       *    pair of half-floats, because these parameters are defined for DWord
525bf215546Sopenharmony_ci       *    elements ONLY. This is applicable when both source and destination
526bf215546Sopenharmony_ci       *    are half-floats."
527bf215546Sopenharmony_ci       *
528bf215546Sopenharmony_ci       * This means that Align16 instructions that use promoted HF immediates
529bf215546Sopenharmony_ci       * and use a <0,1,0>:HF region would read 2 HF slots instead of
530bf215546Sopenharmony_ci       * replicating the single one we want. To avoid this, we always populate
531bf215546Sopenharmony_ci       * both HF slots within a DWord with the constant.
532bf215546Sopenharmony_ci       */
533bf215546Sopenharmony_ci      const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
534bf215546Sopenharmony_ci      const fs_builder ibld = bld.at(imm->block, n).exec_all().group(width, 0);
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci      /* Put the immediate in an offset aligned to its size. Some instructions
537bf215546Sopenharmony_ci       * seem to have additional alignment requirements, so account for that
538bf215546Sopenharmony_ci       * too.
539bf215546Sopenharmony_ci       */
540bf215546Sopenharmony_ci      reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci      /* Ensure we have enough space in the register to copy the immediate */
543bf215546Sopenharmony_ci      struct brw_reg imm_reg = build_imm_reg_for_copy(imm);
544bf215546Sopenharmony_ci      if (reg.offset + type_sz(imm_reg.type) * width > REG_SIZE) {
545bf215546Sopenharmony_ci         reg.nr = alloc.allocate(1);
546bf215546Sopenharmony_ci         reg.offset = 0;
547bf215546Sopenharmony_ci      }
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci      ibld.MOV(retype(reg, imm_reg.type), imm_reg);
550bf215546Sopenharmony_ci      imm->nr = reg.nr;
551bf215546Sopenharmony_ci      imm->subreg_offset = reg.offset;
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci      reg.offset += imm->size * width;
554bf215546Sopenharmony_ci   }
555bf215546Sopenharmony_ci   shader_stats.promoted_constants = table.len;
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   /* Rewrite the immediate sources to refer to the new GRFs. */
558bf215546Sopenharmony_ci   for (int i = 0; i < table.len; i++) {
559bf215546Sopenharmony_ci      foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
560bf215546Sopenharmony_ci         fs_reg *reg = link->reg;
561bf215546Sopenharmony_ci#ifdef DEBUG
562bf215546Sopenharmony_ci         switch (reg->type) {
563bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_DF:
564bf215546Sopenharmony_ci            assert((isnan(reg->df) && isnan(table.imm[i].df)) ||
565bf215546Sopenharmony_ci                   (fabs(reg->df) == fabs(table.imm[i].df)));
566bf215546Sopenharmony_ci            break;
567bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_F:
568bf215546Sopenharmony_ci            assert((isnan(reg->f) && isnan(table.imm[i].f)) ||
569bf215546Sopenharmony_ci                   (fabsf(reg->f) == fabsf(table.imm[i].f)));
570bf215546Sopenharmony_ci            break;
571bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_HF:
572bf215546Sopenharmony_ci            assert((isnan(_mesa_half_to_float(reg->d & 0xffffu)) &&
573bf215546Sopenharmony_ci                    isnan(_mesa_half_to_float(table.imm[i].w))) ||
574bf215546Sopenharmony_ci                   (fabsf(_mesa_half_to_float(reg->d & 0xffffu)) ==
575bf215546Sopenharmony_ci                    fabsf(_mesa_half_to_float(table.imm[i].w))));
576bf215546Sopenharmony_ci            break;
577bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_Q:
578bf215546Sopenharmony_ci            assert(abs(reg->d64) == abs(table.imm[i].d64));
579bf215546Sopenharmony_ci            break;
580bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_UQ:
581bf215546Sopenharmony_ci            assert(reg->d64 == table.imm[i].d64);
582bf215546Sopenharmony_ci            break;
583bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_D:
584bf215546Sopenharmony_ci            assert(abs(reg->d) == abs(table.imm[i].d));
585bf215546Sopenharmony_ci            break;
586bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_UD:
587bf215546Sopenharmony_ci            assert(reg->d == table.imm[i].d);
588bf215546Sopenharmony_ci            break;
589bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_W:
590bf215546Sopenharmony_ci            assert(abs((int16_t) (reg->d & 0xffff)) == table.imm[i].w);
591bf215546Sopenharmony_ci            break;
592bf215546Sopenharmony_ci         case BRW_REGISTER_TYPE_UW:
593bf215546Sopenharmony_ci            assert((reg->ud & 0xffffu) == (uint16_t) table.imm[i].w);
594bf215546Sopenharmony_ci            break;
595bf215546Sopenharmony_ci         default:
596bf215546Sopenharmony_ci            break;
597bf215546Sopenharmony_ci         }
598bf215546Sopenharmony_ci#endif
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci         reg->file = VGRF;
601bf215546Sopenharmony_ci         reg->offset = table.imm[i].subreg_offset;
602bf215546Sopenharmony_ci         reg->stride = 0;
603bf215546Sopenharmony_ci         reg->negate = needs_negate(reg, &table.imm[i]);
604bf215546Sopenharmony_ci         reg->nr = table.imm[i].nr;
605bf215546Sopenharmony_ci      }
606bf215546Sopenharmony_ci   }
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   if (debug) {
609bf215546Sopenharmony_ci      for (int i = 0; i < table.len; i++) {
610bf215546Sopenharmony_ci         struct imm *imm = &table.imm[i];
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci         printf("0x%016" PRIx64 " - block %3d, reg %3d sub %2d, "
613bf215546Sopenharmony_ci                "Uses: (%2d, %2d), IP: %4d to %4d, length %4d\n",
614bf215546Sopenharmony_ci                (uint64_t)(imm->d & BITFIELD64_MASK(imm->size * 8)),
615bf215546Sopenharmony_ci                imm->block->num,
616bf215546Sopenharmony_ci                imm->nr,
617bf215546Sopenharmony_ci                imm->subreg_offset,
618bf215546Sopenharmony_ci                imm->must_promote,
619bf215546Sopenharmony_ci                imm->uses_by_coissue,
620bf215546Sopenharmony_ci                imm->first_use_ip,
621bf215546Sopenharmony_ci                imm->last_use_ip,
622bf215546Sopenharmony_ci                imm->last_use_ip - imm->first_use_ip);
623bf215546Sopenharmony_ci      }
624bf215546Sopenharmony_ci   }
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   ralloc_free(const_ctx);
627bf215546Sopenharmony_ci   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ci   return true;
630bf215546Sopenharmony_ci}
631