1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#ifndef ACO_IR_H
26bf215546Sopenharmony_ci#define ACO_IR_H
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "aco_opcodes.h"
29bf215546Sopenharmony_ci#include "aco_util.h"
30bf215546Sopenharmony_ci#include "aco_interface.h"
31bf215546Sopenharmony_ci#include "aco_shader_info.h"
32bf215546Sopenharmony_ci#include "vulkan/radv_shader.h"
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci#include "nir.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include <bitset>
37bf215546Sopenharmony_ci#include <memory>
38bf215546Sopenharmony_ci#include <vector>
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistruct radv_shader_args;
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cinamespace aco {
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ciextern uint64_t debug_flags;
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_cienum {
47bf215546Sopenharmony_ci   DEBUG_VALIDATE_IR = 0x1,
48bf215546Sopenharmony_ci   DEBUG_VALIDATE_RA = 0x2,
49bf215546Sopenharmony_ci   DEBUG_PERFWARN = 0x4,
50bf215546Sopenharmony_ci   DEBUG_FORCE_WAITCNT = 0x8,
51bf215546Sopenharmony_ci   DEBUG_NO_VN = 0x10,
52bf215546Sopenharmony_ci   DEBUG_NO_OPT = 0x20,
53bf215546Sopenharmony_ci   DEBUG_NO_SCHED = 0x40,
54bf215546Sopenharmony_ci   DEBUG_PERF_INFO = 0x80,
55bf215546Sopenharmony_ci   DEBUG_LIVE_INFO = 0x100,
56bf215546Sopenharmony_ci};
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci/**
59bf215546Sopenharmony_ci * Representation of the instruction's microcode encoding format
60bf215546Sopenharmony_ci * Note: Some Vector ALU Formats can be combined, such that:
61bf215546Sopenharmony_ci * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
62bf215546Sopenharmony_ci * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
63bf215546Sopenharmony_ci * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
64bf215546Sopenharmony_ci *
65bf215546Sopenharmony_ci * (*) The same is applicable for VOP1 and VOPC instructions.
66bf215546Sopenharmony_ci */
67bf215546Sopenharmony_cienum class Format : std::uint16_t {
68bf215546Sopenharmony_ci   /* Pseudo Instruction Format */
69bf215546Sopenharmony_ci   PSEUDO = 0,
70bf215546Sopenharmony_ci   /* Scalar ALU & Control Formats */
71bf215546Sopenharmony_ci   SOP1 = 1,
72bf215546Sopenharmony_ci   SOP2 = 2,
73bf215546Sopenharmony_ci   SOPK = 3,
74bf215546Sopenharmony_ci   SOPP = 4,
75bf215546Sopenharmony_ci   SOPC = 5,
76bf215546Sopenharmony_ci   /* Scalar Memory Format */
77bf215546Sopenharmony_ci   SMEM = 6,
78bf215546Sopenharmony_ci   /* LDS/GDS Format */
79bf215546Sopenharmony_ci   DS = 8,
80bf215546Sopenharmony_ci   /* Vector Memory Buffer Formats */
81bf215546Sopenharmony_ci   MTBUF = 9,
82bf215546Sopenharmony_ci   MUBUF = 10,
83bf215546Sopenharmony_ci   /* Vector Memory Image Format */
84bf215546Sopenharmony_ci   MIMG = 11,
85bf215546Sopenharmony_ci   /* Export Format */
86bf215546Sopenharmony_ci   EXP = 12,
87bf215546Sopenharmony_ci   /* Flat Formats */
88bf215546Sopenharmony_ci   FLAT = 13,
89bf215546Sopenharmony_ci   GLOBAL = 14,
90bf215546Sopenharmony_ci   SCRATCH = 15,
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   PSEUDO_BRANCH = 16,
93bf215546Sopenharmony_ci   PSEUDO_BARRIER = 17,
94bf215546Sopenharmony_ci   PSEUDO_REDUCTION = 18,
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   /* Vector ALU Formats */
97bf215546Sopenharmony_ci   VOP3P = 19,
98bf215546Sopenharmony_ci   VOP1 = 1 << 8,
99bf215546Sopenharmony_ci   VOP2 = 1 << 9,
100bf215546Sopenharmony_ci   VOPC = 1 << 10,
101bf215546Sopenharmony_ci   VOP3 = 1 << 11,
102bf215546Sopenharmony_ci   /* Vector Parameter Interpolation Format */
103bf215546Sopenharmony_ci   VINTRP = 1 << 12,
104bf215546Sopenharmony_ci   DPP16 = 1 << 13,
105bf215546Sopenharmony_ci   SDWA = 1 << 14,
106bf215546Sopenharmony_ci   DPP8 = 1 << 15,
107bf215546Sopenharmony_ci};
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cienum class instr_class : uint8_t {
110bf215546Sopenharmony_ci   valu32 = 0,
111bf215546Sopenharmony_ci   valu_convert32 = 1,
112bf215546Sopenharmony_ci   valu64 = 2,
113bf215546Sopenharmony_ci   valu_quarter_rate32 = 3,
114bf215546Sopenharmony_ci   valu_fma = 4,
115bf215546Sopenharmony_ci   valu_transcendental32 = 5,
116bf215546Sopenharmony_ci   valu_double = 6,
117bf215546Sopenharmony_ci   valu_double_add = 7,
118bf215546Sopenharmony_ci   valu_double_convert = 8,
119bf215546Sopenharmony_ci   valu_double_transcendental = 9,
120bf215546Sopenharmony_ci   salu = 10,
121bf215546Sopenharmony_ci   smem = 11,
122bf215546Sopenharmony_ci   barrier = 12,
123bf215546Sopenharmony_ci   branch = 13,
124bf215546Sopenharmony_ci   sendmsg = 14,
125bf215546Sopenharmony_ci   ds = 15,
126bf215546Sopenharmony_ci   exp = 16,
127bf215546Sopenharmony_ci   vmem = 17,
128bf215546Sopenharmony_ci   waitcnt = 18,
129bf215546Sopenharmony_ci   other = 19,
130bf215546Sopenharmony_ci   count,
131bf215546Sopenharmony_ci};
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_cienum storage_class : uint8_t {
134bf215546Sopenharmony_ci   storage_none = 0x0,   /* no synchronization and can be reordered around aliasing stores */
135bf215546Sopenharmony_ci   storage_buffer = 0x1, /* SSBOs and global memory */
136bf215546Sopenharmony_ci   storage_atomic_counter = 0x2, /* not used for Vulkan */
137bf215546Sopenharmony_ci   storage_image = 0x4,
138bf215546Sopenharmony_ci   storage_shared = 0x8,       /* or TCS output */
139bf215546Sopenharmony_ci   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
140bf215546Sopenharmony_ci   storage_task_payload = 0x20,/* Task-Mesh payload */
141bf215546Sopenharmony_ci   storage_scratch = 0x40,
142bf215546Sopenharmony_ci   storage_vgpr_spill = 0x80,
143bf215546Sopenharmony_ci   storage_count = 8, /* not counting storage_none */
144bf215546Sopenharmony_ci};
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_cienum memory_semantics : uint8_t {
147bf215546Sopenharmony_ci   semantic_none = 0x0,
148bf215546Sopenharmony_ci   /* for loads: don't move any access after this load to before this load (even other loads)
149bf215546Sopenharmony_ci    * for barriers: don't move any access after the barrier to before any
150bf215546Sopenharmony_ci    * atomics/control_barriers/sendmsg_gs_done before the barrier */
151bf215546Sopenharmony_ci   semantic_acquire = 0x1,
152bf215546Sopenharmony_ci   /* for stores: don't move any access before this store to after this store
153bf215546Sopenharmony_ci    * for barriers: don't move any access before the barrier to after any
154bf215546Sopenharmony_ci    * atomics/control_barriers/sendmsg_gs_done after the barrier */
155bf215546Sopenharmony_ci   semantic_release = 0x2,
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   /* the rest are for load/stores/atomics only */
158bf215546Sopenharmony_ci   /* cannot be DCE'd or CSE'd */
159bf215546Sopenharmony_ci   semantic_volatile = 0x4,
160bf215546Sopenharmony_ci   /* does not interact with barriers and assumes this lane is the only lane
161bf215546Sopenharmony_ci    * accessing this memory */
162bf215546Sopenharmony_ci   semantic_private = 0x8,
163bf215546Sopenharmony_ci   /* this operation can be reordered around operations of the same storage.
164bf215546Sopenharmony_ci    * says nothing about barriers */
165bf215546Sopenharmony_ci   semantic_can_reorder = 0x10,
166bf215546Sopenharmony_ci   /* this is a atomic instruction (may only read or write memory) */
167bf215546Sopenharmony_ci   semantic_atomic = 0x20,
168bf215546Sopenharmony_ci   /* this is instruction both reads and writes memory */
169bf215546Sopenharmony_ci   semantic_rmw = 0x40,
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   semantic_acqrel = semantic_acquire | semantic_release,
172bf215546Sopenharmony_ci   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
173bf215546Sopenharmony_ci};
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_cienum sync_scope : uint8_t {
176bf215546Sopenharmony_ci   scope_invocation = 0,
177bf215546Sopenharmony_ci   scope_subgroup = 1,
178bf215546Sopenharmony_ci   scope_workgroup = 2,
179bf215546Sopenharmony_ci   scope_queuefamily = 3,
180bf215546Sopenharmony_ci   scope_device = 4,
181bf215546Sopenharmony_ci};
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_cistruct memory_sync_info {
184bf215546Sopenharmony_ci   memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
185bf215546Sopenharmony_ci   memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
186bf215546Sopenharmony_ci       : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
187bf215546Sopenharmony_ci   {}
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   storage_class storage : 8;
190bf215546Sopenharmony_ci   memory_semantics semantics : 8;
191bf215546Sopenharmony_ci   sync_scope scope : 8;
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   bool operator==(const memory_sync_info& rhs) const
194bf215546Sopenharmony_ci   {
195bf215546Sopenharmony_ci      return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
196bf215546Sopenharmony_ci   }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   bool can_reorder() const
199bf215546Sopenharmony_ci   {
200bf215546Sopenharmony_ci      if (semantics & semantic_acqrel)
201bf215546Sopenharmony_ci         return false;
202bf215546Sopenharmony_ci      /* Also check storage so that zero-initialized memory_sync_info can be
203bf215546Sopenharmony_ci       * reordered. */
204bf215546Sopenharmony_ci      return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
205bf215546Sopenharmony_ci   }
206bf215546Sopenharmony_ci};
207bf215546Sopenharmony_cistatic_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_cienum fp_round {
210bf215546Sopenharmony_ci   fp_round_ne = 0,
211bf215546Sopenharmony_ci   fp_round_pi = 1,
212bf215546Sopenharmony_ci   fp_round_ni = 2,
213bf215546Sopenharmony_ci   fp_round_tz = 3,
214bf215546Sopenharmony_ci};
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_cienum fp_denorm {
217bf215546Sopenharmony_ci   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
218bf215546Sopenharmony_ci    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
219bf215546Sopenharmony_ci   fp_denorm_flush = 0x0,
220bf215546Sopenharmony_ci   fp_denorm_keep_in = 0x1,
221bf215546Sopenharmony_ci   fp_denorm_keep_out = 0x2,
222bf215546Sopenharmony_ci   fp_denorm_keep = 0x3,
223bf215546Sopenharmony_ci};
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_cistruct float_mode {
226bf215546Sopenharmony_ci   /* matches encoding of the MODE register */
227bf215546Sopenharmony_ci   union {
228bf215546Sopenharmony_ci      struct {
229bf215546Sopenharmony_ci         fp_round round32 : 2;
230bf215546Sopenharmony_ci         fp_round round16_64 : 2;
231bf215546Sopenharmony_ci         unsigned denorm32 : 2;
232bf215546Sopenharmony_ci         unsigned denorm16_64 : 2;
233bf215546Sopenharmony_ci      };
234bf215546Sopenharmony_ci      struct {
235bf215546Sopenharmony_ci         uint8_t round : 4;
236bf215546Sopenharmony_ci         uint8_t denorm : 4;
237bf215546Sopenharmony_ci      };
238bf215546Sopenharmony_ci      uint8_t val = 0;
239bf215546Sopenharmony_ci   };
240bf215546Sopenharmony_ci   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
241bf215546Sopenharmony_ci   bool preserve_signed_zero_inf_nan32 : 1;
242bf215546Sopenharmony_ci   bool preserve_signed_zero_inf_nan16_64 : 1;
243bf215546Sopenharmony_ci   /* if false, optimizations which may remove denormal flushing can be done */
244bf215546Sopenharmony_ci   bool must_flush_denorms32 : 1;
245bf215546Sopenharmony_ci   bool must_flush_denorms16_64 : 1;
246bf215546Sopenharmony_ci   bool care_about_round32 : 1;
247bf215546Sopenharmony_ci   bool care_about_round16_64 : 1;
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci   /* Returns true if instructions using the mode "other" can safely use the
250bf215546Sopenharmony_ci    * current one instead. */
251bf215546Sopenharmony_ci   bool canReplace(float_mode other) const noexcept
252bf215546Sopenharmony_ci   {
253bf215546Sopenharmony_ci      return val == other.val &&
254bf215546Sopenharmony_ci             (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
255bf215546Sopenharmony_ci             (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
256bf215546Sopenharmony_ci             (must_flush_denorms32 || !other.must_flush_denorms32) &&
257bf215546Sopenharmony_ci             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
258bf215546Sopenharmony_ci             (care_about_round32 || !other.care_about_round32) &&
259bf215546Sopenharmony_ci             (care_about_round16_64 || !other.care_about_round16_64);
260bf215546Sopenharmony_ci   }
261bf215546Sopenharmony_ci};
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_cistruct wait_imm {
264bf215546Sopenharmony_ci   static const uint8_t unset_counter = 0xff;
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   uint8_t vm;
267bf215546Sopenharmony_ci   uint8_t exp;
268bf215546Sopenharmony_ci   uint8_t lgkm;
269bf215546Sopenharmony_ci   uint8_t vs;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   wait_imm();
272bf215546Sopenharmony_ci   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
273bf215546Sopenharmony_ci   wait_imm(enum amd_gfx_level chip, uint16_t packed);
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci   uint16_t pack(enum amd_gfx_level chip) const;
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   bool combine(const wait_imm& other);
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   bool empty() const;
280bf215546Sopenharmony_ci};
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ciconstexpr Format
283bf215546Sopenharmony_ciasVOP3(Format format)
284bf215546Sopenharmony_ci{
285bf215546Sopenharmony_ci   return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
286bf215546Sopenharmony_ci};
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ciconstexpr Format
289bf215546Sopenharmony_ciasSDWA(Format format)
290bf215546Sopenharmony_ci{
291bf215546Sopenharmony_ci   assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
292bf215546Sopenharmony_ci   return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ciconstexpr Format
296bf215546Sopenharmony_ciwithoutDPP(Format format)
297bf215546Sopenharmony_ci{
298bf215546Sopenharmony_ci   return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8));
299bf215546Sopenharmony_ci}
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_cienum class RegType {
302bf215546Sopenharmony_ci   none = 0,
303bf215546Sopenharmony_ci   sgpr,
304bf215546Sopenharmony_ci   vgpr,
305bf215546Sopenharmony_ci   linear_vgpr,
306bf215546Sopenharmony_ci};
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_cistruct RegClass {
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   enum RC : uint8_t {
311bf215546Sopenharmony_ci      s1 = 1,
312bf215546Sopenharmony_ci      s2 = 2,
313bf215546Sopenharmony_ci      s3 = 3,
314bf215546Sopenharmony_ci      s4 = 4,
315bf215546Sopenharmony_ci      s6 = 6,
316bf215546Sopenharmony_ci      s8 = 8,
317bf215546Sopenharmony_ci      s16 = 16,
318bf215546Sopenharmony_ci      v1 = s1 | (1 << 5),
319bf215546Sopenharmony_ci      v2 = s2 | (1 << 5),
320bf215546Sopenharmony_ci      v3 = s3 | (1 << 5),
321bf215546Sopenharmony_ci      v4 = s4 | (1 << 5),
322bf215546Sopenharmony_ci      v5 = 5 | (1 << 5),
323bf215546Sopenharmony_ci      v6 = 6 | (1 << 5),
324bf215546Sopenharmony_ci      v7 = 7 | (1 << 5),
325bf215546Sopenharmony_ci      v8 = 8 | (1 << 5),
326bf215546Sopenharmony_ci      /* byte-sized register class */
327bf215546Sopenharmony_ci      v1b = v1 | (1 << 7),
328bf215546Sopenharmony_ci      v2b = v2 | (1 << 7),
329bf215546Sopenharmony_ci      v3b = v3 | (1 << 7),
330bf215546Sopenharmony_ci      v4b = v4 | (1 << 7),
331bf215546Sopenharmony_ci      v6b = v6 | (1 << 7),
332bf215546Sopenharmony_ci      v8b = v8 | (1 << 7),
333bf215546Sopenharmony_ci      /* these are used for WWM and spills to vgpr */
334bf215546Sopenharmony_ci      v1_linear = v1 | (1 << 6),
335bf215546Sopenharmony_ci      v2_linear = v2 | (1 << 6),
336bf215546Sopenharmony_ci   };
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   RegClass() = default;
339bf215546Sopenharmony_ci   constexpr RegClass(RC rc_) : rc(rc_) {}
340bf215546Sopenharmony_ci   constexpr RegClass(RegType type, unsigned size)
341bf215546Sopenharmony_ci       : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
342bf215546Sopenharmony_ci   {}
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   constexpr operator RC() const { return rc; }
345bf215546Sopenharmony_ci   explicit operator bool() = delete;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
348bf215546Sopenharmony_ci   constexpr bool is_linear_vgpr() const { return rc & (1 << 6); };
349bf215546Sopenharmony_ci   constexpr bool is_subdword() const { return rc & (1 << 7); }
350bf215546Sopenharmony_ci   constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
351bf215546Sopenharmony_ci   // TODO: use size() less in favor of bytes()
352bf215546Sopenharmony_ci   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
353bf215546Sopenharmony_ci   constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); }
354bf215546Sopenharmony_ci   constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
355bf215546Sopenharmony_ci   constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   static constexpr RegClass get(RegType type, unsigned bytes)
358bf215546Sopenharmony_ci   {
359bf215546Sopenharmony_ci      if (type == RegType::sgpr) {
360bf215546Sopenharmony_ci         return RegClass(type, DIV_ROUND_UP(bytes, 4u));
361bf215546Sopenharmony_ci      } else {
362bf215546Sopenharmony_ci         return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
363bf215546Sopenharmony_ci      }
364bf215546Sopenharmony_ci   }
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   constexpr RegClass resize(unsigned bytes) const
367bf215546Sopenharmony_ci   {
368bf215546Sopenharmony_ci      if (is_linear_vgpr()) {
369bf215546Sopenharmony_ci         assert(bytes % 4u == 0);
370bf215546Sopenharmony_ci         return get(RegType::vgpr, bytes).as_linear();
371bf215546Sopenharmony_ci      }
372bf215546Sopenharmony_ci      return get(type(), bytes);
373bf215546Sopenharmony_ci   }
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ciprivate:
376bf215546Sopenharmony_ci   RC rc;
377bf215546Sopenharmony_ci};
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci/* transitional helper expressions */
380bf215546Sopenharmony_cistatic constexpr RegClass s1{RegClass::s1};
381bf215546Sopenharmony_cistatic constexpr RegClass s2{RegClass::s2};
382bf215546Sopenharmony_cistatic constexpr RegClass s3{RegClass::s3};
383bf215546Sopenharmony_cistatic constexpr RegClass s4{RegClass::s4};
384bf215546Sopenharmony_cistatic constexpr RegClass s8{RegClass::s8};
385bf215546Sopenharmony_cistatic constexpr RegClass s16{RegClass::s16};
386bf215546Sopenharmony_cistatic constexpr RegClass v1{RegClass::v1};
387bf215546Sopenharmony_cistatic constexpr RegClass v2{RegClass::v2};
388bf215546Sopenharmony_cistatic constexpr RegClass v3{RegClass::v3};
389bf215546Sopenharmony_cistatic constexpr RegClass v4{RegClass::v4};
390bf215546Sopenharmony_cistatic constexpr RegClass v5{RegClass::v5};
391bf215546Sopenharmony_cistatic constexpr RegClass v6{RegClass::v6};
392bf215546Sopenharmony_cistatic constexpr RegClass v7{RegClass::v7};
393bf215546Sopenharmony_cistatic constexpr RegClass v8{RegClass::v8};
394bf215546Sopenharmony_cistatic constexpr RegClass v1b{RegClass::v1b};
395bf215546Sopenharmony_cistatic constexpr RegClass v2b{RegClass::v2b};
396bf215546Sopenharmony_cistatic constexpr RegClass v3b{RegClass::v3b};
397bf215546Sopenharmony_cistatic constexpr RegClass v4b{RegClass::v4b};
398bf215546Sopenharmony_cistatic constexpr RegClass v6b{RegClass::v6b};
399bf215546Sopenharmony_cistatic constexpr RegClass v8b{RegClass::v8b};
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci/**
402bf215546Sopenharmony_ci * Temp Class
403bf215546Sopenharmony_ci * Each temporary virtual register has a
404bf215546Sopenharmony_ci * register class (i.e. size and type)
405bf215546Sopenharmony_ci * and SSA id.
406bf215546Sopenharmony_ci */
407bf215546Sopenharmony_cistruct Temp {
408bf215546Sopenharmony_ci   Temp() noexcept : id_(0), reg_class(0) {}
409bf215546Sopenharmony_ci   constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   constexpr uint32_t id() const noexcept { return id_; }
412bf215546Sopenharmony_ci   constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
415bf215546Sopenharmony_ci   constexpr unsigned size() const noexcept { return regClass().size(); }
416bf215546Sopenharmony_ci   constexpr RegType type() const noexcept { return regClass().type(); }
417bf215546Sopenharmony_ci   constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci   constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
420bf215546Sopenharmony_ci   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
421bf215546Sopenharmony_ci   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ciprivate:
424bf215546Sopenharmony_ci   uint32_t id_ : 24;
425bf215546Sopenharmony_ci   uint32_t reg_class : 8;
426bf215546Sopenharmony_ci};
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci/**
429bf215546Sopenharmony_ci * PhysReg
430bf215546Sopenharmony_ci * Represents the physical register for each
431bf215546Sopenharmony_ci * Operand and Definition.
432bf215546Sopenharmony_ci */
433bf215546Sopenharmony_cistruct PhysReg {
434bf215546Sopenharmony_ci   constexpr PhysReg() = default;
435bf215546Sopenharmony_ci   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
436bf215546Sopenharmony_ci   constexpr unsigned reg() const { return reg_b >> 2; }
437bf215546Sopenharmony_ci   constexpr unsigned byte() const { return reg_b & 0x3; }
438bf215546Sopenharmony_ci   constexpr operator unsigned() const { return reg(); }
439bf215546Sopenharmony_ci   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
440bf215546Sopenharmony_ci   constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
441bf215546Sopenharmony_ci   constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
442bf215546Sopenharmony_ci   constexpr PhysReg advance(int bytes) const
443bf215546Sopenharmony_ci   {
444bf215546Sopenharmony_ci      PhysReg res = *this;
445bf215546Sopenharmony_ci      res.reg_b += bytes;
446bf215546Sopenharmony_ci      return res;
447bf215546Sopenharmony_ci   }
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   uint16_t reg_b = 0;
450bf215546Sopenharmony_ci};
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci/* helper expressions for special registers */
453bf215546Sopenharmony_cistatic constexpr PhysReg m0{124};
454bf215546Sopenharmony_cistatic constexpr PhysReg flat_scr_lo{102}; /* GFX8-GFX9, encoded differently on GFX6-7 */
455bf215546Sopenharmony_cistatic constexpr PhysReg flat_scr_hi{103}; /* GFX8-GFX9, encoded differently on GFX6-7 */
456bf215546Sopenharmony_cistatic constexpr PhysReg vcc{106};
457bf215546Sopenharmony_cistatic constexpr PhysReg vcc_hi{107};
458bf215546Sopenharmony_cistatic constexpr PhysReg tba{108}; /* GFX6-GFX8 */
459bf215546Sopenharmony_cistatic constexpr PhysReg tma{110}; /* GFX6-GFX8 */
460bf215546Sopenharmony_cistatic constexpr PhysReg ttmp0{112};
461bf215546Sopenharmony_cistatic constexpr PhysReg ttmp1{113};
462bf215546Sopenharmony_cistatic constexpr PhysReg ttmp2{114};
463bf215546Sopenharmony_cistatic constexpr PhysReg ttmp3{115};
464bf215546Sopenharmony_cistatic constexpr PhysReg ttmp4{116};
465bf215546Sopenharmony_cistatic constexpr PhysReg ttmp5{117};
466bf215546Sopenharmony_cistatic constexpr PhysReg ttmp6{118};
467bf215546Sopenharmony_cistatic constexpr PhysReg ttmp7{119};
468bf215546Sopenharmony_cistatic constexpr PhysReg ttmp8{120};
469bf215546Sopenharmony_cistatic constexpr PhysReg ttmp9{121};
470bf215546Sopenharmony_cistatic constexpr PhysReg ttmp10{122};
471bf215546Sopenharmony_cistatic constexpr PhysReg ttmp11{123};
472bf215546Sopenharmony_cistatic constexpr PhysReg sgpr_null{125}; /* GFX10+ */
473bf215546Sopenharmony_cistatic constexpr PhysReg exec{126};
474bf215546Sopenharmony_cistatic constexpr PhysReg exec_lo{126};
475bf215546Sopenharmony_cistatic constexpr PhysReg exec_hi{127};
476bf215546Sopenharmony_cistatic constexpr PhysReg vccz{251};
477bf215546Sopenharmony_cistatic constexpr PhysReg execz{252};
478bf215546Sopenharmony_cistatic constexpr PhysReg scc{253};
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci/**
481bf215546Sopenharmony_ci * Operand Class
482bf215546Sopenharmony_ci * Initially, each Operand refers to either
483bf215546Sopenharmony_ci * a temporary virtual register
484bf215546Sopenharmony_ci * or to a constant value
485bf215546Sopenharmony_ci * Temporary registers get mapped to physical register during RA
486bf215546Sopenharmony_ci * Constant values are inlined into the instruction sequence.
487bf215546Sopenharmony_ci */
488bf215546Sopenharmony_ciclass Operand final {
489bf215546Sopenharmony_cipublic:
490bf215546Sopenharmony_ci   constexpr Operand()
491bf215546Sopenharmony_ci       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
492bf215546Sopenharmony_ci         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false),
493bf215546Sopenharmony_ci         is24bit_(false), signext(false)
494bf215546Sopenharmony_ci   {}
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   explicit Operand(Temp r) noexcept
497bf215546Sopenharmony_ci   {
498bf215546Sopenharmony_ci      data_.temp = r;
499bf215546Sopenharmony_ci      if (r.id()) {
500bf215546Sopenharmony_ci         isTemp_ = true;
501bf215546Sopenharmony_ci      } else {
502bf215546Sopenharmony_ci         isUndef_ = true;
503bf215546Sopenharmony_ci         setFixed(PhysReg{128});
504bf215546Sopenharmony_ci      }
505bf215546Sopenharmony_ci   };
506bf215546Sopenharmony_ci   explicit Operand(Temp r, PhysReg reg) noexcept
507bf215546Sopenharmony_ci   {
508bf215546Sopenharmony_ci      assert(r.id()); /* Don't allow fixing an undef to a register */
509bf215546Sopenharmony_ci      data_.temp = r;
510bf215546Sopenharmony_ci      isTemp_ = true;
511bf215546Sopenharmony_ci      setFixed(reg);
512bf215546Sopenharmony_ci   };
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   /* 8-bit constant */
515bf215546Sopenharmony_ci   static Operand c8(uint8_t v) noexcept
516bf215546Sopenharmony_ci   {
517bf215546Sopenharmony_ci      /* 8-bit constants are only used for copies and copies from any 8-bit
518bf215546Sopenharmony_ci       * constant can be implemented with a SDWA v_mul_u32_u24. So consider all
519bf215546Sopenharmony_ci       * to be inline constants. */
520bf215546Sopenharmony_ci      Operand op;
521bf215546Sopenharmony_ci      op.control_ = 0;
522bf215546Sopenharmony_ci      op.data_.i = v;
523bf215546Sopenharmony_ci      op.isConstant_ = true;
524bf215546Sopenharmony_ci      op.constSize = 0;
525bf215546Sopenharmony_ci      op.setFixed(PhysReg{0u});
526bf215546Sopenharmony_ci      return op;
527bf215546Sopenharmony_ci   };
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci   /* 16-bit constant */
530bf215546Sopenharmony_ci   static Operand c16(uint16_t v) noexcept
531bf215546Sopenharmony_ci   {
532bf215546Sopenharmony_ci      Operand op;
533bf215546Sopenharmony_ci      op.control_ = 0;
534bf215546Sopenharmony_ci      op.data_.i = v;
535bf215546Sopenharmony_ci      op.isConstant_ = true;
536bf215546Sopenharmony_ci      op.constSize = 1;
537bf215546Sopenharmony_ci      if (v <= 64)
538bf215546Sopenharmony_ci         op.setFixed(PhysReg{128u + v});
539bf215546Sopenharmony_ci      else if (v >= 0xFFF0) /* [-16 .. -1] */
540bf215546Sopenharmony_ci         op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
541bf215546Sopenharmony_ci      else if (v == 0x3800) /* 0.5 */
542bf215546Sopenharmony_ci         op.setFixed(PhysReg{240});
543bf215546Sopenharmony_ci      else if (v == 0xB800) /* -0.5 */
544bf215546Sopenharmony_ci         op.setFixed(PhysReg{241});
545bf215546Sopenharmony_ci      else if (v == 0x3C00) /* 1.0 */
546bf215546Sopenharmony_ci         op.setFixed(PhysReg{242});
547bf215546Sopenharmony_ci      else if (v == 0xBC00) /* -1.0 */
548bf215546Sopenharmony_ci         op.setFixed(PhysReg{243});
549bf215546Sopenharmony_ci      else if (v == 0x4000) /* 2.0 */
550bf215546Sopenharmony_ci         op.setFixed(PhysReg{244});
551bf215546Sopenharmony_ci      else if (v == 0xC000) /* -2.0 */
552bf215546Sopenharmony_ci         op.setFixed(PhysReg{245});
553bf215546Sopenharmony_ci      else if (v == 0x4400) /* 4.0 */
554bf215546Sopenharmony_ci         op.setFixed(PhysReg{246});
555bf215546Sopenharmony_ci      else if (v == 0xC400) /* -4.0 */
556bf215546Sopenharmony_ci         op.setFixed(PhysReg{247});
557bf215546Sopenharmony_ci      else if (v == 0x3118) /* 1/2 PI */
558bf215546Sopenharmony_ci         op.setFixed(PhysReg{248});
559bf215546Sopenharmony_ci      else /* Literal Constant */
560bf215546Sopenharmony_ci         op.setFixed(PhysReg{255});
561bf215546Sopenharmony_ci      return op;
562bf215546Sopenharmony_ci   }
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci   /* 32-bit constant */
565bf215546Sopenharmony_ci   static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }
566bf215546Sopenharmony_ci
567bf215546Sopenharmony_ci   /* 64-bit constant */
568bf215546Sopenharmony_ci   static Operand c64(uint64_t v) noexcept
569bf215546Sopenharmony_ci   {
570bf215546Sopenharmony_ci      Operand op;
571bf215546Sopenharmony_ci      op.control_ = 0;
572bf215546Sopenharmony_ci      op.isConstant_ = true;
573bf215546Sopenharmony_ci      op.constSize = 3;
574bf215546Sopenharmony_ci      if (v <= 64) {
575bf215546Sopenharmony_ci         op.data_.i = (uint32_t)v;
576bf215546Sopenharmony_ci         op.setFixed(PhysReg{128 + (uint32_t)v});
577bf215546Sopenharmony_ci      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
578bf215546Sopenharmony_ci         op.data_.i = (uint32_t)v;
579bf215546Sopenharmony_ci         op.setFixed(PhysReg{192 - (uint32_t)v});
580bf215546Sopenharmony_ci      } else if (v == 0x3FE0000000000000) { /* 0.5 */
581bf215546Sopenharmony_ci         op.data_.i = 0x3f000000;
582bf215546Sopenharmony_ci         op.setFixed(PhysReg{240});
583bf215546Sopenharmony_ci      } else if (v == 0xBFE0000000000000) { /* -0.5 */
584bf215546Sopenharmony_ci         op.data_.i = 0xbf000000;
585bf215546Sopenharmony_ci         op.setFixed(PhysReg{241});
586bf215546Sopenharmony_ci      } else if (v == 0x3FF0000000000000) { /* 1.0 */
587bf215546Sopenharmony_ci         op.data_.i = 0x3f800000;
588bf215546Sopenharmony_ci         op.setFixed(PhysReg{242});
589bf215546Sopenharmony_ci      } else if (v == 0xBFF0000000000000) { /* -1.0 */
590bf215546Sopenharmony_ci         op.data_.i = 0xbf800000;
591bf215546Sopenharmony_ci         op.setFixed(PhysReg{243});
592bf215546Sopenharmony_ci      } else if (v == 0x4000000000000000) { /* 2.0 */
593bf215546Sopenharmony_ci         op.data_.i = 0x40000000;
594bf215546Sopenharmony_ci         op.setFixed(PhysReg{244});
595bf215546Sopenharmony_ci      } else if (v == 0xC000000000000000) { /* -2.0 */
596bf215546Sopenharmony_ci         op.data_.i = 0xc0000000;
597bf215546Sopenharmony_ci         op.setFixed(PhysReg{245});
598bf215546Sopenharmony_ci      } else if (v == 0x4010000000000000) { /* 4.0 */
599bf215546Sopenharmony_ci         op.data_.i = 0x40800000;
600bf215546Sopenharmony_ci         op.setFixed(PhysReg{246});
601bf215546Sopenharmony_ci      } else if (v == 0xC010000000000000) { /* -4.0 */
602bf215546Sopenharmony_ci         op.data_.i = 0xc0800000;
603bf215546Sopenharmony_ci         op.setFixed(PhysReg{247});
604bf215546Sopenharmony_ci      } else { /* Literal Constant: we don't know if it is a long or double.*/
605bf215546Sopenharmony_ci         op.signext = v >> 63;
606bf215546Sopenharmony_ci         op.data_.i = v & 0xffffffffu;
607bf215546Sopenharmony_ci         op.setFixed(PhysReg{255});
608bf215546Sopenharmony_ci         assert(op.constantValue64() == v &&
609bf215546Sopenharmony_ci                "attempt to create a unrepresentable 64-bit literal constant");
610bf215546Sopenharmony_ci      }
611bf215546Sopenharmony_ci      return op;
612bf215546Sopenharmony_ci   }
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   /* 32-bit constant stored as a 32-bit or 64-bit operand */
615bf215546Sopenharmony_ci   static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
616bf215546Sopenharmony_ci   {
617bf215546Sopenharmony_ci      Operand op;
618bf215546Sopenharmony_ci      op.control_ = 0;
619bf215546Sopenharmony_ci      op.data_.i = v;
620bf215546Sopenharmony_ci      op.isConstant_ = true;
621bf215546Sopenharmony_ci      op.constSize = is64bit ? 3 : 2;
622bf215546Sopenharmony_ci      if (v <= 64)
623bf215546Sopenharmony_ci         op.setFixed(PhysReg{128 + v});
624bf215546Sopenharmony_ci      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
625bf215546Sopenharmony_ci         op.setFixed(PhysReg{192 - v});
626bf215546Sopenharmony_ci      else if (v == 0x3f000000) /* 0.5 */
627bf215546Sopenharmony_ci         op.setFixed(PhysReg{240});
628bf215546Sopenharmony_ci      else if (v == 0xbf000000) /* -0.5 */
629bf215546Sopenharmony_ci         op.setFixed(PhysReg{241});
630bf215546Sopenharmony_ci      else if (v == 0x3f800000) /* 1.0 */
631bf215546Sopenharmony_ci         op.setFixed(PhysReg{242});
632bf215546Sopenharmony_ci      else if (v == 0xbf800000) /* -1.0 */
633bf215546Sopenharmony_ci         op.setFixed(PhysReg{243});
634bf215546Sopenharmony_ci      else if (v == 0x40000000) /* 2.0 */
635bf215546Sopenharmony_ci         op.setFixed(PhysReg{244});
636bf215546Sopenharmony_ci      else if (v == 0xc0000000) /* -2.0 */
637bf215546Sopenharmony_ci         op.setFixed(PhysReg{245});
638bf215546Sopenharmony_ci      else if (v == 0x40800000) /* 4.0 */
639bf215546Sopenharmony_ci         op.setFixed(PhysReg{246});
640bf215546Sopenharmony_ci      else if (v == 0xc0800000) /* -4.0 */
641bf215546Sopenharmony_ci         op.setFixed(PhysReg{247});
642bf215546Sopenharmony_ci      else { /* Literal Constant */
643bf215546Sopenharmony_ci         assert(!is64bit && "attempt to create a 64-bit literal constant");
644bf215546Sopenharmony_ci         op.setFixed(PhysReg{255});
645bf215546Sopenharmony_ci      }
646bf215546Sopenharmony_ci      return op;
647bf215546Sopenharmony_ci   }
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   static Operand literal32(uint32_t v) noexcept
650bf215546Sopenharmony_ci   {
651bf215546Sopenharmony_ci      Operand op;
652bf215546Sopenharmony_ci      op.control_ = 0;
653bf215546Sopenharmony_ci      op.data_.i = v;
654bf215546Sopenharmony_ci      op.isConstant_ = true;
655bf215546Sopenharmony_ci      op.constSize = 2;
656bf215546Sopenharmony_ci      op.setFixed(PhysReg{255});
657bf215546Sopenharmony_ci      return op;
658bf215546Sopenharmony_ci   }
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci   explicit Operand(RegClass type) noexcept
661bf215546Sopenharmony_ci   {
662bf215546Sopenharmony_ci      isUndef_ = true;
663bf215546Sopenharmony_ci      data_.temp = Temp(0, type);
664bf215546Sopenharmony_ci      setFixed(PhysReg{128});
665bf215546Sopenharmony_ci   };
666bf215546Sopenharmony_ci   explicit Operand(PhysReg reg, RegClass type) noexcept
667bf215546Sopenharmony_ci   {
668bf215546Sopenharmony_ci      data_.temp = Temp(0, type);
669bf215546Sopenharmony_ci      setFixed(reg);
670bf215546Sopenharmony_ci   }
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci   static Operand zero(unsigned bytes = 4) noexcept
673bf215546Sopenharmony_ci   {
674bf215546Sopenharmony_ci      if (bytes == 8)
675bf215546Sopenharmony_ci         return Operand::c64(0);
676bf215546Sopenharmony_ci      else if (bytes == 4)
677bf215546Sopenharmony_ci         return Operand::c32(0);
678bf215546Sopenharmony_ci      else if (bytes == 2)
679bf215546Sopenharmony_ci         return Operand::c16(0);
680bf215546Sopenharmony_ci      assert(bytes == 1);
681bf215546Sopenharmony_ci      return Operand::c8(0);
682bf215546Sopenharmony_ci   }
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci   /* This is useful over the constructors when you want to take a gfx level
685bf215546Sopenharmony_ci    * for 1/2 PI or an unknown operand size.
686bf215546Sopenharmony_ci    */
687bf215546Sopenharmony_ci   static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes)
688bf215546Sopenharmony_ci   {
689bf215546Sopenharmony_ci      if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
690bf215546Sopenharmony_ci         /* 1/2 PI can be an inline constant on GFX8+ */
691bf215546Sopenharmony_ci         Operand op = Operand::c32(val);
692bf215546Sopenharmony_ci         op.setFixed(PhysReg{248});
693bf215546Sopenharmony_ci         return op;
694bf215546Sopenharmony_ci      }
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci      if (bytes == 8)
697bf215546Sopenharmony_ci         return Operand::c64(val);
698bf215546Sopenharmony_ci      else if (bytes == 4)
699bf215546Sopenharmony_ci         return Operand::c32(val);
700bf215546Sopenharmony_ci      else if (bytes == 2)
701bf215546Sopenharmony_ci         return Operand::c16(val);
702bf215546Sopenharmony_ci      assert(bytes == 1);
703bf215546Sopenharmony_ci      return Operand::c8(val);
704bf215546Sopenharmony_ci   }
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
707bf215546Sopenharmony_ci                                         bool sext = false)
708bf215546Sopenharmony_ci   {
709bf215546Sopenharmony_ci      if (bytes <= 4)
710bf215546Sopenharmony_ci         return true;
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci      if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
713bf215546Sopenharmony_ci         return true;
714bf215546Sopenharmony_ci      uint64_t upper33 = val & 0xFFFFFFFF80000000;
715bf215546Sopenharmony_ci      if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
716bf215546Sopenharmony_ci         return true;
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci      return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
719bf215546Sopenharmony_ci             val == 0x3FE0000000000000 ||              /* 0.5 */
720bf215546Sopenharmony_ci             val == 0xBFE0000000000000 ||              /* -0.5 */
721bf215546Sopenharmony_ci             val == 0x3FF0000000000000 ||              /* 1.0 */
722bf215546Sopenharmony_ci             val == 0xBFF0000000000000 ||              /* -1.0 */
723bf215546Sopenharmony_ci             val == 0x4000000000000000 ||              /* 2.0 */
724bf215546Sopenharmony_ci             val == 0xC000000000000000 ||              /* -2.0 */
725bf215546Sopenharmony_ci             val == 0x4010000000000000 ||              /* 4.0 */
726bf215546Sopenharmony_ci             val == 0xC010000000000000;                /* -4.0 */
727bf215546Sopenharmony_ci   }
728bf215546Sopenharmony_ci
729bf215546Sopenharmony_ci   constexpr bool isTemp() const noexcept { return isTemp_; }
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   constexpr void setTemp(Temp t) noexcept
732bf215546Sopenharmony_ci   {
733bf215546Sopenharmony_ci      assert(!isConstant_);
734bf215546Sopenharmony_ci      isTemp_ = true;
735bf215546Sopenharmony_ci      data_.temp = t;
736bf215546Sopenharmony_ci   }
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci   constexpr Temp getTemp() const noexcept { return data_.temp; }
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci   constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci   constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); }
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci   constexpr unsigned bytes() const noexcept
747bf215546Sopenharmony_ci   {
748bf215546Sopenharmony_ci      if (isConstant())
749bf215546Sopenharmony_ci         return 1 << constSize;
750bf215546Sopenharmony_ci      else
751bf215546Sopenharmony_ci         return data_.temp.bytes();
752bf215546Sopenharmony_ci   }
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   constexpr unsigned size() const noexcept
755bf215546Sopenharmony_ci   {
756bf215546Sopenharmony_ci      if (isConstant())
757bf215546Sopenharmony_ci         return constSize > 2 ? 2 : 1;
758bf215546Sopenharmony_ci      else
759bf215546Sopenharmony_ci         return data_.temp.size();
760bf215546Sopenharmony_ci   }
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci   constexpr bool isFixed() const noexcept { return isFixed_; }
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   constexpr PhysReg physReg() const noexcept { return reg_; }
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   constexpr void setFixed(PhysReg reg) noexcept
767bf215546Sopenharmony_ci   {
768bf215546Sopenharmony_ci      isFixed_ = reg != unsigned(-1);
769bf215546Sopenharmony_ci      reg_ = reg;
770bf215546Sopenharmony_ci   }
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci   constexpr bool isConstant() const noexcept { return isConstant_; }
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci   constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   constexpr bool isUndefined() const noexcept { return isUndef_; }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   constexpr uint32_t constantValue() const noexcept { return data_.i; }
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   constexpr bool constantEquals(uint32_t cmp) const noexcept
781bf215546Sopenharmony_ci   {
782bf215546Sopenharmony_ci      return isConstant() && constantValue() == cmp;
783bf215546Sopenharmony_ci   }
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   constexpr uint64_t constantValue64() const noexcept
786bf215546Sopenharmony_ci   {
787bf215546Sopenharmony_ci      if (constSize == 3) {
788bf215546Sopenharmony_ci         if (reg_ <= 192)
789bf215546Sopenharmony_ci            return reg_ - 128;
790bf215546Sopenharmony_ci         else if (reg_ <= 208)
791bf215546Sopenharmony_ci            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci         switch (reg_) {
794bf215546Sopenharmony_ci         case 240: return 0x3FE0000000000000;
795bf215546Sopenharmony_ci         case 241: return 0xBFE0000000000000;
796bf215546Sopenharmony_ci         case 242: return 0x3FF0000000000000;
797bf215546Sopenharmony_ci         case 243: return 0xBFF0000000000000;
798bf215546Sopenharmony_ci         case 244: return 0x4000000000000000;
799bf215546Sopenharmony_ci         case 245: return 0xC000000000000000;
800bf215546Sopenharmony_ci         case 246: return 0x4010000000000000;
801bf215546Sopenharmony_ci         case 247: return 0xC010000000000000;
802bf215546Sopenharmony_ci         case 255:
803bf215546Sopenharmony_ci            return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
804bf215546Sopenharmony_ci         }
805bf215546Sopenharmony_ci         unreachable("invalid register for 64-bit constant");
806bf215546Sopenharmony_ci      } else {
807bf215546Sopenharmony_ci         return data_.i;
808bf215546Sopenharmony_ci      }
809bf215546Sopenharmony_ci   }
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci   /* Value if this were used with vop3/opsel or vop3p. */
812bf215546Sopenharmony_ci   constexpr uint16_t constantValue16(bool opsel) const noexcept
813bf215546Sopenharmony_ci   {
814bf215546Sopenharmony_ci      assert(bytes() == 2 || bytes() == 4);
815bf215546Sopenharmony_ci      if (opsel) {
816bf215546Sopenharmony_ci         if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
817bf215546Sopenharmony_ci            return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
818bf215546Sopenharmony_ci         else
819bf215546Sopenharmony_ci            return data_.i >> 16;
820bf215546Sopenharmony_ci      }
821bf215546Sopenharmony_ci      return data_.i;
822bf215546Sopenharmony_ci   }
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci   constexpr bool isOfType(RegType type) const noexcept
825bf215546Sopenharmony_ci   {
826bf215546Sopenharmony_ci      return hasRegClass() && regClass().type() == type;
827bf215546Sopenharmony_ci   }
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci   /* Indicates that the killed operand's live range intersects with the
830bf215546Sopenharmony_ci    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
831bf215546Sopenharmony_ci    * not set by liveness analysis. */
832bf215546Sopenharmony_ci   constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }
833bf215546Sopenharmony_ci
834bf215546Sopenharmony_ci   constexpr bool isLateKill() const noexcept { return isLateKill_; }
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_ci   constexpr void setKill(bool flag) noexcept
837bf215546Sopenharmony_ci   {
838bf215546Sopenharmony_ci      isKill_ = flag;
839bf215546Sopenharmony_ci      if (!flag)
840bf215546Sopenharmony_ci         setFirstKill(false);
841bf215546Sopenharmony_ci   }
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci   constexpr void setFirstKill(bool flag) noexcept
846bf215546Sopenharmony_ci   {
847bf215546Sopenharmony_ci      isFirstKill_ = flag;
848bf215546Sopenharmony_ci      if (flag)
849bf215546Sopenharmony_ci         setKill(flag);
850bf215546Sopenharmony_ci   }
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci   /* When there are multiple operands killing the same temporary,
853bf215546Sopenharmony_ci    * isFirstKill() is only returns true for the first one. */
854bf215546Sopenharmony_ci   constexpr bool isFirstKill() const noexcept { return isFirstKill_; }
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci   constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci   constexpr bool operator==(Operand other) const noexcept
861bf215546Sopenharmony_ci   {
862bf215546Sopenharmony_ci      if (other.size() != size())
863bf215546Sopenharmony_ci         return false;
864bf215546Sopenharmony_ci      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
865bf215546Sopenharmony_ci         return false;
866bf215546Sopenharmony_ci      if (isFixed() && other.isFixed() && physReg() != other.physReg())
867bf215546Sopenharmony_ci         return false;
868bf215546Sopenharmony_ci      if (isLiteral())
869bf215546Sopenharmony_ci         return other.isLiteral() && other.constantValue() == constantValue();
870bf215546Sopenharmony_ci      else if (isConstant())
871bf215546Sopenharmony_ci         return other.isConstant() && other.physReg() == physReg();
872bf215546Sopenharmony_ci      else if (isUndefined())
873bf215546Sopenharmony_ci         return other.isUndefined() && other.regClass() == regClass();
874bf215546Sopenharmony_ci      else
875bf215546Sopenharmony_ci         return other.isTemp() && other.getTemp() == getTemp();
876bf215546Sopenharmony_ci   }
877bf215546Sopenharmony_ci
878bf215546Sopenharmony_ci   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci   constexpr bool is16bit() const noexcept { return is16bit_; }
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci   constexpr bool is24bit() const noexcept { return is24bit_; }
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_ciprivate:
889bf215546Sopenharmony_ci   union {
890bf215546Sopenharmony_ci      Temp temp;
891bf215546Sopenharmony_ci      uint32_t i;
892bf215546Sopenharmony_ci      float f;
893bf215546Sopenharmony_ci   } data_ = {Temp(0, s1)};
894bf215546Sopenharmony_ci   PhysReg reg_;
895bf215546Sopenharmony_ci   union {
896bf215546Sopenharmony_ci      struct {
897bf215546Sopenharmony_ci         uint8_t isTemp_ : 1;
898bf215546Sopenharmony_ci         uint8_t isFixed_ : 1;
899bf215546Sopenharmony_ci         uint8_t isConstant_ : 1;
900bf215546Sopenharmony_ci         uint8_t isKill_ : 1;
901bf215546Sopenharmony_ci         uint8_t isUndef_ : 1;
902bf215546Sopenharmony_ci         uint8_t isFirstKill_ : 1;
903bf215546Sopenharmony_ci         uint8_t constSize : 2;
904bf215546Sopenharmony_ci         uint8_t isLateKill_ : 1;
905bf215546Sopenharmony_ci         uint8_t is16bit_ : 1;
906bf215546Sopenharmony_ci         uint8_t is24bit_ : 1;
907bf215546Sopenharmony_ci         uint8_t signext : 1;
908bf215546Sopenharmony_ci      };
909bf215546Sopenharmony_ci      /* can't initialize bit-fields in c++11, so work around using a union */
910bf215546Sopenharmony_ci      uint16_t control_ = 0;
911bf215546Sopenharmony_ci   };
912bf215546Sopenharmony_ci};
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci/**
915bf215546Sopenharmony_ci * Definition Class
916bf215546Sopenharmony_ci * Definitions are the results of Instructions
917bf215546Sopenharmony_ci * and refer to temporary virtual registers
918bf215546Sopenharmony_ci * which are later mapped to physical registers
919bf215546Sopenharmony_ci */
920bf215546Sopenharmony_ciclass Definition final {
921bf215546Sopenharmony_cipublic:
922bf215546Sopenharmony_ci   constexpr Definition()
923bf215546Sopenharmony_ci       : temp(Temp(0, s1)), reg_(0), isFixed_(0), isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0)
924bf215546Sopenharmony_ci   {}
925bf215546Sopenharmony_ci   Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
926bf215546Sopenharmony_ci   explicit Definition(Temp tmp) noexcept : temp(tmp) {}
927bf215546Sopenharmony_ci   Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
928bf215546Sopenharmony_ci   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
929bf215546Sopenharmony_ci   {
930bf215546Sopenharmony_ci      setFixed(reg);
931bf215546Sopenharmony_ci   }
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   constexpr bool isTemp() const noexcept { return tempId() > 0; }
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci   constexpr Temp getTemp() const noexcept { return temp; }
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci   constexpr uint32_t tempId() const noexcept { return temp.id(); }
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci   constexpr void setTemp(Temp t) noexcept { temp = t; }
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci   void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }
942bf215546Sopenharmony_ci
943bf215546Sopenharmony_ci   constexpr RegClass regClass() const noexcept { return temp.regClass(); }
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci   constexpr unsigned bytes() const noexcept { return temp.bytes(); }
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci   constexpr unsigned size() const noexcept { return temp.size(); }
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci   constexpr bool isFixed() const noexcept { return isFixed_; }
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_ci   constexpr PhysReg physReg() const noexcept { return reg_; }
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci   constexpr void setFixed(PhysReg reg) noexcept
954bf215546Sopenharmony_ci   {
955bf215546Sopenharmony_ci      isFixed_ = 1;
956bf215546Sopenharmony_ci      reg_ = reg;
957bf215546Sopenharmony_ci   }
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_ci   constexpr void setKill(bool flag) noexcept { isKill_ = flag; }
960bf215546Sopenharmony_ci
961bf215546Sopenharmony_ci   constexpr bool isKill() const noexcept { return isKill_; }
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_ci   constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci   constexpr bool isPrecise() const noexcept { return isPrecise_; }
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci   /* No Unsigned Wrap */
968bf215546Sopenharmony_ci   constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   constexpr bool isNUW() const noexcept { return isNUW_; }
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci   constexpr bool isNoCSE() const noexcept { return isNoCSE_; }
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ciprivate:
977bf215546Sopenharmony_ci   Temp temp = Temp(0, s1);
978bf215546Sopenharmony_ci   PhysReg reg_;
979bf215546Sopenharmony_ci   union {
980bf215546Sopenharmony_ci      struct {
981bf215546Sopenharmony_ci         uint8_t isFixed_ : 1;
982bf215546Sopenharmony_ci         uint8_t isKill_ : 1;
983bf215546Sopenharmony_ci         uint8_t isPrecise_ : 1;
984bf215546Sopenharmony_ci         uint8_t isNUW_ : 1;
985bf215546Sopenharmony_ci         uint8_t isNoCSE_ : 1;
986bf215546Sopenharmony_ci      };
987bf215546Sopenharmony_ci      /* can't initialize bit-fields in c++11, so work around using a union */
988bf215546Sopenharmony_ci      uint8_t control_ = 0;
989bf215546Sopenharmony_ci   };
990bf215546Sopenharmony_ci};
991bf215546Sopenharmony_ci
992bf215546Sopenharmony_cistruct Block;
993bf215546Sopenharmony_cistruct Instruction;
994bf215546Sopenharmony_cistruct Pseudo_instruction;
995bf215546Sopenharmony_cistruct SOP1_instruction;
996bf215546Sopenharmony_cistruct SOP2_instruction;
997bf215546Sopenharmony_cistruct SOPK_instruction;
998bf215546Sopenharmony_cistruct SOPP_instruction;
999bf215546Sopenharmony_cistruct SOPC_instruction;
1000bf215546Sopenharmony_cistruct SMEM_instruction;
1001bf215546Sopenharmony_cistruct DS_instruction;
1002bf215546Sopenharmony_cistruct MTBUF_instruction;
1003bf215546Sopenharmony_cistruct MUBUF_instruction;
1004bf215546Sopenharmony_cistruct MIMG_instruction;
1005bf215546Sopenharmony_cistruct Export_instruction;
1006bf215546Sopenharmony_cistruct FLAT_instruction;
1007bf215546Sopenharmony_cistruct Pseudo_branch_instruction;
1008bf215546Sopenharmony_cistruct Pseudo_barrier_instruction;
1009bf215546Sopenharmony_cistruct Pseudo_reduction_instruction;
1010bf215546Sopenharmony_cistruct VOP3P_instruction;
1011bf215546Sopenharmony_cistruct VOP1_instruction;
1012bf215546Sopenharmony_cistruct VOP2_instruction;
1013bf215546Sopenharmony_cistruct VOPC_instruction;
1014bf215546Sopenharmony_cistruct VOP3_instruction;
1015bf215546Sopenharmony_cistruct Interp_instruction;
1016bf215546Sopenharmony_cistruct DPP16_instruction;
1017bf215546Sopenharmony_cistruct DPP8_instruction;
1018bf215546Sopenharmony_cistruct SDWA_instruction;
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_cistruct Instruction {
1021bf215546Sopenharmony_ci   aco_opcode opcode;
1022bf215546Sopenharmony_ci   Format format;
1023bf215546Sopenharmony_ci   uint32_t pass_flags;
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   aco::span<Operand> operands;
1026bf215546Sopenharmony_ci   aco::span<Definition> definitions;
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci   constexpr bool usesModifiers() const noexcept;
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   constexpr bool reads_exec() const noexcept
1031bf215546Sopenharmony_ci   {
1032bf215546Sopenharmony_ci      for (const Operand& op : operands) {
1033bf215546Sopenharmony_ci         if (op.isFixed() && op.physReg() == exec)
1034bf215546Sopenharmony_ci            return true;
1035bf215546Sopenharmony_ci      }
1036bf215546Sopenharmony_ci      return false;
1037bf215546Sopenharmony_ci   }
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci   Pseudo_instruction& pseudo() noexcept
1040bf215546Sopenharmony_ci   {
1041bf215546Sopenharmony_ci      assert(isPseudo());
1042bf215546Sopenharmony_ci      return *(Pseudo_instruction*)this;
1043bf215546Sopenharmony_ci   }
1044bf215546Sopenharmony_ci   const Pseudo_instruction& pseudo() const noexcept
1045bf215546Sopenharmony_ci   {
1046bf215546Sopenharmony_ci      assert(isPseudo());
1047bf215546Sopenharmony_ci      return *(Pseudo_instruction*)this;
1048bf215546Sopenharmony_ci   }
1049bf215546Sopenharmony_ci   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }
1050bf215546Sopenharmony_ci   SOP1_instruction& sop1() noexcept
1051bf215546Sopenharmony_ci   {
1052bf215546Sopenharmony_ci      assert(isSOP1());
1053bf215546Sopenharmony_ci      return *(SOP1_instruction*)this;
1054bf215546Sopenharmony_ci   }
1055bf215546Sopenharmony_ci   const SOP1_instruction& sop1() const noexcept
1056bf215546Sopenharmony_ci   {
1057bf215546Sopenharmony_ci      assert(isSOP1());
1058bf215546Sopenharmony_ci      return *(SOP1_instruction*)this;
1059bf215546Sopenharmony_ci   }
1060bf215546Sopenharmony_ci   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
1061bf215546Sopenharmony_ci   SOP2_instruction& sop2() noexcept
1062bf215546Sopenharmony_ci   {
1063bf215546Sopenharmony_ci      assert(isSOP2());
1064bf215546Sopenharmony_ci      return *(SOP2_instruction*)this;
1065bf215546Sopenharmony_ci   }
1066bf215546Sopenharmony_ci   const SOP2_instruction& sop2() const noexcept
1067bf215546Sopenharmony_ci   {
1068bf215546Sopenharmony_ci      assert(isSOP2());
1069bf215546Sopenharmony_ci      return *(SOP2_instruction*)this;
1070bf215546Sopenharmony_ci   }
1071bf215546Sopenharmony_ci   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
1072bf215546Sopenharmony_ci   SOPK_instruction& sopk() noexcept
1073bf215546Sopenharmony_ci   {
1074bf215546Sopenharmony_ci      assert(isSOPK());
1075bf215546Sopenharmony_ci      return *(SOPK_instruction*)this;
1076bf215546Sopenharmony_ci   }
1077bf215546Sopenharmony_ci   const SOPK_instruction& sopk() const noexcept
1078bf215546Sopenharmony_ci   {
1079bf215546Sopenharmony_ci      assert(isSOPK());
1080bf215546Sopenharmony_ci      return *(SOPK_instruction*)this;
1081bf215546Sopenharmony_ci   }
1082bf215546Sopenharmony_ci   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
1083bf215546Sopenharmony_ci   SOPP_instruction& sopp() noexcept
1084bf215546Sopenharmony_ci   {
1085bf215546Sopenharmony_ci      assert(isSOPP());
1086bf215546Sopenharmony_ci      return *(SOPP_instruction*)this;
1087bf215546Sopenharmony_ci   }
1088bf215546Sopenharmony_ci   const SOPP_instruction& sopp() const noexcept
1089bf215546Sopenharmony_ci   {
1090bf215546Sopenharmony_ci      assert(isSOPP());
1091bf215546Sopenharmony_ci      return *(SOPP_instruction*)this;
1092bf215546Sopenharmony_ci   }
1093bf215546Sopenharmony_ci   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
1094bf215546Sopenharmony_ci   SOPC_instruction& sopc() noexcept
1095bf215546Sopenharmony_ci   {
1096bf215546Sopenharmony_ci      assert(isSOPC());
1097bf215546Sopenharmony_ci      return *(SOPC_instruction*)this;
1098bf215546Sopenharmony_ci   }
1099bf215546Sopenharmony_ci   const SOPC_instruction& sopc() const noexcept
1100bf215546Sopenharmony_ci   {
1101bf215546Sopenharmony_ci      assert(isSOPC());
1102bf215546Sopenharmony_ci      return *(SOPC_instruction*)this;
1103bf215546Sopenharmony_ci   }
1104bf215546Sopenharmony_ci   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }
1105bf215546Sopenharmony_ci   SMEM_instruction& smem() noexcept
1106bf215546Sopenharmony_ci   {
1107bf215546Sopenharmony_ci      assert(isSMEM());
1108bf215546Sopenharmony_ci      return *(SMEM_instruction*)this;
1109bf215546Sopenharmony_ci   }
1110bf215546Sopenharmony_ci   const SMEM_instruction& smem() const noexcept
1111bf215546Sopenharmony_ci   {
1112bf215546Sopenharmony_ci      assert(isSMEM());
1113bf215546Sopenharmony_ci      return *(SMEM_instruction*)this;
1114bf215546Sopenharmony_ci   }
1115bf215546Sopenharmony_ci   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
1116bf215546Sopenharmony_ci   DS_instruction& ds() noexcept
1117bf215546Sopenharmony_ci   {
1118bf215546Sopenharmony_ci      assert(isDS());
1119bf215546Sopenharmony_ci      return *(DS_instruction*)this;
1120bf215546Sopenharmony_ci   }
1121bf215546Sopenharmony_ci   const DS_instruction& ds() const noexcept
1122bf215546Sopenharmony_ci   {
1123bf215546Sopenharmony_ci      assert(isDS());
1124bf215546Sopenharmony_ci      return *(DS_instruction*)this;
1125bf215546Sopenharmony_ci   }
1126bf215546Sopenharmony_ci   constexpr bool isDS() const noexcept { return format == Format::DS; }
1127bf215546Sopenharmony_ci   MTBUF_instruction& mtbuf() noexcept
1128bf215546Sopenharmony_ci   {
1129bf215546Sopenharmony_ci      assert(isMTBUF());
1130bf215546Sopenharmony_ci      return *(MTBUF_instruction*)this;
1131bf215546Sopenharmony_ci   }
1132bf215546Sopenharmony_ci   const MTBUF_instruction& mtbuf() const noexcept
1133bf215546Sopenharmony_ci   {
1134bf215546Sopenharmony_ci      assert(isMTBUF());
1135bf215546Sopenharmony_ci      return *(MTBUF_instruction*)this;
1136bf215546Sopenharmony_ci   }
1137bf215546Sopenharmony_ci   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
1138bf215546Sopenharmony_ci   MUBUF_instruction& mubuf() noexcept
1139bf215546Sopenharmony_ci   {
1140bf215546Sopenharmony_ci      assert(isMUBUF());
1141bf215546Sopenharmony_ci      return *(MUBUF_instruction*)this;
1142bf215546Sopenharmony_ci   }
1143bf215546Sopenharmony_ci   const MUBUF_instruction& mubuf() const noexcept
1144bf215546Sopenharmony_ci   {
1145bf215546Sopenharmony_ci      assert(isMUBUF());
1146bf215546Sopenharmony_ci      return *(MUBUF_instruction*)this;
1147bf215546Sopenharmony_ci   }
1148bf215546Sopenharmony_ci   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
1149bf215546Sopenharmony_ci   MIMG_instruction& mimg() noexcept
1150bf215546Sopenharmony_ci   {
1151bf215546Sopenharmony_ci      assert(isMIMG());
1152bf215546Sopenharmony_ci      return *(MIMG_instruction*)this;
1153bf215546Sopenharmony_ci   }
1154bf215546Sopenharmony_ci   const MIMG_instruction& mimg() const noexcept
1155bf215546Sopenharmony_ci   {
1156bf215546Sopenharmony_ci      assert(isMIMG());
1157bf215546Sopenharmony_ci      return *(MIMG_instruction*)this;
1158bf215546Sopenharmony_ci   }
1159bf215546Sopenharmony_ci   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
1160bf215546Sopenharmony_ci   Export_instruction& exp() noexcept
1161bf215546Sopenharmony_ci   {
1162bf215546Sopenharmony_ci      assert(isEXP());
1163bf215546Sopenharmony_ci      return *(Export_instruction*)this;
1164bf215546Sopenharmony_ci   }
1165bf215546Sopenharmony_ci   const Export_instruction& exp() const noexcept
1166bf215546Sopenharmony_ci   {
1167bf215546Sopenharmony_ci      assert(isEXP());
1168bf215546Sopenharmony_ci      return *(Export_instruction*)this;
1169bf215546Sopenharmony_ci   }
1170bf215546Sopenharmony_ci   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
1171bf215546Sopenharmony_ci   FLAT_instruction& flat() noexcept
1172bf215546Sopenharmony_ci   {
1173bf215546Sopenharmony_ci      assert(isFlat());
1174bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1175bf215546Sopenharmony_ci   }
1176bf215546Sopenharmony_ci   const FLAT_instruction& flat() const noexcept
1177bf215546Sopenharmony_ci   {
1178bf215546Sopenharmony_ci      assert(isFlat());
1179bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1180bf215546Sopenharmony_ci   }
1181bf215546Sopenharmony_ci   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
1182bf215546Sopenharmony_ci   FLAT_instruction& global() noexcept
1183bf215546Sopenharmony_ci   {
1184bf215546Sopenharmony_ci      assert(isGlobal());
1185bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1186bf215546Sopenharmony_ci   }
1187bf215546Sopenharmony_ci   const FLAT_instruction& global() const noexcept
1188bf215546Sopenharmony_ci   {
1189bf215546Sopenharmony_ci      assert(isGlobal());
1190bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1191bf215546Sopenharmony_ci   }
1192bf215546Sopenharmony_ci   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
1193bf215546Sopenharmony_ci   FLAT_instruction& scratch() noexcept
1194bf215546Sopenharmony_ci   {
1195bf215546Sopenharmony_ci      assert(isScratch());
1196bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1197bf215546Sopenharmony_ci   }
1198bf215546Sopenharmony_ci   const FLAT_instruction& scratch() const noexcept
1199bf215546Sopenharmony_ci   {
1200bf215546Sopenharmony_ci      assert(isScratch());
1201bf215546Sopenharmony_ci      return *(FLAT_instruction*)this;
1202bf215546Sopenharmony_ci   }
1203bf215546Sopenharmony_ci   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
1204bf215546Sopenharmony_ci   Pseudo_branch_instruction& branch() noexcept
1205bf215546Sopenharmony_ci   {
1206bf215546Sopenharmony_ci      assert(isBranch());
1207bf215546Sopenharmony_ci      return *(Pseudo_branch_instruction*)this;
1208bf215546Sopenharmony_ci   }
1209bf215546Sopenharmony_ci   const Pseudo_branch_instruction& branch() const noexcept
1210bf215546Sopenharmony_ci   {
1211bf215546Sopenharmony_ci      assert(isBranch());
1212bf215546Sopenharmony_ci      return *(Pseudo_branch_instruction*)this;
1213bf215546Sopenharmony_ci   }
1214bf215546Sopenharmony_ci   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
1215bf215546Sopenharmony_ci   Pseudo_barrier_instruction& barrier() noexcept
1216bf215546Sopenharmony_ci   {
1217bf215546Sopenharmony_ci      assert(isBarrier());
1218bf215546Sopenharmony_ci      return *(Pseudo_barrier_instruction*)this;
1219bf215546Sopenharmony_ci   }
1220bf215546Sopenharmony_ci   const Pseudo_barrier_instruction& barrier() const noexcept
1221bf215546Sopenharmony_ci   {
1222bf215546Sopenharmony_ci      assert(isBarrier());
1223bf215546Sopenharmony_ci      return *(Pseudo_barrier_instruction*)this;
1224bf215546Sopenharmony_ci   }
1225bf215546Sopenharmony_ci   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
1226bf215546Sopenharmony_ci   Pseudo_reduction_instruction& reduction() noexcept
1227bf215546Sopenharmony_ci   {
1228bf215546Sopenharmony_ci      assert(isReduction());
1229bf215546Sopenharmony_ci      return *(Pseudo_reduction_instruction*)this;
1230bf215546Sopenharmony_ci   }
1231bf215546Sopenharmony_ci   const Pseudo_reduction_instruction& reduction() const noexcept
1232bf215546Sopenharmony_ci   {
1233bf215546Sopenharmony_ci      assert(isReduction());
1234bf215546Sopenharmony_ci      return *(Pseudo_reduction_instruction*)this;
1235bf215546Sopenharmony_ci   }
1236bf215546Sopenharmony_ci   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
1237bf215546Sopenharmony_ci   VOP3P_instruction& vop3p() noexcept
1238bf215546Sopenharmony_ci   {
1239bf215546Sopenharmony_ci      assert(isVOP3P());
1240bf215546Sopenharmony_ci      return *(VOP3P_instruction*)this;
1241bf215546Sopenharmony_ci   }
1242bf215546Sopenharmony_ci   const VOP3P_instruction& vop3p() const noexcept
1243bf215546Sopenharmony_ci   {
1244bf215546Sopenharmony_ci      assert(isVOP3P());
1245bf215546Sopenharmony_ci      return *(VOP3P_instruction*)this;
1246bf215546Sopenharmony_ci   }
1247bf215546Sopenharmony_ci   constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
1248bf215546Sopenharmony_ci   VOP1_instruction& vop1() noexcept
1249bf215546Sopenharmony_ci   {
1250bf215546Sopenharmony_ci      assert(isVOP1());
1251bf215546Sopenharmony_ci      return *(VOP1_instruction*)this;
1252bf215546Sopenharmony_ci   }
1253bf215546Sopenharmony_ci   const VOP1_instruction& vop1() const noexcept
1254bf215546Sopenharmony_ci   {
1255bf215546Sopenharmony_ci      assert(isVOP1());
1256bf215546Sopenharmony_ci      return *(VOP1_instruction*)this;
1257bf215546Sopenharmony_ci   }
1258bf215546Sopenharmony_ci   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
1259bf215546Sopenharmony_ci   VOP2_instruction& vop2() noexcept
1260bf215546Sopenharmony_ci   {
1261bf215546Sopenharmony_ci      assert(isVOP2());
1262bf215546Sopenharmony_ci      return *(VOP2_instruction*)this;
1263bf215546Sopenharmony_ci   }
1264bf215546Sopenharmony_ci   const VOP2_instruction& vop2() const noexcept
1265bf215546Sopenharmony_ci   {
1266bf215546Sopenharmony_ci      assert(isVOP2());
1267bf215546Sopenharmony_ci      return *(VOP2_instruction*)this;
1268bf215546Sopenharmony_ci   }
1269bf215546Sopenharmony_ci   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
1270bf215546Sopenharmony_ci   VOPC_instruction& vopc() noexcept
1271bf215546Sopenharmony_ci   {
1272bf215546Sopenharmony_ci      assert(isVOPC());
1273bf215546Sopenharmony_ci      return *(VOPC_instruction*)this;
1274bf215546Sopenharmony_ci   }
1275bf215546Sopenharmony_ci   const VOPC_instruction& vopc() const noexcept
1276bf215546Sopenharmony_ci   {
1277bf215546Sopenharmony_ci      assert(isVOPC());
1278bf215546Sopenharmony_ci      return *(VOPC_instruction*)this;
1279bf215546Sopenharmony_ci   }
1280bf215546Sopenharmony_ci   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
1281bf215546Sopenharmony_ci   VOP3_instruction& vop3() noexcept
1282bf215546Sopenharmony_ci   {
1283bf215546Sopenharmony_ci      assert(isVOP3());
1284bf215546Sopenharmony_ci      return *(VOP3_instruction*)this;
1285bf215546Sopenharmony_ci   }
1286bf215546Sopenharmony_ci   const VOP3_instruction& vop3() const noexcept
1287bf215546Sopenharmony_ci   {
1288bf215546Sopenharmony_ci      assert(isVOP3());
1289bf215546Sopenharmony_ci      return *(VOP3_instruction*)this;
1290bf215546Sopenharmony_ci   }
1291bf215546Sopenharmony_ci   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
1292bf215546Sopenharmony_ci   Interp_instruction& vintrp() noexcept
1293bf215546Sopenharmony_ci   {
1294bf215546Sopenharmony_ci      assert(isVINTRP());
1295bf215546Sopenharmony_ci      return *(Interp_instruction*)this;
1296bf215546Sopenharmony_ci   }
1297bf215546Sopenharmony_ci   const Interp_instruction& vintrp() const noexcept
1298bf215546Sopenharmony_ci   {
1299bf215546Sopenharmony_ci      assert(isVINTRP());
1300bf215546Sopenharmony_ci      return *(Interp_instruction*)this;
1301bf215546Sopenharmony_ci   }
1302bf215546Sopenharmony_ci   constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; }
1303bf215546Sopenharmony_ci   DPP16_instruction& dpp16() noexcept
1304bf215546Sopenharmony_ci   {
1305bf215546Sopenharmony_ci      assert(isDPP16());
1306bf215546Sopenharmony_ci      return *(DPP16_instruction*)this;
1307bf215546Sopenharmony_ci   }
1308bf215546Sopenharmony_ci   const DPP16_instruction& dpp16() const noexcept
1309bf215546Sopenharmony_ci   {
1310bf215546Sopenharmony_ci      assert(isDPP16());
1311bf215546Sopenharmony_ci      return *(DPP16_instruction*)this;
1312bf215546Sopenharmony_ci   }
1313bf215546Sopenharmony_ci   constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; }
1314bf215546Sopenharmony_ci   DPP8_instruction& dpp8() noexcept
1315bf215546Sopenharmony_ci   {
1316bf215546Sopenharmony_ci      assert(isDPP8());
1317bf215546Sopenharmony_ci      return *(DPP8_instruction*)this;
1318bf215546Sopenharmony_ci   }
1319bf215546Sopenharmony_ci   const DPP8_instruction& dpp8() const noexcept
1320bf215546Sopenharmony_ci   {
1321bf215546Sopenharmony_ci      assert(isDPP8());
1322bf215546Sopenharmony_ci      return *(DPP8_instruction*)this;
1323bf215546Sopenharmony_ci   }
1324bf215546Sopenharmony_ci   constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; }
1325bf215546Sopenharmony_ci   constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); }
1326bf215546Sopenharmony_ci   SDWA_instruction& sdwa() noexcept
1327bf215546Sopenharmony_ci   {
1328bf215546Sopenharmony_ci      assert(isSDWA());
1329bf215546Sopenharmony_ci      return *(SDWA_instruction*)this;
1330bf215546Sopenharmony_ci   }
1331bf215546Sopenharmony_ci   const SDWA_instruction& sdwa() const noexcept
1332bf215546Sopenharmony_ci   {
1333bf215546Sopenharmony_ci      assert(isSDWA());
1334bf215546Sopenharmony_ci      return *(SDWA_instruction*)this;
1335bf215546Sopenharmony_ci   }
1336bf215546Sopenharmony_ci   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }
1337bf215546Sopenharmony_ci
1338bf215546Sopenharmony_ci   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }
1339bf215546Sopenharmony_ci
1340bf215546Sopenharmony_ci   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }
1341bf215546Sopenharmony_ci
1342bf215546Sopenharmony_ci   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci   constexpr bool isVALU() const noexcept
1345bf215546Sopenharmony_ci   {
1346bf215546Sopenharmony_ci      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P();
1347bf215546Sopenharmony_ci   }
1348bf215546Sopenharmony_ci
1349bf215546Sopenharmony_ci   constexpr bool isSALU() const noexcept
1350bf215546Sopenharmony_ci   {
1351bf215546Sopenharmony_ci      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
1352bf215546Sopenharmony_ci   }
1353bf215546Sopenharmony_ci
1354bf215546Sopenharmony_ci   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }
1355bf215546Sopenharmony_ci};
1356bf215546Sopenharmony_cistatic_assert(sizeof(Instruction) == 16, "Unexpected padding");
1357bf215546Sopenharmony_ci
1358bf215546Sopenharmony_cistruct SOPK_instruction : public Instruction {
1359bf215546Sopenharmony_ci   uint16_t imm;
1360bf215546Sopenharmony_ci   uint16_t padding;
1361bf215546Sopenharmony_ci};
1362bf215546Sopenharmony_cistatic_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1363bf215546Sopenharmony_ci
1364bf215546Sopenharmony_cistruct SOPP_instruction : public Instruction {
1365bf215546Sopenharmony_ci   uint32_t imm;
1366bf215546Sopenharmony_ci   int block;
1367bf215546Sopenharmony_ci};
1368bf215546Sopenharmony_cistatic_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_cistruct SOPC_instruction : public Instruction {
1371bf215546Sopenharmony_ci   uint32_t padding;
1372bf215546Sopenharmony_ci};
1373bf215546Sopenharmony_cistatic_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1374bf215546Sopenharmony_ci
1375bf215546Sopenharmony_cistruct SOP1_instruction : public Instruction {};
1376bf215546Sopenharmony_cistatic_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
1377bf215546Sopenharmony_ci
1378bf215546Sopenharmony_cistruct SOP2_instruction : public Instruction {
1379bf215546Sopenharmony_ci   uint32_t padding;
1380bf215546Sopenharmony_ci};
1381bf215546Sopenharmony_cistatic_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci/**
1384bf215546Sopenharmony_ci * Scalar Memory Format:
1385bf215546Sopenharmony_ci * For s_(buffer_)load_dword*:
1386bf215546Sopenharmony_ci * Operand(0): SBASE - SGPR-pair which provides base address
1387bf215546Sopenharmony_ci * Operand(1): Offset - immediate (un)signed offset or SGPR
1388bf215546Sopenharmony_ci * Operand(2) / Definition(0): SDATA - SGPR for read / write result
1389bf215546Sopenharmony_ci * Operand(n-1): SOffset - SGPR offset (Vega only)
1390bf215546Sopenharmony_ci *
1391bf215546Sopenharmony_ci * Having no operands is also valid for instructions such as s_dcache_inv.
1392bf215546Sopenharmony_ci *
1393bf215546Sopenharmony_ci */
1394bf215546Sopenharmony_cistruct SMEM_instruction : public Instruction {
1395bf215546Sopenharmony_ci   memory_sync_info sync;
1396bf215546Sopenharmony_ci   bool glc : 1; /* VI+: globally coherent */
1397bf215546Sopenharmony_ci   bool dlc : 1; /* NAVI: device level coherent */
1398bf215546Sopenharmony_ci   bool nv : 1;  /* VEGA only: Non-volatile */
1399bf215546Sopenharmony_ci   bool disable_wqm : 1;
1400bf215546Sopenharmony_ci   bool prevent_overflow : 1; /* avoid overflow when combining additions */
1401bf215546Sopenharmony_ci   uint8_t padding : 3;
1402bf215546Sopenharmony_ci};
1403bf215546Sopenharmony_cistatic_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1404bf215546Sopenharmony_ci
1405bf215546Sopenharmony_cistruct VOP1_instruction : public Instruction {};
1406bf215546Sopenharmony_cistatic_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
1407bf215546Sopenharmony_ci
1408bf215546Sopenharmony_cistruct VOP2_instruction : public Instruction {};
1409bf215546Sopenharmony_cistatic_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
1410bf215546Sopenharmony_ci
1411bf215546Sopenharmony_cistruct VOPC_instruction : public Instruction {};
1412bf215546Sopenharmony_cistatic_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_cistruct VOP3_instruction : public Instruction {
1415bf215546Sopenharmony_ci   bool abs[3];
1416bf215546Sopenharmony_ci   bool neg[3];
1417bf215546Sopenharmony_ci   uint8_t opsel : 4;
1418bf215546Sopenharmony_ci   uint8_t omod : 2;
1419bf215546Sopenharmony_ci   bool clamp : 1;
1420bf215546Sopenharmony_ci   uint8_t padding0 : 1;
1421bf215546Sopenharmony_ci   uint8_t padding1;
1422bf215546Sopenharmony_ci};
1423bf215546Sopenharmony_cistatic_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1424bf215546Sopenharmony_ci
1425bf215546Sopenharmony_cistruct VOP3P_instruction : public Instruction {
1426bf215546Sopenharmony_ci   bool neg_lo[3];
1427bf215546Sopenharmony_ci   bool neg_hi[3]; /* abs modifier, for v_mad_mix/v_fma_mix */
1428bf215546Sopenharmony_ci   uint8_t opsel_lo : 3;
1429bf215546Sopenharmony_ci   uint8_t opsel_hi : 3;
1430bf215546Sopenharmony_ci   bool clamp : 1;
1431bf215546Sopenharmony_ci   uint8_t padding0 : 1;
1432bf215546Sopenharmony_ci   uint8_t padding1;
1433bf215546Sopenharmony_ci};
1434bf215546Sopenharmony_cistatic_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1435bf215546Sopenharmony_ci
1436bf215546Sopenharmony_ci/**
1437bf215546Sopenharmony_ci * Data Parallel Primitives Format:
1438bf215546Sopenharmony_ci * This format can be used for VOP1, VOP2 or VOPC instructions.
1439bf215546Sopenharmony_ci * The swizzle applies to the src0 operand.
1440bf215546Sopenharmony_ci *
1441bf215546Sopenharmony_ci */
1442bf215546Sopenharmony_cistruct DPP16_instruction : public Instruction {
1443bf215546Sopenharmony_ci   bool abs[2];
1444bf215546Sopenharmony_ci   bool neg[2];
1445bf215546Sopenharmony_ci   uint16_t dpp_ctrl;
1446bf215546Sopenharmony_ci   uint8_t row_mask : 4;
1447bf215546Sopenharmony_ci   uint8_t bank_mask : 4;
1448bf215546Sopenharmony_ci   bool bound_ctrl : 1;
1449bf215546Sopenharmony_ci   uint8_t padding : 7;
1450bf215546Sopenharmony_ci};
1451bf215546Sopenharmony_cistatic_assert(sizeof(DPP16_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_cistruct DPP8_instruction : public Instruction {
1454bf215546Sopenharmony_ci   uint8_t lane_sel[8];
1455bf215546Sopenharmony_ci};
1456bf215546Sopenharmony_cistatic_assert(sizeof(DPP8_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1457bf215546Sopenharmony_ci
1458bf215546Sopenharmony_cistruct SubdwordSel {
1459bf215546Sopenharmony_ci   enum sdwa_sel : uint8_t {
1460bf215546Sopenharmony_ci      ubyte = 0x4,
1461bf215546Sopenharmony_ci      uword = 0x8,
1462bf215546Sopenharmony_ci      dword = 0x10,
1463bf215546Sopenharmony_ci      sext = 0x20,
1464bf215546Sopenharmony_ci      sbyte = ubyte | sext,
1465bf215546Sopenharmony_ci      sword = uword | sext,
1466bf215546Sopenharmony_ci
1467bf215546Sopenharmony_ci      ubyte0 = ubyte,
1468bf215546Sopenharmony_ci      ubyte1 = ubyte | 1,
1469bf215546Sopenharmony_ci      ubyte2 = ubyte | 2,
1470bf215546Sopenharmony_ci      ubyte3 = ubyte | 3,
1471bf215546Sopenharmony_ci      sbyte0 = sbyte,
1472bf215546Sopenharmony_ci      sbyte1 = sbyte | 1,
1473bf215546Sopenharmony_ci      sbyte2 = sbyte | 2,
1474bf215546Sopenharmony_ci      sbyte3 = sbyte | 3,
1475bf215546Sopenharmony_ci      uword0 = uword,
1476bf215546Sopenharmony_ci      uword1 = uword | 2,
1477bf215546Sopenharmony_ci      sword0 = sword,
1478bf215546Sopenharmony_ci      sword1 = sword | 2,
1479bf215546Sopenharmony_ci   };
1480bf215546Sopenharmony_ci
1481bf215546Sopenharmony_ci   SubdwordSel() : sel((sdwa_sel)0) {}
1482bf215546Sopenharmony_ci   constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
1483bf215546Sopenharmony_ci   constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
1484bf215546Sopenharmony_ci       : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
1485bf215546Sopenharmony_ci   {}
1486bf215546Sopenharmony_ci   constexpr operator sdwa_sel() const { return sel; }
1487bf215546Sopenharmony_ci   explicit operator bool() const { return sel != 0; }
1488bf215546Sopenharmony_ci
1489bf215546Sopenharmony_ci   constexpr unsigned size() const { return (sel >> 2) & 0x7; }
1490bf215546Sopenharmony_ci   constexpr unsigned offset() const { return sel & 0x3; }
1491bf215546Sopenharmony_ci   constexpr bool sign_extend() const { return sel & sext; }
1492bf215546Sopenharmony_ci   constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
1493bf215546Sopenharmony_ci   {
1494bf215546Sopenharmony_ci      reg_byte_offset += offset();
1495bf215546Sopenharmony_ci      if (size() == 1)
1496bf215546Sopenharmony_ci         return reg_byte_offset;
1497bf215546Sopenharmony_ci      else if (size() == 2)
1498bf215546Sopenharmony_ci         return 4 + (reg_byte_offset >> 1);
1499bf215546Sopenharmony_ci      else
1500bf215546Sopenharmony_ci         return 6;
1501bf215546Sopenharmony_ci   }
1502bf215546Sopenharmony_ci
1503bf215546Sopenharmony_ciprivate:
1504bf215546Sopenharmony_ci   sdwa_sel sel;
1505bf215546Sopenharmony_ci};
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci/**
1508bf215546Sopenharmony_ci * Sub-Dword Addressing Format:
1509bf215546Sopenharmony_ci * This format can be used for VOP1, VOP2 or VOPC instructions.
1510bf215546Sopenharmony_ci *
1511bf215546Sopenharmony_ci * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
1512bf215546Sopenharmony_ci * the definition doesn't have to be VCC on GFX9+.
1513bf215546Sopenharmony_ci *
1514bf215546Sopenharmony_ci */
1515bf215546Sopenharmony_cistruct SDWA_instruction : public Instruction {
1516bf215546Sopenharmony_ci   /* these destination modifiers aren't available with VOPC except for
1517bf215546Sopenharmony_ci    * clamp on GFX8 */
1518bf215546Sopenharmony_ci   SubdwordSel sel[2];
1519bf215546Sopenharmony_ci   SubdwordSel dst_sel;
1520bf215546Sopenharmony_ci   bool neg[2];
1521bf215546Sopenharmony_ci   bool abs[2];
1522bf215546Sopenharmony_ci   bool clamp : 1;
1523bf215546Sopenharmony_ci   uint8_t omod : 2; /* GFX9+ */
1524bf215546Sopenharmony_ci   uint8_t padding : 5;
1525bf215546Sopenharmony_ci};
1526bf215546Sopenharmony_cistatic_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1527bf215546Sopenharmony_ci
1528bf215546Sopenharmony_cistruct Interp_instruction : public Instruction {
1529bf215546Sopenharmony_ci   uint8_t attribute;
1530bf215546Sopenharmony_ci   uint8_t component;
1531bf215546Sopenharmony_ci   uint16_t padding;
1532bf215546Sopenharmony_ci};
1533bf215546Sopenharmony_cistatic_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci/**
1536bf215546Sopenharmony_ci * Local and Global Data Sharing instructions
1537bf215546Sopenharmony_ci * Operand(0): ADDR - VGPR which supplies the address.
1538bf215546Sopenharmony_ci * Operand(1): DATA0 - First data VGPR.
1539bf215546Sopenharmony_ci * Operand(2): DATA1 - Second data VGPR.
1540bf215546Sopenharmony_ci * Operand(n-1): M0 - LDS size.
1541bf215546Sopenharmony_ci * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
1542bf215546Sopenharmony_ci *
1543bf215546Sopenharmony_ci */
1544bf215546Sopenharmony_cistruct DS_instruction : public Instruction {
1545bf215546Sopenharmony_ci   memory_sync_info sync;
1546bf215546Sopenharmony_ci   bool gds;
1547bf215546Sopenharmony_ci   uint16_t offset0;
1548bf215546Sopenharmony_ci   uint8_t offset1;
1549bf215546Sopenharmony_ci   uint8_t padding;
1550bf215546Sopenharmony_ci};
1551bf215546Sopenharmony_cistatic_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1552bf215546Sopenharmony_ci
1553bf215546Sopenharmony_ci/**
1554bf215546Sopenharmony_ci * Vector Memory Untyped-buffer Instructions
1555bf215546Sopenharmony_ci * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1556bf215546Sopenharmony_ci * Operand(1): VADDR - Address source. Can carry an index and/or offset
1557bf215546Sopenharmony_ci * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1558bf215546Sopenharmony_ci * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1559bf215546Sopenharmony_ci *
1560bf215546Sopenharmony_ci */
1561bf215546Sopenharmony_cistruct MUBUF_instruction : public Instruction {
1562bf215546Sopenharmony_ci   memory_sync_info sync;
1563bf215546Sopenharmony_ci   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
1564bf215546Sopenharmony_ci   bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
1565bf215546Sopenharmony_ci   bool addr64 : 1;          /* SI, CIK: Address size is 64-bit */
1566bf215546Sopenharmony_ci   bool glc : 1;             /* globally coherent */
1567bf215546Sopenharmony_ci   bool dlc : 1;             /* NAVI: device level coherent */
1568bf215546Sopenharmony_ci   bool slc : 1;             /* system level coherent */
1569bf215546Sopenharmony_ci   bool tfe : 1;             /* texture fail enable */
1570bf215546Sopenharmony_ci   bool lds : 1;             /* Return read-data to LDS instead of VGPRs */
1571bf215546Sopenharmony_ci   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
1572bf215546Sopenharmony_ci   uint16_t offset : 12;     /* Unsigned byte offset - 12 bit */
1573bf215546Sopenharmony_ci   uint16_t swizzled : 1;
1574bf215546Sopenharmony_ci   uint16_t padding0 : 2;
1575bf215546Sopenharmony_ci   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
1576bf215546Sopenharmony_ci   uint16_t padding1 : 10;
1577bf215546Sopenharmony_ci};
1578bf215546Sopenharmony_cistatic_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1579bf215546Sopenharmony_ci
1580bf215546Sopenharmony_ci/**
1581bf215546Sopenharmony_ci * Vector Memory Typed-buffer Instructions
1582bf215546Sopenharmony_ci * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1583bf215546Sopenharmony_ci * Operand(1): VADDR - Address source. Can carry an index and/or offset
1584bf215546Sopenharmony_ci * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1585bf215546Sopenharmony_ci * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1586bf215546Sopenharmony_ci *
1587bf215546Sopenharmony_ci */
1588bf215546Sopenharmony_cistruct MTBUF_instruction : public Instruction {
1589bf215546Sopenharmony_ci   memory_sync_info sync;
1590bf215546Sopenharmony_ci   uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
1591bf215546Sopenharmony_ci   uint8_t nfmt : 3;         /* Numeric format of data in memory */
1592bf215546Sopenharmony_ci   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
1593bf215546Sopenharmony_ci   uint16_t idxen : 1;       /* Supply an index from VGPR (VADDR) */
1594bf215546Sopenharmony_ci   uint16_t glc : 1;         /* globally coherent */
1595bf215546Sopenharmony_ci   uint16_t dlc : 1;         /* NAVI: device level coherent */
1596bf215546Sopenharmony_ci   uint16_t slc : 1;         /* system level coherent */
1597bf215546Sopenharmony_ci   uint16_t tfe : 1;         /* texture fail enable */
1598bf215546Sopenharmony_ci   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
1599bf215546Sopenharmony_ci   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
1600bf215546Sopenharmony_ci   uint16_t padding : 4;
1601bf215546Sopenharmony_ci   uint16_t offset; /* Unsigned byte offset - 12 bit */
1602bf215546Sopenharmony_ci};
1603bf215546Sopenharmony_cistatic_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci/**
1606bf215546Sopenharmony_ci * Vector Memory Image Instructions
1607bf215546Sopenharmony_ci * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
1608bf215546Sopenharmony_ci * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
1609bf215546Sopenharmony_ci * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
1610bf215546Sopenharmony_ci * Operand(3): VADDR - Address source. Can carry an offset or an index.
1611bf215546Sopenharmony_ci * Definition(0): VDATA - Vector GPR for read result.
1612bf215546Sopenharmony_ci *
1613bf215546Sopenharmony_ci */
1614bf215546Sopenharmony_cistruct MIMG_instruction : public Instruction {
1615bf215546Sopenharmony_ci   memory_sync_info sync;
1616bf215546Sopenharmony_ci   uint8_t dmask;        /* Data VGPR enable mask */
1617bf215546Sopenharmony_ci   uint8_t dim : 3;      /* NAVI: dimensionality */
1618bf215546Sopenharmony_ci   bool unrm : 1;        /* Force address to be un-normalized */
1619bf215546Sopenharmony_ci   bool dlc : 1;         /* NAVI: device level coherent */
1620bf215546Sopenharmony_ci   bool glc : 1;         /* globally coherent */
1621bf215546Sopenharmony_ci   bool slc : 1;         /* system level coherent */
1622bf215546Sopenharmony_ci   bool tfe : 1;         /* texture fail enable */
1623bf215546Sopenharmony_ci   bool da : 1;          /* declare an array */
1624bf215546Sopenharmony_ci   bool lwe : 1;         /* LOD warning enable */
1625bf215546Sopenharmony_ci   bool r128 : 1;        /* NAVI: Texture resource size */
1626bf215546Sopenharmony_ci   bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
1627bf215546Sopenharmony_ci   bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
1628bf215546Sopenharmony_ci   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
1629bf215546Sopenharmony_ci   uint8_t padding0 : 2;
1630bf215546Sopenharmony_ci   uint8_t padding1;
1631bf215546Sopenharmony_ci   uint8_t padding2;
1632bf215546Sopenharmony_ci};
1633bf215546Sopenharmony_cistatic_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1634bf215546Sopenharmony_ci
1635bf215546Sopenharmony_ci/**
1636bf215546Sopenharmony_ci * Flat/Scratch/Global Instructions
1637bf215546Sopenharmony_ci * Operand(0): ADDR
1638bf215546Sopenharmony_ci * Operand(1): SADDR
1639bf215546Sopenharmony_ci * Operand(2) / Definition(0): DATA/VDST
1640bf215546Sopenharmony_ci *
1641bf215546Sopenharmony_ci */
1642bf215546Sopenharmony_cistruct FLAT_instruction : public Instruction {
1643bf215546Sopenharmony_ci   memory_sync_info sync;
1644bf215546Sopenharmony_ci   bool slc : 1; /* system level coherent */
1645bf215546Sopenharmony_ci   bool glc : 1; /* globally coherent */
1646bf215546Sopenharmony_ci   bool dlc : 1; /* NAVI: device level coherent */
1647bf215546Sopenharmony_ci   bool lds : 1;
1648bf215546Sopenharmony_ci   bool nv : 1;
1649bf215546Sopenharmony_ci   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
1650bf215546Sopenharmony_ci   uint8_t padding0 : 2;
1651bf215546Sopenharmony_ci   int16_t offset; /* Vega/Navi only */
1652bf215546Sopenharmony_ci   uint16_t padding1;
1653bf215546Sopenharmony_ci};
1654bf215546Sopenharmony_cistatic_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1655bf215546Sopenharmony_ci
1656bf215546Sopenharmony_cistruct Export_instruction : public Instruction {
1657bf215546Sopenharmony_ci   uint8_t enabled_mask;
1658bf215546Sopenharmony_ci   uint8_t dest;
1659bf215546Sopenharmony_ci   bool compressed : 1;
1660bf215546Sopenharmony_ci   bool done : 1;
1661bf215546Sopenharmony_ci   bool valid_mask : 1;
1662bf215546Sopenharmony_ci   uint8_t padding0 : 5;
1663bf215546Sopenharmony_ci   uint8_t padding1;
1664bf215546Sopenharmony_ci};
1665bf215546Sopenharmony_cistatic_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1666bf215546Sopenharmony_ci
1667bf215546Sopenharmony_cistruct Pseudo_instruction : public Instruction {
1668bf215546Sopenharmony_ci   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
1669bf215546Sopenharmony_ci   bool tmp_in_scc;
1670bf215546Sopenharmony_ci   uint8_t padding;
1671bf215546Sopenharmony_ci};
1672bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1673bf215546Sopenharmony_ci
1674bf215546Sopenharmony_cistruct Pseudo_branch_instruction : public Instruction {
1675bf215546Sopenharmony_ci   /* target[0] is the block index of the branch target.
1676bf215546Sopenharmony_ci    * For conditional branches, target[1] contains the fall-through alternative.
1677bf215546Sopenharmony_ci    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
1678bf215546Sopenharmony_ci    */
1679bf215546Sopenharmony_ci   uint32_t target[2];
1680bf215546Sopenharmony_ci};
1681bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_cistruct Pseudo_barrier_instruction : public Instruction {
1684bf215546Sopenharmony_ci   memory_sync_info sync;
1685bf215546Sopenharmony_ci   sync_scope exec_scope;
1686bf215546Sopenharmony_ci};
1687bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_cienum ReduceOp : uint16_t {
1690bf215546Sopenharmony_ci   // clang-format off
1691bf215546Sopenharmony_ci   iadd8, iadd16, iadd32, iadd64,
1692bf215546Sopenharmony_ci   imul8, imul16, imul32, imul64,
1693bf215546Sopenharmony_ci          fadd16, fadd32, fadd64,
1694bf215546Sopenharmony_ci          fmul16, fmul32, fmul64,
1695bf215546Sopenharmony_ci   imin8, imin16, imin32, imin64,
1696bf215546Sopenharmony_ci   imax8, imax16, imax32, imax64,
1697bf215546Sopenharmony_ci   umin8, umin16, umin32, umin64,
1698bf215546Sopenharmony_ci   umax8, umax16, umax32, umax64,
1699bf215546Sopenharmony_ci          fmin16, fmin32, fmin64,
1700bf215546Sopenharmony_ci          fmax16, fmax32, fmax64,
1701bf215546Sopenharmony_ci   iand8, iand16, iand32, iand64,
1702bf215546Sopenharmony_ci   ior8, ior16, ior32, ior64,
1703bf215546Sopenharmony_ci   ixor8, ixor16, ixor32, ixor64,
1704bf215546Sopenharmony_ci   num_reduce_ops,
1705bf215546Sopenharmony_ci   // clang-format on
1706bf215546Sopenharmony_ci};
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_ci/**
1709bf215546Sopenharmony_ci * Subgroup Reduction Instructions, everything except for the data to be
1710bf215546Sopenharmony_ci * reduced and the result as inserted by setup_reduce_temp().
1711bf215546Sopenharmony_ci * Operand(0): data to be reduced
1712bf215546Sopenharmony_ci * Operand(1): reduce temporary
1713bf215546Sopenharmony_ci * Operand(2): vector temporary
1714bf215546Sopenharmony_ci * Definition(0): result
1715bf215546Sopenharmony_ci * Definition(1): scalar temporary
1716bf215546Sopenharmony_ci * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1717bf215546Sopenharmony_ci * Definition(3): scc clobber
1718bf215546Sopenharmony_ci * Definition(4): vcc clobber
1719bf215546Sopenharmony_ci *
1720bf215546Sopenharmony_ci */
1721bf215546Sopenharmony_cistruct Pseudo_reduction_instruction : public Instruction {
1722bf215546Sopenharmony_ci   ReduceOp reduce_op;
1723bf215546Sopenharmony_ci   uint16_t cluster_size; // must be 0 for scans
1724bf215546Sopenharmony_ci};
1725bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
1726bf215546Sopenharmony_ci              "Unexpected padding");
1727bf215546Sopenharmony_ci
1728bf215546Sopenharmony_cistruct instr_deleter_functor {
1729bf215546Sopenharmony_ci   void operator()(void* p) { free(p); }
1730bf215546Sopenharmony_ci};
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_citemplate <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1733bf215546Sopenharmony_ci
1734bf215546Sopenharmony_citemplate <typename T>
1735bf215546Sopenharmony_ciT*
1736bf215546Sopenharmony_cicreate_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
1737bf215546Sopenharmony_ci                   uint32_t num_definitions)
1738bf215546Sopenharmony_ci{
1739bf215546Sopenharmony_ci   std::size_t size =
1740bf215546Sopenharmony_ci      sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
1741bf215546Sopenharmony_ci   char* data = (char*)calloc(1, size);
1742bf215546Sopenharmony_ci   T* inst = (T*)data;
1743bf215546Sopenharmony_ci
1744bf215546Sopenharmony_ci   inst->opcode = opcode;
1745bf215546Sopenharmony_ci   inst->format = format;
1746bf215546Sopenharmony_ci
1747bf215546Sopenharmony_ci   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
1748bf215546Sopenharmony_ci   inst->operands = aco::span<Operand>(operands_offset, num_operands);
1749bf215546Sopenharmony_ci   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
1750bf215546Sopenharmony_ci   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
1751bf215546Sopenharmony_ci
1752bf215546Sopenharmony_ci   return inst;
1753bf215546Sopenharmony_ci}
1754bf215546Sopenharmony_ci
1755bf215546Sopenharmony_ciconstexpr bool
1756bf215546Sopenharmony_ciInstruction::usesModifiers() const noexcept
1757bf215546Sopenharmony_ci{
1758bf215546Sopenharmony_ci   if (isDPP() || isSDWA())
1759bf215546Sopenharmony_ci      return true;
1760bf215546Sopenharmony_ci
1761bf215546Sopenharmony_ci   if (isVOP3P()) {
1762bf215546Sopenharmony_ci      const VOP3P_instruction& vop3p = this->vop3p();
1763bf215546Sopenharmony_ci      for (unsigned i = 0; i < operands.size(); i++) {
1764bf215546Sopenharmony_ci         if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
1765bf215546Sopenharmony_ci            return true;
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci         /* opsel_hi must be 1 to not be considered a modifier - even for constants */
1768bf215546Sopenharmony_ci         if (!(vop3p.opsel_hi & (1 << i)))
1769bf215546Sopenharmony_ci            return true;
1770bf215546Sopenharmony_ci      }
1771bf215546Sopenharmony_ci      return vop3p.opsel_lo || vop3p.clamp;
1772bf215546Sopenharmony_ci   } else if (isVOP3()) {
1773bf215546Sopenharmony_ci      const VOP3_instruction& vop3 = this->vop3();
1774bf215546Sopenharmony_ci      for (unsigned i = 0; i < operands.size(); i++) {
1775bf215546Sopenharmony_ci         if (vop3.abs[i] || vop3.neg[i])
1776bf215546Sopenharmony_ci            return true;
1777bf215546Sopenharmony_ci      }
1778bf215546Sopenharmony_ci      return vop3.opsel || vop3.clamp || vop3.omod;
1779bf215546Sopenharmony_ci   }
1780bf215546Sopenharmony_ci   return false;
1781bf215546Sopenharmony_ci}
1782bf215546Sopenharmony_ci
1783bf215546Sopenharmony_ciconstexpr bool
1784bf215546Sopenharmony_ciis_phi(Instruction* instr)
1785bf215546Sopenharmony_ci{
1786bf215546Sopenharmony_ci   return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1787bf215546Sopenharmony_ci}
1788bf215546Sopenharmony_ci
1789bf215546Sopenharmony_cistatic inline bool
1790bf215546Sopenharmony_ciis_phi(aco_ptr<Instruction>& instr)
1791bf215546Sopenharmony_ci{
1792bf215546Sopenharmony_ci   return is_phi(instr.get());
1793bf215546Sopenharmony_ci}
1794bf215546Sopenharmony_ci
1795bf215546Sopenharmony_cimemory_sync_info get_sync_info(const Instruction* instr);
1796bf215546Sopenharmony_ci
1797bf215546Sopenharmony_cibool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
1798bf215546Sopenharmony_ci
1799bf215546Sopenharmony_cibool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
1800bf215546Sopenharmony_cibool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
1801bf215546Sopenharmony_cibool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
1802bf215546Sopenharmony_cibool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8);
1803bf215546Sopenharmony_ci/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
1804bf215546Sopenharmony_ciaco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr);
1805bf215546Sopenharmony_ciaco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8);
1806bf215546Sopenharmony_cibool needs_exec_mask(const Instruction* instr);
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ciaco_opcode get_ordered(aco_opcode op);
1809bf215546Sopenharmony_ciaco_opcode get_unordered(aco_opcode op);
1810bf215546Sopenharmony_ciaco_opcode get_inverse(aco_opcode op);
1811bf215546Sopenharmony_ciaco_opcode get_f32_cmp(aco_opcode op);
1812bf215546Sopenharmony_ciaco_opcode get_vcmpx(aco_opcode op);
1813bf215546Sopenharmony_ciunsigned get_cmp_bitsize(aco_opcode op);
1814bf215546Sopenharmony_cibool is_cmp(aco_opcode op);
1815bf215546Sopenharmony_ci
1816bf215546Sopenharmony_cibool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op);
1817bf215546Sopenharmony_ci
1818bf215546Sopenharmony_ciuint32_t get_reduction_identity(ReduceOp op, unsigned idx);
1819bf215546Sopenharmony_ci
1820bf215546Sopenharmony_ciunsigned get_mimg_nsa_dwords(const Instruction* instr);
1821bf215546Sopenharmony_ci
1822bf215546Sopenharmony_cibool should_form_clause(const Instruction* a, const Instruction* b);
1823bf215546Sopenharmony_ci
1824bf215546Sopenharmony_cienum block_kind {
1825bf215546Sopenharmony_ci   /* uniform indicates that leaving this block,
1826bf215546Sopenharmony_ci    * all actives lanes stay active */
1827bf215546Sopenharmony_ci   block_kind_uniform = 1 << 0,
1828bf215546Sopenharmony_ci   block_kind_top_level = 1 << 1,
1829bf215546Sopenharmony_ci   block_kind_loop_preheader = 1 << 2,
1830bf215546Sopenharmony_ci   block_kind_loop_header = 1 << 3,
1831bf215546Sopenharmony_ci   block_kind_loop_exit = 1 << 4,
1832bf215546Sopenharmony_ci   block_kind_continue = 1 << 5,
1833bf215546Sopenharmony_ci   block_kind_break = 1 << 6,
1834bf215546Sopenharmony_ci   block_kind_continue_or_break = 1 << 7,
1835bf215546Sopenharmony_ci   block_kind_branch = 1 << 8,
1836bf215546Sopenharmony_ci   block_kind_merge = 1 << 9,
1837bf215546Sopenharmony_ci   block_kind_invert = 1 << 10,
1838bf215546Sopenharmony_ci   block_kind_uses_discard = 1 << 12,
1839bf215546Sopenharmony_ci   block_kind_needs_lowering = 1 << 13,
1840bf215546Sopenharmony_ci   block_kind_export_end = 1 << 15,
1841bf215546Sopenharmony_ci};
1842bf215546Sopenharmony_ci
1843bf215546Sopenharmony_cistruct RegisterDemand {
1844bf215546Sopenharmony_ci   constexpr RegisterDemand() = default;
1845bf215546Sopenharmony_ci   constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
1846bf215546Sopenharmony_ci   int16_t vgpr = 0;
1847bf215546Sopenharmony_ci   int16_t sgpr = 0;
1848bf215546Sopenharmony_ci
1849bf215546Sopenharmony_ci   constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
1850bf215546Sopenharmony_ci   {
1851bf215546Sopenharmony_ci      return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1852bf215546Sopenharmony_ci   }
1853bf215546Sopenharmony_ci
1854bf215546Sopenharmony_ci   constexpr bool exceeds(const RegisterDemand other) const noexcept
1855bf215546Sopenharmony_ci   {
1856bf215546Sopenharmony_ci      return vgpr > other.vgpr || sgpr > other.sgpr;
1857bf215546Sopenharmony_ci   }
1858bf215546Sopenharmony_ci
1859bf215546Sopenharmony_ci   constexpr RegisterDemand operator+(const Temp t) const noexcept
1860bf215546Sopenharmony_ci   {
1861bf215546Sopenharmony_ci      if (t.type() == RegType::sgpr)
1862bf215546Sopenharmony_ci         return RegisterDemand(vgpr, sgpr + t.size());
1863bf215546Sopenharmony_ci      else
1864bf215546Sopenharmony_ci         return RegisterDemand(vgpr + t.size(), sgpr);
1865bf215546Sopenharmony_ci   }
1866bf215546Sopenharmony_ci
1867bf215546Sopenharmony_ci   constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
1868bf215546Sopenharmony_ci   {
1869bf215546Sopenharmony_ci      return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1870bf215546Sopenharmony_ci   }
1871bf215546Sopenharmony_ci
1872bf215546Sopenharmony_ci   constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
1873bf215546Sopenharmony_ci   {
1874bf215546Sopenharmony_ci      return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1875bf215546Sopenharmony_ci   }
1876bf215546Sopenharmony_ci
1877bf215546Sopenharmony_ci   constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
1878bf215546Sopenharmony_ci   {
1879bf215546Sopenharmony_ci      vgpr += other.vgpr;
1880bf215546Sopenharmony_ci      sgpr += other.sgpr;
1881bf215546Sopenharmony_ci      return *this;
1882bf215546Sopenharmony_ci   }
1883bf215546Sopenharmony_ci
1884bf215546Sopenharmony_ci   constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
1885bf215546Sopenharmony_ci   {
1886bf215546Sopenharmony_ci      vgpr -= other.vgpr;
1887bf215546Sopenharmony_ci      sgpr -= other.sgpr;
1888bf215546Sopenharmony_ci      return *this;
1889bf215546Sopenharmony_ci   }
1890bf215546Sopenharmony_ci
1891bf215546Sopenharmony_ci   constexpr RegisterDemand& operator+=(const Temp t) noexcept
1892bf215546Sopenharmony_ci   {
1893bf215546Sopenharmony_ci      if (t.type() == RegType::sgpr)
1894bf215546Sopenharmony_ci         sgpr += t.size();
1895bf215546Sopenharmony_ci      else
1896bf215546Sopenharmony_ci         vgpr += t.size();
1897bf215546Sopenharmony_ci      return *this;
1898bf215546Sopenharmony_ci   }
1899bf215546Sopenharmony_ci
1900bf215546Sopenharmony_ci   constexpr RegisterDemand& operator-=(const Temp t) noexcept
1901bf215546Sopenharmony_ci   {
1902bf215546Sopenharmony_ci      if (t.type() == RegType::sgpr)
1903bf215546Sopenharmony_ci         sgpr -= t.size();
1904bf215546Sopenharmony_ci      else
1905bf215546Sopenharmony_ci         vgpr -= t.size();
1906bf215546Sopenharmony_ci      return *this;
1907bf215546Sopenharmony_ci   }
1908bf215546Sopenharmony_ci
1909bf215546Sopenharmony_ci   constexpr void update(const RegisterDemand other) noexcept
1910bf215546Sopenharmony_ci   {
1911bf215546Sopenharmony_ci      vgpr = std::max(vgpr, other.vgpr);
1912bf215546Sopenharmony_ci      sgpr = std::max(sgpr, other.sgpr);
1913bf215546Sopenharmony_ci   }
1914bf215546Sopenharmony_ci};
1915bf215546Sopenharmony_ci
1916bf215546Sopenharmony_ci/* CFG */
1917bf215546Sopenharmony_cistruct Block {
1918bf215546Sopenharmony_ci   float_mode fp_mode;
1919bf215546Sopenharmony_ci   unsigned index;
1920bf215546Sopenharmony_ci   unsigned offset = 0;
1921bf215546Sopenharmony_ci   std::vector<aco_ptr<Instruction>> instructions;
1922bf215546Sopenharmony_ci   std::vector<unsigned> logical_preds;
1923bf215546Sopenharmony_ci   std::vector<unsigned> linear_preds;
1924bf215546Sopenharmony_ci   std::vector<unsigned> logical_succs;
1925bf215546Sopenharmony_ci   std::vector<unsigned> linear_succs;
1926bf215546Sopenharmony_ci   RegisterDemand register_demand = RegisterDemand();
1927bf215546Sopenharmony_ci   uint16_t loop_nest_depth = 0;
1928bf215546Sopenharmony_ci   uint16_t divergent_if_logical_depth = 0;
1929bf215546Sopenharmony_ci   uint16_t uniform_if_depth = 0;
1930bf215546Sopenharmony_ci   uint16_t kind = 0;
1931bf215546Sopenharmony_ci   int logical_idom = -1;
1932bf215546Sopenharmony_ci   int linear_idom = -1;
1933bf215546Sopenharmony_ci
1934bf215546Sopenharmony_ci   /* this information is needed for predecessors to blocks with phis when
1935bf215546Sopenharmony_ci    * moving out of ssa */
1936bf215546Sopenharmony_ci   bool scc_live_out = false;
1937bf215546Sopenharmony_ci
1938bf215546Sopenharmony_ci   Block() : index(0) {}
1939bf215546Sopenharmony_ci};
1940bf215546Sopenharmony_ci
1941bf215546Sopenharmony_ci/*
1942bf215546Sopenharmony_ci * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
1943bf215546Sopenharmony_ci */
1944bf215546Sopenharmony_cienum class SWStage : uint16_t {
1945bf215546Sopenharmony_ci   None = 0,
1946bf215546Sopenharmony_ci   VS = 1 << 0,     /* Vertex Shader */
1947bf215546Sopenharmony_ci   GS = 1 << 1,     /* Geometry Shader */
1948bf215546Sopenharmony_ci   TCS = 1 << 2,    /* Tessellation Control aka Hull Shader */
1949bf215546Sopenharmony_ci   TES = 1 << 3,    /* Tessellation Evaluation aka Domain Shader */
1950bf215546Sopenharmony_ci   FS = 1 << 4,     /* Fragment aka Pixel Shader */
1951bf215546Sopenharmony_ci   CS = 1 << 5,     /* Compute Shader */
1952bf215546Sopenharmony_ci   TS = 1 << 6,     /* Task Shader */
1953bf215546Sopenharmony_ci   MS = 1 << 7,     /* Mesh Shader */
1954bf215546Sopenharmony_ci   GSCopy = 1 << 8, /* GS Copy Shader (internal) */
1955bf215546Sopenharmony_ci
1956bf215546Sopenharmony_ci   /* Stage combinations merged to run on a single HWStage */
1957bf215546Sopenharmony_ci   VS_GS = VS | GS,
1958bf215546Sopenharmony_ci   VS_TCS = VS | TCS,
1959bf215546Sopenharmony_ci   TES_GS = TES | GS,
1960bf215546Sopenharmony_ci};
1961bf215546Sopenharmony_ci
1962bf215546Sopenharmony_ciconstexpr SWStage
1963bf215546Sopenharmony_cioperator|(SWStage a, SWStage b)
1964bf215546Sopenharmony_ci{
1965bf215546Sopenharmony_ci   return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
1966bf215546Sopenharmony_ci}
1967bf215546Sopenharmony_ci
1968bf215546Sopenharmony_ci/*
1969bf215546Sopenharmony_ci * Shader stages as running on the AMD GPU.
1970bf215546Sopenharmony_ci *
1971bf215546Sopenharmony_ci * The relation between HWStages and SWStages is not a one-to-one mapping:
1972bf215546Sopenharmony_ci * Some SWStages are merged by ACO to run on a single HWStage.
1973bf215546Sopenharmony_ci * See README.md for details.
1974bf215546Sopenharmony_ci */
1975bf215546Sopenharmony_cienum class HWStage : uint8_t {
1976bf215546Sopenharmony_ci   VS,
1977bf215546Sopenharmony_ci   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
1978bf215546Sopenharmony_ci   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
1979bf215546Sopenharmony_ci   NGG, /* Primitive shader, used to implement VS, TES, GS. */
1980bf215546Sopenharmony_ci   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
1981bf215546Sopenharmony_ci   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
1982bf215546Sopenharmony_ci   FS,
1983bf215546Sopenharmony_ci   CS,
1984bf215546Sopenharmony_ci};
1985bf215546Sopenharmony_ci
1986bf215546Sopenharmony_ci/*
1987bf215546Sopenharmony_ci * Set of SWStages to be merged into a single shader paired with the
1988bf215546Sopenharmony_ci * HWStage it will run on.
1989bf215546Sopenharmony_ci */
1990bf215546Sopenharmony_cistruct Stage {
1991bf215546Sopenharmony_ci   constexpr Stage() = default;
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
1994bf215546Sopenharmony_ci
1995bf215546Sopenharmony_ci   /* Check if the given SWStage is included */
1996bf215546Sopenharmony_ci   constexpr bool has(SWStage stage) const
1997bf215546Sopenharmony_ci   {
1998bf215546Sopenharmony_ci      return (static_cast<uint16_t>(sw) & static_cast<uint16_t>(stage));
1999bf215546Sopenharmony_ci   }
2000bf215546Sopenharmony_ci
2001bf215546Sopenharmony_ci   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint16_t>(sw)); }
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ci   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }
2004bf215546Sopenharmony_ci
2005bf215546Sopenharmony_ci   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }
2006bf215546Sopenharmony_ci
2007bf215546Sopenharmony_ci   /* Mask of merged software stages */
2008bf215546Sopenharmony_ci   SWStage sw = SWStage::None;
2009bf215546Sopenharmony_ci
2010bf215546Sopenharmony_ci   /* Active hardware stage */
2011bf215546Sopenharmony_ci   HWStage hw{};
2012bf215546Sopenharmony_ci};
2013bf215546Sopenharmony_ci
2014bf215546Sopenharmony_ci/* possible settings of Program::stage */
2015bf215546Sopenharmony_cistatic constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
2016bf215546Sopenharmony_cistatic constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
2017bf215546Sopenharmony_cistatic constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
2018bf215546Sopenharmony_cistatic constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
2019bf215546Sopenharmony_cistatic constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
2020bf215546Sopenharmony_ci/* Mesh shading pipeline */
2021bf215546Sopenharmony_cistatic constexpr Stage task_cs(HWStage::CS, SWStage::TS);
2022bf215546Sopenharmony_cistatic constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS);
2023bf215546Sopenharmony_ci/* GFX10/NGG */
2024bf215546Sopenharmony_cistatic constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
2025bf215546Sopenharmony_cistatic constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
2026bf215546Sopenharmony_cistatic constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
2027bf215546Sopenharmony_cistatic constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
2028bf215546Sopenharmony_ci/* GFX9 (and GFX10 if NGG isn't used) */
2029bf215546Sopenharmony_cistatic constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
2030bf215546Sopenharmony_cistatic constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
2031bf215546Sopenharmony_cistatic constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
2032bf215546Sopenharmony_ci/* pre-GFX9 */
2033bf215546Sopenharmony_cistatic constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */
2034bf215546Sopenharmony_cistatic constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
2035bf215546Sopenharmony_cistatic constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
2036bf215546Sopenharmony_cistatic constexpr Stage tess_eval_es(HWStage::ES,
2037bf215546Sopenharmony_ci                                    SWStage::TES); /* tesselation evaluation before geometry */
2038bf215546Sopenharmony_cistatic constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
2039bf215546Sopenharmony_ci
2040bf215546Sopenharmony_cienum statistic {
2041bf215546Sopenharmony_ci   statistic_hash,
2042bf215546Sopenharmony_ci   statistic_instructions,
2043bf215546Sopenharmony_ci   statistic_copies,
2044bf215546Sopenharmony_ci   statistic_branches,
2045bf215546Sopenharmony_ci   statistic_latency,
2046bf215546Sopenharmony_ci   statistic_inv_throughput,
2047bf215546Sopenharmony_ci   statistic_vmem_clauses,
2048bf215546Sopenharmony_ci   statistic_smem_clauses,
2049bf215546Sopenharmony_ci   statistic_sgpr_presched,
2050bf215546Sopenharmony_ci   statistic_vgpr_presched,
2051bf215546Sopenharmony_ci   num_statistics
2052bf215546Sopenharmony_ci};
2053bf215546Sopenharmony_ci
2054bf215546Sopenharmony_cistruct DeviceInfo {
2055bf215546Sopenharmony_ci   uint16_t lds_encoding_granule;
2056bf215546Sopenharmony_ci   uint16_t lds_alloc_granule;
2057bf215546Sopenharmony_ci   uint32_t lds_limit; /* in bytes */
2058bf215546Sopenharmony_ci   bool has_16bank_lds;
2059bf215546Sopenharmony_ci   uint16_t physical_sgprs;
2060bf215546Sopenharmony_ci   uint16_t physical_vgprs;
2061bf215546Sopenharmony_ci   uint16_t vgpr_limit;
2062bf215546Sopenharmony_ci   uint16_t sgpr_limit;
2063bf215546Sopenharmony_ci   uint16_t sgpr_alloc_granule;
2064bf215546Sopenharmony_ci   uint16_t vgpr_alloc_granule; /* must be power of two */
2065bf215546Sopenharmony_ci   unsigned max_wave64_per_simd;
2066bf215546Sopenharmony_ci   unsigned simd_per_cu;
2067bf215546Sopenharmony_ci   bool has_fast_fma32 = false;
2068bf215546Sopenharmony_ci   bool has_mac_legacy32 = false;
2069bf215546Sopenharmony_ci   bool fused_mad_mix = false;
2070bf215546Sopenharmony_ci   bool xnack_enabled = false;
2071bf215546Sopenharmony_ci   bool sram_ecc_enabled = false;
2072bf215546Sopenharmony_ci
2073bf215546Sopenharmony_ci   int16_t scratch_global_offset_min;
2074bf215546Sopenharmony_ci   int16_t scratch_global_offset_max;
2075bf215546Sopenharmony_ci};
2076bf215546Sopenharmony_ci
2077bf215546Sopenharmony_cienum class CompilationProgress {
2078bf215546Sopenharmony_ci   after_isel,
2079bf215546Sopenharmony_ci   after_spilling,
2080bf215546Sopenharmony_ci   after_ra,
2081bf215546Sopenharmony_ci};
2082bf215546Sopenharmony_ci
2083bf215546Sopenharmony_ciclass Program final {
2084bf215546Sopenharmony_cipublic:
2085bf215546Sopenharmony_ci   std::vector<Block> blocks;
2086bf215546Sopenharmony_ci   std::vector<RegClass> temp_rc = {s1};
2087bf215546Sopenharmony_ci   RegisterDemand max_reg_demand = RegisterDemand();
2088bf215546Sopenharmony_ci   ac_shader_config* config;
2089bf215546Sopenharmony_ci   struct aco_shader_info info;
2090bf215546Sopenharmony_ci   enum amd_gfx_level gfx_level;
2091bf215546Sopenharmony_ci   enum radeon_family family;
2092bf215546Sopenharmony_ci   DeviceInfo dev;
2093bf215546Sopenharmony_ci   unsigned wave_size;
2094bf215546Sopenharmony_ci   RegClass lane_mask;
2095bf215546Sopenharmony_ci   Stage stage;
2096bf215546Sopenharmony_ci   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
2097bf215546Sopenharmony_ci   bool needs_wqm = false;   /* there exists a p_wqm instruction */
2098bf215546Sopenharmony_ci
2099bf215546Sopenharmony_ci   std::vector<uint8_t> constant_data;
2100bf215546Sopenharmony_ci   Temp private_segment_buffer;
2101bf215546Sopenharmony_ci   Temp scratch_offset;
2102bf215546Sopenharmony_ci
2103bf215546Sopenharmony_ci   uint16_t num_waves = 0;
2104bf215546Sopenharmony_ci   uint16_t min_waves = 0;
2105bf215546Sopenharmony_ci   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
2106bf215546Sopenharmony_ci   bool wgp_mode;
2107bf215546Sopenharmony_ci   bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export */
2108bf215546Sopenharmony_ci
2109bf215546Sopenharmony_ci   bool needs_vcc = false;
2110bf215546Sopenharmony_ci
2111bf215546Sopenharmony_ci   CompilationProgress progress;
2112bf215546Sopenharmony_ci
2113bf215546Sopenharmony_ci   bool collect_statistics = false;
2114bf215546Sopenharmony_ci   uint32_t statistics[num_statistics];
2115bf215546Sopenharmony_ci
2116bf215546Sopenharmony_ci   float_mode next_fp_mode;
2117bf215546Sopenharmony_ci   unsigned next_loop_depth = 0;
2118bf215546Sopenharmony_ci   unsigned next_divergent_if_logical_depth = 0;
2119bf215546Sopenharmony_ci   unsigned next_uniform_if_depth = 0;
2120bf215546Sopenharmony_ci
2121bf215546Sopenharmony_ci   std::vector<Definition> vs_inputs;
2122bf215546Sopenharmony_ci
2123bf215546Sopenharmony_ci   struct {
2124bf215546Sopenharmony_ci      FILE* output = stderr;
2125bf215546Sopenharmony_ci      bool shorten_messages = false;
2126bf215546Sopenharmony_ci      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
2127bf215546Sopenharmony_ci      void* private_data;
2128bf215546Sopenharmony_ci   } debug;
2129bf215546Sopenharmony_ci
2130bf215546Sopenharmony_ci   uint32_t allocateId(RegClass rc)
2131bf215546Sopenharmony_ci   {
2132bf215546Sopenharmony_ci      assert(allocationID <= 16777215);
2133bf215546Sopenharmony_ci      temp_rc.push_back(rc);
2134bf215546Sopenharmony_ci      return allocationID++;
2135bf215546Sopenharmony_ci   }
2136bf215546Sopenharmony_ci
2137bf215546Sopenharmony_ci   void allocateRange(unsigned amount)
2138bf215546Sopenharmony_ci   {
2139bf215546Sopenharmony_ci      assert(allocationID + amount <= 16777216);
2140bf215546Sopenharmony_ci      temp_rc.resize(temp_rc.size() + amount);
2141bf215546Sopenharmony_ci      allocationID += amount;
2142bf215546Sopenharmony_ci   }
2143bf215546Sopenharmony_ci
2144bf215546Sopenharmony_ci   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }
2145bf215546Sopenharmony_ci
2146bf215546Sopenharmony_ci   uint32_t peekAllocationId() { return allocationID; }
2147bf215546Sopenharmony_ci
2148bf215546Sopenharmony_ci   friend void reindex_ssa(Program* program);
2149bf215546Sopenharmony_ci   friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out);
2150bf215546Sopenharmony_ci
2151bf215546Sopenharmony_ci   Block* create_and_insert_block()
2152bf215546Sopenharmony_ci   {
2153bf215546Sopenharmony_ci      Block block;
2154bf215546Sopenharmony_ci      return insert_block(std::move(block));
2155bf215546Sopenharmony_ci   }
2156bf215546Sopenharmony_ci
2157bf215546Sopenharmony_ci   Block* insert_block(Block&& block)
2158bf215546Sopenharmony_ci   {
2159bf215546Sopenharmony_ci      block.index = blocks.size();
2160bf215546Sopenharmony_ci      block.fp_mode = next_fp_mode;
2161bf215546Sopenharmony_ci      block.loop_nest_depth = next_loop_depth;
2162bf215546Sopenharmony_ci      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
2163bf215546Sopenharmony_ci      block.uniform_if_depth = next_uniform_if_depth;
2164bf215546Sopenharmony_ci      blocks.emplace_back(std::move(block));
2165bf215546Sopenharmony_ci      return &blocks.back();
2166bf215546Sopenharmony_ci   }
2167bf215546Sopenharmony_ci
2168bf215546Sopenharmony_ciprivate:
2169bf215546Sopenharmony_ci   uint32_t allocationID = 1;
2170bf215546Sopenharmony_ci};
2171bf215546Sopenharmony_ci
2172bf215546Sopenharmony_cistruct live {
2173bf215546Sopenharmony_ci   /* live temps out per block */
2174bf215546Sopenharmony_ci   std::vector<IDSet> live_out;
2175bf215546Sopenharmony_ci   /* register demand (sgpr/vgpr) per instruction per block */
2176bf215546Sopenharmony_ci   std::vector<std::vector<RegisterDemand>> register_demand;
2177bf215546Sopenharmony_ci};
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_cistruct ra_test_policy {
2180bf215546Sopenharmony_ci   /* Force RA to always use its pessimistic fallback algorithm */
2181bf215546Sopenharmony_ci   bool skip_optimistic_path = false;
2182bf215546Sopenharmony_ci};
2183bf215546Sopenharmony_ci
2184bf215546Sopenharmony_civoid init();
2185bf215546Sopenharmony_ci
2186bf215546Sopenharmony_civoid init_program(Program* program, Stage stage, const struct aco_shader_info* info,
2187bf215546Sopenharmony_ci                  enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
2188bf215546Sopenharmony_ci                  ac_shader_config* config);
2189bf215546Sopenharmony_ci
2190bf215546Sopenharmony_civoid select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
2191bf215546Sopenharmony_ci                    ac_shader_config* config, const struct aco_compiler_options* options,
2192bf215546Sopenharmony_ci                    const struct aco_shader_info* info,
2193bf215546Sopenharmony_ci                    const struct radv_shader_args* args);
2194bf215546Sopenharmony_civoid select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
2195bf215546Sopenharmony_ci                           const struct aco_compiler_options* options,
2196bf215546Sopenharmony_ci                           const struct aco_shader_info* info,
2197bf215546Sopenharmony_ci                           const struct radv_shader_args* args);
2198bf215546Sopenharmony_civoid select_trap_handler_shader(Program* program, struct nir_shader* shader,
2199bf215546Sopenharmony_ci                                ac_shader_config* config,
2200bf215546Sopenharmony_ci                                const struct aco_compiler_options* options,
2201bf215546Sopenharmony_ci                                const struct aco_shader_info* info,
2202bf215546Sopenharmony_ci                                const struct radv_shader_args* args);
2203bf215546Sopenharmony_civoid select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key,
2204bf215546Sopenharmony_ci                      ac_shader_config* config,
2205bf215546Sopenharmony_ci                      const struct aco_compiler_options* options,
2206bf215546Sopenharmony_ci                      const struct aco_shader_info* info,
2207bf215546Sopenharmony_ci                      const struct radv_shader_args* args,
2208bf215546Sopenharmony_ci                      unsigned* num_preserved_sgprs);
2209bf215546Sopenharmony_ci
2210bf215546Sopenharmony_civoid select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key,
2211bf215546Sopenharmony_ci                      ac_shader_config* config,
2212bf215546Sopenharmony_ci                      const struct aco_compiler_options* options,
2213bf215546Sopenharmony_ci                      const struct aco_shader_info* info,
2214bf215546Sopenharmony_ci                      const struct radv_shader_args* args);
2215bf215546Sopenharmony_ci
2216bf215546Sopenharmony_civoid lower_phis(Program* program);
2217bf215546Sopenharmony_civoid calc_min_waves(Program* program);
2218bf215546Sopenharmony_civoid update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
2219bf215546Sopenharmony_cilive live_var_analysis(Program* program);
2220bf215546Sopenharmony_cistd::vector<uint16_t> dead_code_analysis(Program* program);
2221bf215546Sopenharmony_civoid dominator_tree(Program* program);
2222bf215546Sopenharmony_civoid insert_exec_mask(Program* program);
2223bf215546Sopenharmony_civoid value_numbering(Program* program);
2224bf215546Sopenharmony_civoid optimize(Program* program);
2225bf215546Sopenharmony_civoid optimize_postRA(Program* program);
2226bf215546Sopenharmony_civoid setup_reduce_temp(Program* program);
2227bf215546Sopenharmony_civoid lower_to_cssa(Program* program, live& live_vars);
2228bf215546Sopenharmony_civoid register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
2229bf215546Sopenharmony_ci                         ra_test_policy = {});
2230bf215546Sopenharmony_civoid ssa_elimination(Program* program);
2231bf215546Sopenharmony_civoid lower_to_hw_instr(Program* program);
2232bf215546Sopenharmony_civoid schedule_program(Program* program, live& live_vars);
2233bf215546Sopenharmony_civoid spill(Program* program, live& live_vars);
2234bf215546Sopenharmony_civoid insert_wait_states(Program* program);
2235bf215546Sopenharmony_civoid insert_NOPs(Program* program);
2236bf215546Sopenharmony_civoid form_hard_clauses(Program* program);
2237bf215546Sopenharmony_ciunsigned emit_program(Program* program, std::vector<uint32_t>& code);
2238bf215546Sopenharmony_ci/**
2239bf215546Sopenharmony_ci * Returns true if print_asm can disassemble the given program for the current build/runtime
2240bf215546Sopenharmony_ci * configuration
2241bf215546Sopenharmony_ci */
2242bf215546Sopenharmony_cibool check_print_asm_support(Program* program);
2243bf215546Sopenharmony_cibool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
2244bf215546Sopenharmony_cibool validate_ir(Program* program);
2245bf215546Sopenharmony_cibool validate_ra(Program* program);
2246bf215546Sopenharmony_ci#ifndef NDEBUG
2247bf215546Sopenharmony_civoid perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL);
2248bf215546Sopenharmony_ci#else
2249bf215546Sopenharmony_ci#define perfwarn(program, cond, msg, ...)                                                          \
2250bf215546Sopenharmony_ci   do {                                                                                            \
2251bf215546Sopenharmony_ci   } while (0)
2252bf215546Sopenharmony_ci#endif
2253bf215546Sopenharmony_ci
2254bf215546Sopenharmony_civoid collect_presched_stats(Program* program);
2255bf215546Sopenharmony_civoid collect_preasm_stats(Program* program);
2256bf215546Sopenharmony_civoid collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
2257bf215546Sopenharmony_ci
2258bf215546Sopenharmony_cienum print_flags {
2259bf215546Sopenharmony_ci   print_no_ssa = 0x1,
2260bf215546Sopenharmony_ci   print_perf_info = 0x2,
2261bf215546Sopenharmony_ci   print_kill = 0x4,
2262bf215546Sopenharmony_ci   print_live_vars = 0x8,
2263bf215546Sopenharmony_ci};
2264bf215546Sopenharmony_ci
2265bf215546Sopenharmony_civoid aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
2266bf215546Sopenharmony_civoid aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0);
2267bf215546Sopenharmony_civoid aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
2268bf215546Sopenharmony_civoid aco_print_program(const Program* program, FILE* output, const live& live_vars,
2269bf215546Sopenharmony_ci                       unsigned flags = 0);
2270bf215546Sopenharmony_ci
2271bf215546Sopenharmony_civoid _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...);
2272bf215546Sopenharmony_civoid _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);
2273bf215546Sopenharmony_ci
2274bf215546Sopenharmony_ci#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
2275bf215546Sopenharmony_ci#define aco_err(program, ...)      _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)
2276bf215546Sopenharmony_ci
2277bf215546Sopenharmony_ci/* utilities for dealing with register demand */
2278bf215546Sopenharmony_ciRegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
2279bf215546Sopenharmony_ciRegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
2280bf215546Sopenharmony_ciRegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
2281bf215546Sopenharmony_ci                                 aco_ptr<Instruction>& instr_before);
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
2284bf215546Sopenharmony_ciuint16_t get_extra_sgprs(Program* program);
2285bf215546Sopenharmony_ci
2286bf215546Sopenharmony_ci/* adjust num_waves for workgroup size and LDS limits */
2287bf215546Sopenharmony_ciuint16_t max_suitable_waves(Program* program, uint16_t waves);
2288bf215546Sopenharmony_ci
2289bf215546Sopenharmony_ci/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
2290bf215546Sopenharmony_ciuint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
2291bf215546Sopenharmony_ciuint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);
2292bf215546Sopenharmony_ci
2293bf215546Sopenharmony_ci/* return number of addressable sgprs/vgprs for max_waves */
2294bf215546Sopenharmony_ciuint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
2295bf215546Sopenharmony_ciuint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
2296bf215546Sopenharmony_ci
2297bf215546Sopenharmony_citypedef struct {
2298bf215546Sopenharmony_ci   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
2299bf215546Sopenharmony_ci   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
2300bf215546Sopenharmony_ci   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
2301bf215546Sopenharmony_ci   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
2302bf215546Sopenharmony_ci   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
2303bf215546Sopenharmony_ci   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
2304bf215546Sopenharmony_ci   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
2305bf215546Sopenharmony_ci   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
2306bf215546Sopenharmony_ci   /* sizes used for input/output modifiers and constants */
2307bf215546Sopenharmony_ci   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
2308bf215546Sopenharmony_ci   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
2309bf215546Sopenharmony_ci} Info;
2310bf215546Sopenharmony_ci
2311bf215546Sopenharmony_ciextern const Info instr_info;
2312bf215546Sopenharmony_ci
2313bf215546Sopenharmony_ci} // namespace aco
2314bf215546Sopenharmony_ci
2315bf215546Sopenharmony_ci#endif /* ACO_IR_H */
2316