1bf215546Sopenharmony_ci<!--
2bf215546Sopenharmony_ci  Copyright (C) 2021 Collabora Ltd.
3bf215546Sopenharmony_ci
4bf215546Sopenharmony_ci  Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci  copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci  to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci  and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci  Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci
11bf215546Sopenharmony_ci  The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci  paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci  Software.
14bf215546Sopenharmony_ci
15bf215546Sopenharmony_ci  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci  SOFTWARE.
22bf215546Sopenharmony_ci-->
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci<valhall>
25bf215546Sopenharmony_ci  <lut name="Immediates">
26bf215546Sopenharmony_ci    <desc>
27bf215546Sopenharmony_ci      These immediates are accessible in (almost) any instruction, provided the
28bf215546Sopenharmony_ci      immediate mode is kept to the default. They optimize for the most common
29bf215546Sopenharmony_ci      immediate values; any immediate listed here may be used without taking up
30bf215546Sopenharmony_ci      a uniform slot or a register. Most integer instructions can access
31bf215546Sopenharmony_ci      separate half-words and individual bytes via swizzles on the source.
32bf215546Sopenharmony_ci    </desc>
33bf215546Sopenharmony_ci    <constant desc="Zero">0x00000000</constant>
34bf215546Sopenharmony_ci    <constant desc="All ones; integer $-1$">0xFFFFFFFF</constant>
35bf215546Sopenharmony_ci    <constant desc="Maximum integer; floating-point NaN">0x7FFFFFFF</constant>
36bf215546Sopenharmony_ci    <constant desc="Integers $(-2, -3, -4, -6)$">0xFAFCFDFE</constant>
37bf215546Sopenharmony_ci    <constant desc="16-bit integer $2^8$">0x01000000</constant>
38bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(0, 32, 0, 128)$">0x80002000</constant>
39bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(48, 80, 96, 112)$">0x70605030</constant>
40bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(144, 160, 176, 192)$">0xC0B0A090</constant>
41bf215546Sopenharmony_ci    <constant desc="Integers $(0, 1, 2, 3)$">0x03020100</constant>
42bf215546Sopenharmony_ci    <constant desc="Integers $(4, 5, 6, 7)$">0x07060504</constant>
43bf215546Sopenharmony_ci    <constant desc="Integers $(8, 9, 10, 11)$">0x0B0A0908</constant>
44bf215546Sopenharmony_ci    <constant desc="Integers $(12, 13, 14, 15)$">0x0F0E0D0C</constant>
45bf215546Sopenharmony_ci    <constant desc="Integers $(16, 17, 18, 19)$">0x13121110</constant>
46bf215546Sopenharmony_ci    <constant desc="Integers $(20, 21, 22, 23)$">0x17161514</constant>
47bf215546Sopenharmony_ci    <constant desc="Integers $(24, 25, 26, 27)$">0x1B1A1918</constant>
48bf215546Sopenharmony_ci    <constant desc="Integers $(28, 29, 30, 31)$">0x1F1E1D1C</constant>
49bf215546Sopenharmony_ci    <constant desc="Float $1.0$">0x3F800000</constant>
50bf215546Sopenharmony_ci    <constant desc="Float $0.1$">0x3DCCCCCD</constant>
51bf215546Sopenharmony_ci    <constant desc="Float $1 / \pi$">0x3EA2F983</constant>
52bf215546Sopenharmony_ci    <constant desc="Float $\log(2)$">0x3F317218</constant>
53bf215546Sopenharmony_ci    <constant desc="Float $\pi$">0x40490FDB</constant>
54bf215546Sopenharmony_ci    <constant desc="Float $0.0$">0x00000000</constant>
55bf215546Sopenharmony_ci    <constant desc="Float $65535.0 = 2^{16} - 1$">0x477FFF00</constant>
56bf215546Sopenharmony_ci    <constant desc="Half-float $(255.0, 256.0) = (2^8 - 1, 2^8)$">0x5C005BF8</constant>
57bf215546Sopenharmony_ci    <constant desc="Half-float $0.1 = 1 / 10$">0x2E660000</constant>
58bf215546Sopenharmony_ci    <constant desc="Half-float $0.25 = 2^{-2}$">0x34000000</constant>
59bf215546Sopenharmony_ci    <constant desc="Half-float $0.5 = 2^{-1}$">0x38000000</constant>
60bf215546Sopenharmony_ci    <constant desc="Half-float $1.0 = 2^0$">0x3C000000</constant>
61bf215546Sopenharmony_ci    <constant desc="Half-float $2.0 = 2^1$">0x40000000</constant>
62bf215546Sopenharmony_ci    <constant desc="Half-float $4.0 = 2^2$">0x44000000</constant>
63bf215546Sopenharmony_ci    <constant desc="Half-float $8.0 = 2^3$">0x48000000</constant>
64bf215546Sopenharmony_ci    <constant desc="Half-float $\pi$">0x42480000</constant>
65bf215546Sopenharmony_ci  </lut>
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci  <enum name="Flow">
68bf215546Sopenharmony_ci    <desc>
69bf215546Sopenharmony_ci      Every Valhall instruction can wait on dependency
70bf215546Sopenharmony_ci      slots. A few special flows are available, specified in the instruction
71bf215546Sopenharmony_ci      metadata from this enum. The `wait0126` flow is required to wait on
72bf215546Sopenharmony_ci      dependency slot #6 and should be set on the instruction immediately
73bf215546Sopenharmony_ci      preceding `ATEST`. The `wait` flow should be set for barriers.
74bf215546Sopenharmony_ci      The `discard` flow only applies to fragment shaders and is used to
75bf215546Sopenharmony_ci      terminate helper invocations; it should be set as early as possible after
76bf215546Sopenharmony_ci      helper invocations are no longer needed as determined by data flow
77bf215546Sopenharmony_ci      analysis. The `end` flow is used to terminate the shader, although it
78bf215546Sopenharmony_ci      may be overloaded by the `BLEND` instruction.
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci      The `reconverge` flow is required on any instruction immediately
81bf215546Sopenharmony_ci      preceding a possible change to the mask of active threads in a subgroup.
82bf215546Sopenharmony_ci      This includes all divergent branches, but it also includes the final
83bf215546Sopenharmony_ci      instruction at the end of any basic block where the immediate successor
84bf215546Sopenharmony_ci      (fallthrough) is the target of a divergent branch.
85bf215546Sopenharmony_ci    </desc>
86bf215546Sopenharmony_ci    <value name="None" default="true">none</value>
87bf215546Sopenharmony_ci    <value name="Wait on slot 0">wait0</value>
88bf215546Sopenharmony_ci    <value name="Wait on slot 1">wait1</value>
89bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1">wait01</value>
90bf215546Sopenharmony_ci    <value name="Wait on slot 2">wait2</value>
91bf215546Sopenharmony_ci    <value name="Wait on slots 0, 2">wait02</value>
92bf215546Sopenharmony_ci    <value name="Wait on slots 1, 2">wait12</value>
93bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2">wait012</value>
94bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2, 6">wait0126</value>
95bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2, 6, 7">wait</value>
96bf215546Sopenharmony_ci    <value name="Perform branch reconverge">reconverge</value>
97bf215546Sopenharmony_ci    <reserved/>
98bf215546Sopenharmony_ci    <reserved/>
99bf215546Sopenharmony_ci    <value name="Terminate discarded threads">discard</value>
100bf215546Sopenharmony_ci    <reserved/>
101bf215546Sopenharmony_ci    <value name="Return from shader">end</value>
102bf215546Sopenharmony_ci  </enum>
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci  <enum name="FAU special page 0">
105bf215546Sopenharmony_ci    <desc>
106bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
107bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
108bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
109bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
110bf215546Sopenharmony_ci      $32 + i$.
111bf215546Sopenharmony_ci    </desc>
112bf215546Sopenharmony_ci    <reserved/>
113bf215546Sopenharmony_ci    <reserved/>
114bf215546Sopenharmony_ci    <value desc="Warp ID and warps/core - 1">warp_id</value>
115bf215546Sopenharmony_ci    <reserved/>
116bf215546Sopenharmony_ci    <value desc="Bounding box maximum X/Y">framebuffer_size</value>
117bf215546Sopenharmony_ci    <value desc="ATEST datum">atest_datum</value>
118bf215546Sopenharmony_ci    <value desc="Sample positions">sample</value>
119bf215546Sopenharmony_ci    <reserved/>
120bf215546Sopenharmony_ci    <value desc="Blend descriptor 0">blend_descriptor_0</value>
121bf215546Sopenharmony_ci    <value desc="Blend descriptor 1">blend_descriptor_1</value>
122bf215546Sopenharmony_ci    <value desc="Blend descriptor 2">blend_descriptor_2</value>
123bf215546Sopenharmony_ci    <value desc="Blend descriptor 3">blend_descriptor_3</value>
124bf215546Sopenharmony_ci    <value desc="Blend descriptor 4">blend_descriptor_4</value>
125bf215546Sopenharmony_ci    <value desc="Blend descriptor 5">blend_descriptor_5</value>
126bf215546Sopenharmony_ci    <value desc="Blend descriptor 6">blend_descriptor_6</value>
127bf215546Sopenharmony_ci    <value desc="Blend descriptor 7">blend_descriptor_7</value>
128bf215546Sopenharmony_ci  </enum>
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci  <enum name="FAU special page 1">
131bf215546Sopenharmony_ci    <desc>
132bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
133bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
134bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
135bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
136bf215546Sopenharmony_ci      $32 + i$.
137bf215546Sopenharmony_ci    </desc>
138bf215546Sopenharmony_ci    <reserved/>
139bf215546Sopenharmony_ci    <value desc="Thread local storage base pointer">thread_local_pointer</value>
140bf215546Sopenharmony_ci    <reserved/>
141bf215546Sopenharmony_ci    <value desc="Workgroup local storage base pointer">workgroup_local_pointer</value>
142bf215546Sopenharmony_ci    <reserved/>
143bf215546Sopenharmony_ci    <reserved/>
144bf215546Sopenharmony_ci    <reserved/>
145bf215546Sopenharmony_ci    <value desc="Shader resource table base pointer">resource_table_pointer</value>
146bf215546Sopenharmony_ci    <reserved/>
147bf215546Sopenharmony_ci    <reserved/>
148bf215546Sopenharmony_ci    <reserved/>
149bf215546Sopenharmony_ci    <reserved/>
150bf215546Sopenharmony_ci    <reserved/>
151bf215546Sopenharmony_ci    <reserved/>
152bf215546Sopenharmony_ci    <reserved/>
153bf215546Sopenharmony_ci    <reserved/>
154bf215546Sopenharmony_ci  </enum>
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci  <enum name="FAU special page 3">
157bf215546Sopenharmony_ci    <desc>
158bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
159bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
160bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
161bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
162bf215546Sopenharmony_ci      $32 + i$.
163bf215546Sopenharmony_ci    </desc>
164bf215546Sopenharmony_ci    <reserved/>
165bf215546Sopenharmony_ci    <value desc="Lane ID">lane_id</value>
166bf215546Sopenharmony_ci    <reserved/>
167bf215546Sopenharmony_ci    <value desc="Core ID">core_id</value>
168bf215546Sopenharmony_ci    <reserved/>
169bf215546Sopenharmony_ci    <reserved/>
170bf215546Sopenharmony_ci    <reserved/>
171bf215546Sopenharmony_ci    <reserved/>
172bf215546Sopenharmony_ci    <reserved/>
173bf215546Sopenharmony_ci    <reserved/>
174bf215546Sopenharmony_ci    <reserved/>
175bf215546Sopenharmony_ci    <reserved/>
176bf215546Sopenharmony_ci    <reserved/>
177bf215546Sopenharmony_ci    <reserved/>
178bf215546Sopenharmony_ci    <reserved/>
179bf215546Sopenharmony_ci    <value desc="Program counter">program_counter</value>
180bf215546Sopenharmony_ci  </enum>
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci  <enum name="Swizzles (8-bit)">
183bf215546Sopenharmony_ci    <value default="true">b0123</value>
184bf215546Sopenharmony_ci    <value>b3210</value>
185bf215546Sopenharmony_ci    <value>b0101</value>
186bf215546Sopenharmony_ci    <value>b2323</value>
187bf215546Sopenharmony_ci    <value>b0000</value>
188bf215546Sopenharmony_ci    <value>b1111</value>
189bf215546Sopenharmony_ci    <value>b2222</value>
190bf215546Sopenharmony_ci    <value>b3333</value>
191bf215546Sopenharmony_ci    <value>b2301</value>
192bf215546Sopenharmony_ci    <value>b1032</value>
193bf215546Sopenharmony_ci    <value>b0011</value>
194bf215546Sopenharmony_ci    <value>b2233</value>
195bf215546Sopenharmony_ci    <reserved/>
196bf215546Sopenharmony_ci    <reserved/>
197bf215546Sopenharmony_ci    <reserved/>
198bf215546Sopenharmony_ci    <reserved/>
199bf215546Sopenharmony_ci  </enum>
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci  <enum name="Lanes (8-bit)">
202bf215546Sopenharmony_ci    <desc>Used to select the 2 bytes for shifts of 16-bit vectors</desc>
203bf215546Sopenharmony_ci    <value>b02</value>
204bf215546Sopenharmony_ci    <reserved/>
205bf215546Sopenharmony_ci    <reserved/>
206bf215546Sopenharmony_ci    <reserved/>
207bf215546Sopenharmony_ci    <value>b00</value>
208bf215546Sopenharmony_ci    <value>b11</value>
209bf215546Sopenharmony_ci    <value>b22</value>
210bf215546Sopenharmony_ci    <value>b33</value>
211bf215546Sopenharmony_ci    <reserved/>
212bf215546Sopenharmony_ci    <reserved/>
213bf215546Sopenharmony_ci    <value>b01</value>
214bf215546Sopenharmony_ci    <value>b23</value>
215bf215546Sopenharmony_ci    <reserved/>
216bf215546Sopenharmony_ci    <reserved/>
217bf215546Sopenharmony_ci    <reserved/>
218bf215546Sopenharmony_ci    <reserved/>
219bf215546Sopenharmony_ci  </enum>
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci  <enum name="Half-swizzles (8-bit)">
222bf215546Sopenharmony_ci    <desc>
223bf215546Sopenharmony_ci      Used to select the 2 bytes to convert for conversions from 8-bit vectors
224bf215546Sopenharmony_ci      to 16-bit vectors
225bf215546Sopenharmony_ci    </desc>
226bf215546Sopenharmony_ci    <value>b00</value>
227bf215546Sopenharmony_ci    <value>b10</value>
228bf215546Sopenharmony_ci    <value>b20</value>
229bf215546Sopenharmony_ci    <value>b30</value>
230bf215546Sopenharmony_ci    <value>b01</value>
231bf215546Sopenharmony_ci    <value>b11</value>
232bf215546Sopenharmony_ci    <value>b21</value>
233bf215546Sopenharmony_ci    <value>b31</value>
234bf215546Sopenharmony_ci    <value>b02</value>
235bf215546Sopenharmony_ci    <value>b12</value>
236bf215546Sopenharmony_ci    <value>b22</value>
237bf215546Sopenharmony_ci    <value>b32</value>
238bf215546Sopenharmony_ci    <value>b03</value>
239bf215546Sopenharmony_ci    <value>b13</value>
240bf215546Sopenharmony_ci    <value>b23</value>
241bf215546Sopenharmony_ci    <value>b33</value>
242bf215546Sopenharmony_ci  </enum>
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci  <enum name="Swizzles (16-bit)">
245bf215546Sopenharmony_ci    <value>h00</value> <!-- 0,2 -->
246bf215546Sopenharmony_ci    <value>h10</value>
247bf215546Sopenharmony_ci    <value default="true">h01</value>
248bf215546Sopenharmony_ci    <value>h11</value>
249bf215546Sopenharmony_ci    <value>b00</value> <!-- 0,0 -->
250bf215546Sopenharmony_ci    <value>b20</value> <!-- 1,1 -->
251bf215546Sopenharmony_ci    <value>b02</value> <!-- 2,2 -->
252bf215546Sopenharmony_ci    <value>b22</value> <!-- 3,3 -->
253bf215546Sopenharmony_ci    <value>b11</value>
254bf215546Sopenharmony_ci    <value>b31</value>
255bf215546Sopenharmony_ci    <value>b13</value> <!-- 0,1 -->
256bf215546Sopenharmony_ci    <value>b33</value> <!-- 2,3 -->
257bf215546Sopenharmony_ci    <value>b01</value>
258bf215546Sopenharmony_ci    <value>b23</value>
259bf215546Sopenharmony_ci    <reserved/>
260bf215546Sopenharmony_ci    <reserved/>
261bf215546Sopenharmony_ci  </enum>
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci  <enum name="Swizzles (32-bit)">
264bf215546Sopenharmony_ci    <value default="true">none</value>
265bf215546Sopenharmony_ci    <reserved/>
266bf215546Sopenharmony_ci    <value>h0</value>
267bf215546Sopenharmony_ci    <value>h1</value>
268bf215546Sopenharmony_ci    <value>b0</value>
269bf215546Sopenharmony_ci    <value>b1</value>
270bf215546Sopenharmony_ci    <value>b2</value>
271bf215546Sopenharmony_ci    <value>b3</value>
272bf215546Sopenharmony_ci  </enum>
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci  <enum name="Swizzles (64-bit)">
275bf215546Sopenharmony_ci    <value default="true">none</value>
276bf215546Sopenharmony_ci    <reserved/>
277bf215546Sopenharmony_ci    <value>h0</value>
278bf215546Sopenharmony_ci    <value>h1</value>
279bf215546Sopenharmony_ci    <value>b0</value>
280bf215546Sopenharmony_ci    <value>b1</value>
281bf215546Sopenharmony_ci    <value>b2</value>
282bf215546Sopenharmony_ci    <value>b3</value>
283bf215546Sopenharmony_ci    <value>w0</value>
284bf215546Sopenharmony_ci    <reserved/>
285bf215546Sopenharmony_ci    <reserved/>
286bf215546Sopenharmony_ci    <reserved/>
287bf215546Sopenharmony_ci    <reserved/>
288bf215546Sopenharmony_ci    <reserved/>
289bf215546Sopenharmony_ci    <reserved/>
290bf215546Sopenharmony_ci    <reserved/>
291bf215546Sopenharmony_ci  </enum>
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci  <enum name="Lane (8-bit)" implied="true">
294bf215546Sopenharmony_ci    <value>b0</value>
295bf215546Sopenharmony_ci    <value>b1</value>
296bf215546Sopenharmony_ci    <value>b2</value>
297bf215546Sopenharmony_ci    <value>b3</value>
298bf215546Sopenharmony_ci  </enum>
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci  <enum name="Combine">
301bf215546Sopenharmony_ci    <desc>
302bf215546Sopenharmony_ci      Used for the lane select of `BRANCHZ`. To use an 8-bit condition, a
303bf215546Sopenharmony_ci      separate `ICMP` is required to cast to 16-bit.
304bf215546Sopenharmony_ci    </desc>
305bf215546Sopenharmony_ci    <value default="true">none</value>
306bf215546Sopenharmony_ci    <value>h0</value>
307bf215546Sopenharmony_ci    <value>h1</value>
308bf215546Sopenharmony_ci    <value>and</value>
309bf215546Sopenharmony_ci    <value>lowbits</value>
310bf215546Sopenharmony_ci  </enum>
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci  <enum name="Lane (16-bit)" implied="true">
313bf215546Sopenharmony_ci    <value>h0</value>
314bf215546Sopenharmony_ci    <value>h1</value>
315bf215546Sopenharmony_ci  </enum>
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci  <enum name="Load lane (8-bit)">
318bf215546Sopenharmony_ci    <value default="true">b0</value>
319bf215546Sopenharmony_ci    <value>b1</value>
320bf215546Sopenharmony_ci    <value>b2</value>
321bf215546Sopenharmony_ci    <value>b3</value>
322bf215546Sopenharmony_ci    <value desc="Zero-extend to 16-bit, low-half">h0</value>
323bf215546Sopenharmony_ci    <value desc="Zero-extend to 16-bit, high-half">h1</value>
324bf215546Sopenharmony_ci    <value desc="Zero-extend to 32-bit">w0</value>
325bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value>
326bf215546Sopenharmony_ci  </enum>
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci  <enum name="Load lane (16-bit)">
329bf215546Sopenharmony_ci    <value desc="Low half" default="true">h0</value>
330bf215546Sopenharmony_ci    <value desc="High half">h1</value>
331bf215546Sopenharmony_ci    <value desc="Zero-extend to 32-bit">w0</value>
332bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value>
333bf215546Sopenharmony_ci    <reserved/>
334bf215546Sopenharmony_ci    <reserved/>
335bf215546Sopenharmony_ci    <reserved/>
336bf215546Sopenharmony_ci    <reserved/>
337bf215546Sopenharmony_ci  </enum>
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci  <enum name="Load lane (24-bit)" implied="true">
340bf215546Sopenharmony_ci    <value default="true">identity</value>
341bf215546Sopenharmony_ci    <reserved/>
342bf215546Sopenharmony_ci    <reserved/>
343bf215546Sopenharmony_ci    <reserved/>
344bf215546Sopenharmony_ci    <reserved/>
345bf215546Sopenharmony_ci    <reserved/>
346bf215546Sopenharmony_ci    <reserved/>
347bf215546Sopenharmony_ci  </enum>
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci  <enum name="Load lane (32-bit)">
350bf215546Sopenharmony_ci    <value default="true">w0</value>
351bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value>
352bf215546Sopenharmony_ci    <reserved/>
353bf215546Sopenharmony_ci    <reserved/>
354bf215546Sopenharmony_ci    <reserved/>
355bf215546Sopenharmony_ci    <reserved/>
356bf215546Sopenharmony_ci    <reserved/>
357bf215546Sopenharmony_ci    <reserved/>
358bf215546Sopenharmony_ci  </enum>
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci  <enum name="Load lane (48-bit)">
361bf215546Sopenharmony_ci    <reserved/>
362bf215546Sopenharmony_ci    <reserved/>
363bf215546Sopenharmony_ci    <reserved/>
364bf215546Sopenharmony_ci    <reserved/>
365bf215546Sopenharmony_ci    <value default="true">identity</value>
366bf215546Sopenharmony_ci    <reserved/>
367bf215546Sopenharmony_ci    <reserved/>
368bf215546Sopenharmony_ci    <reserved/>
369bf215546Sopenharmony_ci  </enum>
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci  <enum name="Load lane (64-bit)">
372bf215546Sopenharmony_ci    <reserved/>
373bf215546Sopenharmony_ci    <reserved/>
374bf215546Sopenharmony_ci    <reserved/>
375bf215546Sopenharmony_ci    <reserved/>
376bf215546Sopenharmony_ci    <reserved/>
377bf215546Sopenharmony_ci    <reserved/>
378bf215546Sopenharmony_ci    <reserved/>
379bf215546Sopenharmony_ci    <value default="true">identity</value>
380bf215546Sopenharmony_ci  </enum>
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci  <enum name="Load lane (96-bit)">
383bf215546Sopenharmony_ci    <reserved/>
384bf215546Sopenharmony_ci    <reserved/>
385bf215546Sopenharmony_ci    <reserved/>
386bf215546Sopenharmony_ci    <reserved/>
387bf215546Sopenharmony_ci    <reserved/>
388bf215546Sopenharmony_ci    <reserved/>
389bf215546Sopenharmony_ci    <value default="true">identity</value>
390bf215546Sopenharmony_ci    <reserved/>
391bf215546Sopenharmony_ci  </enum>
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci  <enum name="Load lane (128-bit)">
394bf215546Sopenharmony_ci    <reserved/>
395bf215546Sopenharmony_ci    <reserved/>
396bf215546Sopenharmony_ci    <reserved/>
397bf215546Sopenharmony_ci    <reserved/>
398bf215546Sopenharmony_ci    <reserved/>
399bf215546Sopenharmony_ci    <reserved/>
400bf215546Sopenharmony_ci    <reserved/>
401bf215546Sopenharmony_ci    <value default="true">identity</value>
402bf215546Sopenharmony_ci  </enum>
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci  <enum name="Round mode">
405bf215546Sopenharmony_ci    <desc>Corresponds to IEEE 754 rounding modes</desc>
406bf215546Sopenharmony_ci    <value desc="Round to nearest even" default="true">rte</value>
407bf215546Sopenharmony_ci    <value desc="Round to positive infinity">rtp</value>
408bf215546Sopenharmony_ci    <value desc="Round to negative infinity">rtn</value>
409bf215546Sopenharmony_ci    <value desc="Round to zero">rtz</value>
410bf215546Sopenharmony_ci  </enum>
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci  <enum name="Result type">
413bf215546Sopenharmony_ci    <desc>
414bf215546Sopenharmony_ci      Comparison instructions like `FCMP` return a boolean but may encode this
415bf215546Sopenharmony_ci      boolean in a variety of ways. `i1` gives an OpenGL-style `0/1` boolean.
416bf215546Sopenharmony_ci      `m1` gives a Direct3D-style `0/~0` boolean. `f1` gives a floating-point
417bf215546Sopenharmony_ci      `0.0f / 1.0f` boolean. Switching between these modes is useful to fold a
418bf215546Sopenharmony_ci      boolean type convert into a comparison. `u1` is used internally to
419bf215546Sopenharmony_ci      implement 64-bit comparisons.
420bf215546Sopenharmony_ci    </desc>
421bf215546Sopenharmony_ci    <value desc="Integer 1">i1</value>
422bf215546Sopenharmony_ci    <value desc="Float 1">f1</value>
423bf215546Sopenharmony_ci    <value desc="Minus 1">m1</value>
424bf215546Sopenharmony_ci    <value desc="Low half of 64-bit compare">u1</value>
425bf215546Sopenharmony_ci  </enum>
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci  <enum name="Widen">
428bf215546Sopenharmony_ci    <value default="true">none</value>
429bf215546Sopenharmony_ci    <value>h0</value>
430bf215546Sopenharmony_ci    <value>h1</value>
431bf215546Sopenharmony_ci    <reserved/>
432bf215546Sopenharmony_ci    <reserved/>
433bf215546Sopenharmony_ci    <reserved/>
434bf215546Sopenharmony_ci    <reserved/>
435bf215546Sopenharmony_ci    <reserved/>
436bf215546Sopenharmony_ci  </enum>
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci  <enum name="Clamp">
439bf215546Sopenharmony_ci    <desc>
440bf215546Sopenharmony_ci      Clamp applied to the destination of a floating-point instruction. Note the
441bf215546Sopenharmony_ci      clamps may be decomposed as two independent bits for `clamp_0_inf` and
442bf215546Sopenharmony_ci      `clamp_m1_1`, with `clamp_0_1` arising as the composition of `clamp_0_inf`
443bf215546Sopenharmony_ci      and `clamp_m1_1` in either order.
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_ci      Clamps are implemented per the SPIR-V specification:
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci      $$\text{clamp} \; (x, \ell, h) = \min( \max( x, \ell ), h)$$
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci      The min/max functions return the other operand if one operand is NaN, and
450bf215546Sopenharmony_ci      compare $-0 &lt; +0$. That means the following identities hold for Valhall
451bf215546Sopenharmony_ci      clamps:
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci      \begin{align*}
454bf215546Sopenharmony_ci        \text{clamp}(-0.0, 0.0, 1.0) &amp; = +0.0 \\
455bf215546Sopenharmony_ci        \text{clamp}(-\text{NaN}, 0.0, 1.0) &amp; = +0.0 \\
456bf215546Sopenharmony_ci        \text{clamp}(\text{NaN}, 0.0, 1.0) &amp; = +0.0 \\
457bf215546Sopenharmony_ci        &amp; \\
458bf215546Sopenharmony_ci        \text{clamp}(-0.0, -1.0, 1.0) &amp; = -0.0 \\
459bf215546Sopenharmony_ci        \text{clamp}(\text{NaN}, -1.0, 1.0) &amp; = -1.0 \\
460bf215546Sopenharmony_ci        \text{clamp}(-\text{NaN}, -1.0, 1.0) &amp; = -1.0 \\
461bf215546Sopenharmony_ci        &amp; \\
462bf215546Sopenharmony_ci        \max(\text{NaN}, 0.0) &amp; = +0.0 \\
463bf215546Sopenharmony_ci        \max(-\text{NaN}, 0.0) &amp; = +0.0 \\
464bf215546Sopenharmony_ci        \max(-0.0, 0.0) &amp; = +0.0 \\
465bf215546Sopenharmony_ci      \end{align*}
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci      This behaviour is consistent with the FMin/FMax/FClamp and
468bf215546Sopenharmony_ci      NMin/NMax/NClamp rules prescribed by SPIR-V and governed by IEEE-754. As
469bf215546Sopenharmony_ci      a consequence, substituting these clamps for equivalent minimum/maximum
470bf215546Sopenharmony_ci      expressions is legal even with strict floating point rules.
471bf215546Sopenharmony_ci    </desc>
472bf215546Sopenharmony_ci    <value default="true" desc="Identity">none</value>
473bf215546Sopenharmony_ci    <value desc="Clamp positive">clamp_0_inf</value>
474bf215546Sopenharmony_ci    <value desc="Clamp to $[-1, 1]$">clamp_m1_1</value>
475bf215546Sopenharmony_ci    <value desc="Clamp to $[0, 1]$">clamp_0_1</value>
476bf215546Sopenharmony_ci  </enum>
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci  <enum name="Condition">
479bf215546Sopenharmony_ci    <desc>
480bf215546Sopenharmony_ci      Condition code. Type must be inferred from the instruction. IEEE 754 total
481bf215546Sopenharmony_ci      ordering only applies to floating point compares. "Not equal" and "greater
482bf215546Sopenharmony_ci      than or less than" are distinguished by NaN behaviour conforming to
483bf215546Sopenharmony_ci      the IEEE 754 specification.
484bf215546Sopenharmony_ci    </desc>
485bf215546Sopenharmony_ci    <value desc="Equal">eq</value>
486bf215546Sopenharmony_ci    <value desc="Greater than">gt</value>
487bf215546Sopenharmony_ci    <value desc="Greater than or equal">ge</value>
488bf215546Sopenharmony_ci    <value desc="Not equal">ne</value>
489bf215546Sopenharmony_ci    <value desc="Less than">lt</value>
490bf215546Sopenharmony_ci    <value desc="Less than or equal">le</value>
491bf215546Sopenharmony_ci    <value desc="Greater than or less than">gtlt</value>
492bf215546Sopenharmony_ci    <value desc="Totally ordered">total</value>
493bf215546Sopenharmony_ci  </enum>
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci  <enum name="Dimension">
496bf215546Sopenharmony_ci    <desc>Texture dimension.</desc>
497bf215546Sopenharmony_ci    <value desc="1D or buffer">1d</value>
498bf215546Sopenharmony_ci    <value desc="2D or 2D array">2d</value>
499bf215546Sopenharmony_ci    <value desc="3D or 3D array">3d</value>
500bf215546Sopenharmony_ci    <value desc="Cube map or cube map array">cube</value>
501bf215546Sopenharmony_ci  </enum>
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci  <enum name="LOD mode">
504bf215546Sopenharmony_ci    <desc>Level-of-detail selection mode in a texture instruction.</desc>
505bf215546Sopenharmony_ci    <value desc="Set to zero">zero</value>
506bf215546Sopenharmony_ci    <value desc="Computed based on neighboring fragments">computed</value>
507bf215546Sopenharmony_ci    <reserved/>
508bf215546Sopenharmony_ci    <reserved/>
509bf215546Sopenharmony_ci    <value desc="Explicitly specified in a register">explicit</value>
510bf215546Sopenharmony_ci    <value desc="Computed based on neighboring fragments added with bias in a register">computed_bias</value>
511bf215546Sopenharmony_ci    <value desc="Derived from a gradient descriptor in registers">grdesc</value>
512bf215546Sopenharmony_ci    <reserved/>
513bf215546Sopenharmony_ci  </enum>
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci  <enum name="Register format">
516bf215546Sopenharmony_ci    <desc>Format of data loaded to / stored from registers for general memory access.</desc>
517bf215546Sopenharmony_ci    <value desc="32-bit type based on descriptor format">auto</value>
518bf215546Sopenharmony_ci    <reserved/>
519bf215546Sopenharmony_ci    <value desc="32-bit floats">f32</value>
520bf215546Sopenharmony_ci    <value desc="16-bit floats">f16</value>
521bf215546Sopenharmony_ci    <value desc="32-bit signed integers">s32</value>
522bf215546Sopenharmony_ci    <value desc="16-bit signed integers">s16</value>
523bf215546Sopenharmony_ci    <value desc="32-bit unsigned integers">u32</value>
524bf215546Sopenharmony_ci    <value desc="16-bit unsigned integers">u16</value>
525bf215546Sopenharmony_ci  </enum>
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci  <enum name="Staging register count" implied="true"> <!-- eight values, sr0 through sr7, none with a desc attribute. NOTE(review): implied=true presumably means the value is derived rather than written out in assembly; confirm -->
528bf215546Sopenharmony_ci    <value>sr0</value> <!-- names are zero-based, so a count of zero is representable -->
529bf215546Sopenharmony_ci    <value>sr1</value>
530bf215546Sopenharmony_ci    <value>sr2</value>
531bf215546Sopenharmony_ci    <value>sr3</value>
532bf215546Sopenharmony_ci    <value>sr4</value>
533bf215546Sopenharmony_ci    <value>sr5</value>
534bf215546Sopenharmony_ci    <value>sr6</value>
535bf215546Sopenharmony_ci    <value>sr7</value>
536bf215546Sopenharmony_ci  </enum>
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci  <enum name="Staging register write count" implied="true"> <!-- eight values named write1 through write8; like the plain staging register count, this enum is marked implied -->
539bf215546Sopenharmony_ci    <value>write1</value> <!-- names are one-based: the first entry already means one register, there is no write0 -->
540bf215546Sopenharmony_ci    <value>write2</value>
541bf215546Sopenharmony_ci    <value>write3</value>
542bf215546Sopenharmony_ci    <value>write4</value>
543bf215546Sopenharmony_ci    <value>write5</value>
544bf215546Sopenharmony_ci    <value>write6</value>
545bf215546Sopenharmony_ci    <value>write7</value>
546bf215546Sopenharmony_ci    <value>write8</value>
547bf215546Sopenharmony_ci  </enum>
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci  <enum name="Write mask"> <!-- 16 slots: every non-empty subset of the channels r, g, b, a, with the empty subset reserved -->
550bf215546Sopenharmony_ci    <reserved/> <!-- first slot, where an empty mask would sit -->
551bf215546Sopenharmony_ci    <value>r</value> <!-- NOTE(review): if list position is the encoding, this is a 4-bit mask with r as bit 0 up to a as bit 3; confirm -->
552bf215546Sopenharmony_ci    <value>g</value>
553bf215546Sopenharmony_ci    <value>rg</value>
554bf215546Sopenharmony_ci    <value>b</value>
555bf215546Sopenharmony_ci    <value>rb</value>
556bf215546Sopenharmony_ci    <value>gb</value>
557bf215546Sopenharmony_ci    <value>rgb</value>
558bf215546Sopenharmony_ci    <value>a</value>
559bf215546Sopenharmony_ci    <value>ra</value>
560bf215546Sopenharmony_ci    <value>ga</value>
561bf215546Sopenharmony_ci    <value>rga</value>
562bf215546Sopenharmony_ci    <value>ba</value>
563bf215546Sopenharmony_ci    <value>rba</value>
564bf215546Sopenharmony_ci    <value>gba</value>
565bf215546Sopenharmony_ci    <value default="true">rgba</value> <!-- default: all four channels -->
566bf215546Sopenharmony_ci  </enum>
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci  <enum name="Fetch component"> <!-- four values, one per colour channel; no desc element and no default is declared -->
569bf215546Sopenharmony_ci    <value desc="Red">gather4_r</value> <!-- every value name carries the gather4_ prefix followed by the channel letter -->
570bf215546Sopenharmony_ci    <value desc="Green">gather4_g</value>
571bf215546Sopenharmony_ci    <value desc="Blue">gather4_b</value>
572bf215546Sopenharmony_ci    <value desc="Alpha">gather4_a</value>
573bf215546Sopenharmony_ci  </enum>
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_ci  <enum name="Register type"> <!-- 4 slots: one reserved placeholder followed by float, unsigned, signed -->
576bf215546Sopenharmony_ci    <desc>Unsized type, part of a register format.</desc>
577bf215546Sopenharmony_ci    <reserved/>
578bf215546Sopenharmony_ci    <value name="Float">f</value> <!-- NOTE(review): these values are labelled with a name attribute, while most enums in this file use desc; confirm the consumer reads name -->
579bf215546Sopenharmony_ci    <value name="Unsigned">u</value>
580bf215546Sopenharmony_ci    <value name="Signed">s</value>
581bf215546Sopenharmony_ci  </enum>
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci  <enum name="Register width"> <!-- two values with no desc attribute and no declared default -->
584bf215546Sopenharmony_ci    <desc>Untyped size, part of a register format.</desc>
585bf215546Sopenharmony_ci    <value>16</value> <!-- the value text is a bare number; presumably a width in bits, by analogy with the 16-bit / 32-bit register formats -->
586bf215546Sopenharmony_ci    <value>32</value>
587bf215546Sopenharmony_ci  </enum>
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci  <enum name="Varying texture register width"> <!-- four values: two single widths followed by two paired widths -->
590bf215546Sopenharmony_ci    <desc>
591bf215546Sopenharmony_ci      Size of results for varying texture instructions. For dual 16-bit results
592bf215546Sopenharmony_ci      use "16-bit".
593bf215546Sopenharmony_ci    </desc>
594bf215546Sopenharmony_ci    <value desc="16-bit">16</value> <!-- also the entry to pick for a dual 16-bit result, per the description above -->
595bf215546Sopenharmony_ci    <value desc="32-bit">32</value>
596bf215546Sopenharmony_ci    <value desc="16-bit, 32-bit">16.32</value> <!-- paired widths are written with a dot between the two sizes -->
597bf215546Sopenharmony_ci    <value desc="32-bit, 32-bit">32.32</value>
598bf215546Sopenharmony_ci  </enum>
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci  <enum name="Vector size"> <!-- four values covering 1 to 4 channels -->
601bf215546Sopenharmony_ci    <desc>Number of channels loaded/stored for general memory access.</desc>
602bf215546Sopenharmony_ci    <value default="true" desc="Scalar">none</value> <!-- default; the scalar case is spelled none rather than v1 -->
603bf215546Sopenharmony_ci    <value desc="2 channels">v2</value>
604bf215546Sopenharmony_ci    <value desc="3 channels">v3</value>
605bf215546Sopenharmony_ci    <value desc="4 channels">v4</value>
606bf215546Sopenharmony_ci  </enum>
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci  <enum name="Slot"> <!-- 8 slots in the list: three general-purpose entries, four reserved placeholders, then slot7 -->
609bf215546Sopenharmony_ci    <desc>
610bf215546Sopenharmony_ci      Dependency slot set on a message-passing instruction that writes to
611bf215546Sopenharmony_ci      registers. Before reading the destination, a future instruction must wait
612bf215546Sopenharmony_ci      on the specified slot. Slot #7 is for `BARRIER` instructions only.
613bf215546Sopenharmony_ci    </desc>
614bf215546Sopenharmony_ci    <value desc="Slot #0">slot0</value>
615bf215546Sopenharmony_ci    <value desc="Slot #1">slot1</value>
616bf215546Sopenharmony_ci    <value desc="Slot #2">slot2</value>
617bf215546Sopenharmony_ci    <reserved/> <!-- four placeholders put slot7 at the eighth list position, matching its number when counting from zero -->
618bf215546Sopenharmony_ci    <reserved/>
619bf215546Sopenharmony_ci    <reserved/>
620bf215546Sopenharmony_ci    <reserved/>
621bf215546Sopenharmony_ci    <value desc="Slot #7">slot7</value> <!-- restricted to BARRIER instructions, per the description above -->
622bf215546Sopenharmony_ci  </enum>
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci  <enum name="Memory access"> <!-- four hints; each desc names the hint and, in parentheses, its typical use -->
625bf215546Sopenharmony_ci    <desc>Memory access hint for a `LOAD` or `STORE` instruction.</desc>
626bf215546Sopenharmony_ci    <value desc="No hint (global)" default="true">none</value> <!-- default -->
627bf215546Sopenharmony_ci    <value desc="Internally streaming (position output)">istream</value>
628bf215546Sopenharmony_ci    <value desc="Externally streaming (varying output)">estream</value>
629bf215546Sopenharmony_ci    <value desc="Force access in discarded threads (thread local storage)">force</value>
630bf215546Sopenharmony_ci  </enum>
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_ci  <enum name="Subgroup size"> <!-- four power-of-two sizes, from 2 up to the full 16-thread warp -->
633bf215546Sopenharmony_ci    <desc>
634bf215546Sopenharmony_ci      Selects the effective subgroup size for subgroup operations. The hardware
635bf215546Sopenharmony_ci      warps are sixteen threads on Valhall, but subdividing a warp may be useful
636bf215546Sopenharmony_ci      for API requirements. In particular, derivatives may be calculated with
637bf215546Sopenharmony_ci      quads (four threads).
638bf215546Sopenharmony_ci    </desc>
639bf215546Sopenharmony_ci    <value desc="Two threads">subgroup2</value>
640bf215546Sopenharmony_ci    <value desc="Four threads">subgroup4</value> <!-- the quad size the description mentions for derivatives -->
641bf215546Sopenharmony_ci    <value desc="Eight threads">subgroup8</value>
642bf215546Sopenharmony_ci    <value desc="Sixteen threads" default="true">subgroup16</value> <!-- default: the whole hardware warp -->
643bf215546Sopenharmony_ci  </enum>
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci  <enum name="Lane operation"> <!-- four values; none is the default -->
646bf215546Sopenharmony_ci    <desc>
647bf215546Sopenharmony_ci      Acts as a modifier on the lane specifier for a `CLPER` instruction. The
648bf215546Sopenharmony_ci      `accumulate` mode is required for efficient subgroup reductions.
649bf215546Sopenharmony_ci    </desc>
650bf215546Sopenharmony_ci    <value name="No operation" default="true">none</value> <!-- NOTE(review): these values are labelled with a name attribute, while most enums in this file use desc; confirm the consumer reads name -->
651bf215546Sopenharmony_ci    <value name="Exclusive-or">xor</value>
652bf215546Sopenharmony_ci    <value name="Accumulate">accumulate</value> <!-- the mode the description singles out for subgroup reductions -->
653bf215546Sopenharmony_ci    <value name="Shift">shift</value>
654bf215546Sopenharmony_ci  </enum>
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci  <enum name="Inactive result"> <!-- 16 values, no reserved slots; each name attribute spells the 32-bit pattern returned -->
657bf215546Sopenharmony_ci    <desc>
658bf215546Sopenharmony_ci      Accesses to inactive lanes (due to divergence) in a subgroup are generally
659bf215546Sopenharmony_ci      undefined in APIs. However, the results of permuting with an inactive lane
660bf215546Sopenharmony_ci      with `CLPER.i32` are well-defined in Valhall: they return one of the
661bf215546Sopenharmony_ci      following values, as specified in the `CLPER.i32` instructions. Sometimes
662bf215546Sopenharmony_ci      certain values enable small optimizations.
663bf215546Sopenharmony_ci    </desc>
664bf215546Sopenharmony_ci    <value name="0x00000000" default="true">zero</value> <!-- default -->
665bf215546Sopenharmony_ci    <value name="0xFFFFFFFF">umax</value>
666bf215546Sopenharmony_ci    <value name="0x00000001">i1</value>
667bf215546Sopenharmony_ci    <value name="0x00010001">v2i1</value> <!-- v2 entries repeat one 16-bit pattern in both halves of the word -->
668bf215546Sopenharmony_ci    <value name="0x80000000">smin</value>
669bf215546Sopenharmony_ci    <value name="0x7FFFFFFF">smax</value>
670bf215546Sopenharmony_ci    <value name="0x80008000">v2smin</value>
671bf215546Sopenharmony_ci    <value name="0x7FFF7FFF">v2smax</value>
672bf215546Sopenharmony_ci    <value name="0x80808080">v4smin</value> <!-- v4 entries repeat one 8-bit pattern in all four bytes -->
673bf215546Sopenharmony_ci    <value name="0x7F7F7F7F">v4smax</value>
674bf215546Sopenharmony_ci    <value name="0x3F800000">f1</value> <!-- 1.0 as a 32-bit IEEE 754 float -->
675bf215546Sopenharmony_ci    <value name="0x3C003C00">v2f1</value> <!-- 1.0 as a 16-bit IEEE 754 float, in both halves -->
676bf215546Sopenharmony_ci    <value name="0xFF800000">infn</value> <!-- negative infinity; the n suffix marks the negative variant -->
677bf215546Sopenharmony_ci    <value name="0x7F800000">inf</value>
678bf215546Sopenharmony_ci    <value name="0xFC00FC00">v2infn</value>
679bf215546Sopenharmony_ci    <value name="0x7C007C00">v2inf</value>
680bf215546Sopenharmony_ci  </enum>
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci  <enum name="Mux"> <!-- four conditions; the description text embeds Markdown backticks and a LaTeX fragment -->
683bf215546Sopenharmony_ci    <desc>
684bf215546Sopenharmony_ci      Condition to use for a `MUX` instruction. `neg` checks the sign bit,
685bf215546Sopenharmony_ci      `int_zero` compares to `0x00000000`, `fp_zero` compares to $\pm 0.0$ as
686bf215546Sopenharmony_ci      an IEEE 754 float, and `bit` checks each bit separately. The `bit` mode
687bf215546Sopenharmony_ci      acts like an imaginary `CSEL.v32u1` instruction, and implements
688bf215546Sopenharmony_ci      `bitselect()` in OpenCL.
689bf215546Sopenharmony_ci    </desc>
690bf215546Sopenharmony_ci    <value desc="Negative">neg</value>
691bf215546Sopenharmony_ci    <value desc="Integer zero" default="true">int_zero</value> <!-- default, although it is the second entry rather than the first -->
692bf215546Sopenharmony_ci    <value desc="Floating point zero">fp_zero</value>
693bf215546Sopenharmony_ci    <value desc="Bitwise">bit</value>
694bf215546Sopenharmony_ci  </enum>
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci  <enum name="Sample mode"> <!-- four values; no default is declared -->
697bf215546Sopenharmony_ci    <desc>
698bf215546Sopenharmony_ci      Varying interpolation mode, for choosing the correct sample to
699bf215546Sopenharmony_ci      interpolate at, allowing the `sample` and `centroid` qualifiers to be
700bf215546Sopenharmony_ci      implemented, as well as the `interpolateAt*` functions.
701bf215546Sopenharmony_ci    </desc>
702bf215546Sopenharmony_ci    <value desc="Center">center</value>
703bf215546Sopenharmony_ci    <value desc="Centroid">centroid</value> <!-- this and the next entry share their names with the qualifiers cited in the description -->
704bf215546Sopenharmony_ci    <value desc="Sample">sample</value>
705bf215546Sopenharmony_ci    <value desc="Explicit">explicit</value> <!-- presumably the mode behind the interpolateAt* functions; confirm -->
706bf215546Sopenharmony_ci  </enum>
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci  <enum name="Update mode"> <!-- 4 slots: three named modes with one reserved placeholder before clobber -->
709bf215546Sopenharmony_ci    <desc>
710bf215546Sopenharmony_ci      The Valhall GPU maintains hidden state when interpolating varyings, to
711bf215546Sopenharmony_ci      allow reusing sample location calculations. The update mode of a varying
712bf215546Sopenharmony_ci      load controls this hidden state.
713bf215546Sopenharmony_ci    </desc>
714bf215546Sopenharmony_ci    <value desc="Store interpolation position">store</value>
715bf215546Sopenharmony_ci    <value desc="Retrieve interpolation position">retrieve</value>
716bf215546Sopenharmony_ci    <reserved/> <!-- third slot is unused -->
717bf215546Sopenharmony_ci    <value desc="Clobber saved position">clobber</value>
718bf215546Sopenharmony_ci  </enum>
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci  <enum name="Sample and update mode"> <!-- 8 slots: 7 named combinations and 1 reserved placeholder -->
721bf215546Sopenharmony_ci    <desc>
722bf215546Sopenharmony_ci      For fused varying/texture instructions, only the following specific
723bf215546Sopenharmony_ci      combinations of sample and update modes are permitted.
724bf215546Sopenharmony_ci    </desc>
725bf215546Sopenharmony_ci    <value desc="Center, store">center_store</value> <!-- value names join the sample mode and the update mode with an underscore -->
726bf215546Sopenharmony_ci    <value desc="Centroid, store">centroid_store</value>
727bf215546Sopenharmony_ci    <value desc="Sample, store">sample_store</value>
728bf215546Sopenharmony_ci    <value desc="Explicit, store">explicit_store</value>
729bf215546Sopenharmony_ci    <value desc="Center, clobber">center_clobber</value>
730bf215546Sopenharmony_ci    <reserved/> <!-- sits where a centroid/clobber pairing would fall if the store ordering were repeated; no such value is defined -->
731bf215546Sopenharmony_ci    <value desc="Sample, clobber">sample_clobber</value>
732bf215546Sopenharmony_ci    <value desc="Retrieve previous state">retrieve</value> <!-- the only entry that names no sample mode -->
733bf215546Sopenharmony_ci  </enum>
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci  <enum name="Source format"> <!-- four values: two flat (uninterpreted) and two interpolated float formats -->
736bf215546Sopenharmony_ci    <desc>
737bf215546Sopenharmony_ci      In-memory format of varyings.
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci      Note: src_flat32 is only valid with 32-bit varying instructions and
740bf215546Sopenharmony_ci      src_flat16 is only valid with 16-bit varying instructions.
741bf215546Sopenharmony_ci    </desc>
742bf215546Sopenharmony_ci    <value desc="Uninterpreted 32-bit values">src_flat32</value> <!-- width must match the instruction width, per the note above -->
743bf215546Sopenharmony_ci    <value desc="Uninterpreted 16-bit values">src_flat16</value>
744bf215546Sopenharmony_ci    <value desc="Interpolated 32-bit floats">src_f32</value> <!-- the note above places no width restriction on the interpolated formats -->
745bf215546Sopenharmony_ci    <value desc="Interpolated 16-bit floats">src_f16</value>
746bf215546Sopenharmony_ci  </enum>
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci  <enum name="Atomic operation"> <!-- 16 slots: 9 named operations and 7 reserved placeholders; every name starts with a -->
749bf215546Sopenharmony_ci    <desc>
750bf215546Sopenharmony_ci      Operation performed in a general computational atomic instruction.
751bf215546Sopenharmony_ci    </desc>
752bf215546Sopenharmony_ci    <reserved/> <!-- two placeholders precede the first operation -->
753bf215546Sopenharmony_ci    <reserved/>
754bf215546Sopenharmony_ci    <value desc="Add">aadd</value>
755bf215546Sopenharmony_ci    <reserved/> <!-- five placeholders separate add from the min/max group -->
756bf215546Sopenharmony_ci    <reserved/>
757bf215546Sopenharmony_ci    <reserved/>
758bf215546Sopenharmony_ci    <reserved/>
759bf215546Sopenharmony_ci    <reserved/>
760bf215546Sopenharmony_ci    <value desc="Signed minimum">asmin</value> <!-- the last eight slots are all populated -->
761bf215546Sopenharmony_ci    <value desc="Signed maximum">asmax</value>
762bf215546Sopenharmony_ci    <value desc="Unsigned minimum">aumin</value>
763bf215546Sopenharmony_ci    <value desc="Unsigned maximum">aumax</value>
764bf215546Sopenharmony_ci    <value desc="Bitwise and">aand</value>
765bf215546Sopenharmony_ci    <value desc="Bitwise or">aor</value>
766bf215546Sopenharmony_ci    <value desc="Bitwise exclusive-or">axor</value>
767bf215546Sopenharmony_ci    <value desc="Exchange (must return the value)">axchg</value> <!-- the desc itself records the constraint that the old value must be returned -->
768bf215546Sopenharmony_ci  </enum>
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci  <enum name="Atomic operation with 1"> <!-- five values, no reserved slots and no declared default -->
771bf215546Sopenharmony_ci    <desc>
772bf215546Sopenharmony_ci      Operation performed in a computational atomic-with-1 instruction.
773bf215546Sopenharmony_ci    </desc>
774bf215546Sopenharmony_ci    <value desc="Increment">ainc</value>
775bf215546Sopenharmony_ci    <value desc="Decrement">adec</value>
776bf215546Sopenharmony_ci    <value desc="Unsigned maximum with 1">aumax1</value> <!-- the trailing 1 in a name marks the constant operand -->
777bf215546Sopenharmony_ci    <value desc="Signed maximum with 1">asmax1</value>
778bf215546Sopenharmony_ci    <value desc="Set bottom bit">aor1</value> <!-- an or with 1, described by its effect -->
779bf215546Sopenharmony_ci  </enum>
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_ci  <ins name="NOP" title="No operation" dests="0" opcode="0x00" unit="CVT"> <!-- opcode 0x00 with zero destinations; no sources, immediates or modifiers are declared -->
782bf215546Sopenharmony_ci    <desc>
783bf215546Sopenharmony_ci      Do nothing. Useful at the start of a block for waiting on slots required
784bf215546Sopenharmony_ci      by the first actual instruction of the block, to reconcile dependencies
785bf215546Sopenharmony_ci      after a branch. Also useful as the sole instruction of an empty shader.
786bf215546Sopenharmony_ci    </desc>
787bf215546Sopenharmony_ci  </ins>
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci  <ins name="BRANCHZ" title="Compare to zero and branch" dests="0" opcode="0x1F" unit="CVT"> <!-- one source, one signed immediate, one modifier; writes no destination -->
790bf215546Sopenharmony_ci    <desc>
791bf215546Sopenharmony_ci      Branches to a specified relative offset if its source is nonzero (default)
792bf215546Sopenharmony_ci      or if its source is zero (if `.eq` is set). The offset is 27-bits and
793bf215546Sopenharmony_ci      sign-extended, giving an effective range of ±26-bits. The offset is
794bf215546Sopenharmony_ci      specified in units of instructions, relative to the *next* instruction.
795bf215546Sopenharmony_ci      Positive offsets may be interpreted as "number of instructions to skip".
796bf215546Sopenharmony_ci      Since Valhall instructions are 8 bytes, this operates as:
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci      $$PC := \begin{cases} PC + 8 \cdot (\text{offset} \; + 1) &amp; \text{if} \;
799bf215546Sopenharmony_ci      \text{src} \stackrel{?}{=} 0 \\ PC + 8 &amp; \text{otherwise} \end{cases}$$
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci      Used with comparison instructions to implement control flow. Tie the
802bf215546Sopenharmony_ci      source to a nonzero constant to implement a jump. May introduce
803bf215546Sopenharmony_ci      divergence, so generally requires `.reconverge` flow control.
804bf215546Sopenharmony_ci    </desc>
805bf215546Sopenharmony_ci    <src combine="true">Value to compare against zero</src>
806bf215546Sopenharmony_ci    <imm name="offset" start="8" size="27" signed="true"/> <!-- size and signedness match the 27-bit sign-extended offset in the description -->
807bf215546Sopenharmony_ci    <conservative/>
808bf215546Sopenharmony_ci    <mod name="eq" start="36" size="1"/> <!-- single-bit flag selecting the branch-if-zero sense described above -->
809bf215546Sopenharmony_ci  </ins>
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci  <ins name="DISCARD.f32" title="Discard fragment" dests="0" opcode="0x20" unit="CVT"> <!-- comparison of two sources with no destination; the effect is the discard itself -->
812bf215546Sopenharmony_ci    <desc>
813bf215546Sopenharmony_ci      Evaluates the given condition, and if it passes, discards the current
814bf215546Sopenharmony_ci      fragment and terminates the thread. Only valid in a **fragment** shader.
815bf215546Sopenharmony_ci    </desc>
816bf215546Sopenharmony_ci    <cmp/> <!-- supplies the condition that the description says is evaluated -->
817bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">Left value to compare</src> <!-- both operands carry the same absneg and swizzle flags -->
818bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">Right value to compare</src>
819bf215546Sopenharmony_ci  </ins>
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci  <ins name="BRANCHZI" title="Compare to zero and branch indirect" opcode="0x2F" unit="CVT"> <!-- NOTE(review): no dests attribute here, whereas BRANCHZ declares dests="0"; confirm the generator's default is what is intended -->
822bf215546Sopenharmony_ci    <desc>
823bf215546Sopenharmony_ci      Jump to an indirectly specified (absolute or relative) address. Used to
824bf215546Sopenharmony_ci      jump to blend shaders at the end of a fragment shader.
825bf215546Sopenharmony_ci    </desc>
826bf215546Sopenharmony_ci    <src combine="true">Value to compare against zero</src>
827bf215546Sopenharmony_ci    <src>Branch target</src> <!-- the target comes from a source operand rather than an immediate offset -->
828bf215546Sopenharmony_ci    <conservative/>
829bf215546Sopenharmony_ci    <mod name="eq" start="36" size="1"/> <!-- same position and size as the eq modifier on BRANCHZ -->
830bf215546Sopenharmony_ci    <mod name="absolute" start="40" size="1"/> <!-- presumably picks between the absolute and relative addressing named in the description; confirm -->
831bf215546Sopenharmony_ci  </ins>
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci  <ins name="BARRIER" title="Execution and memory barrier" opcode="0x45" unit="NONE"> <!-- the only instruction in this part of the file with unit NONE; it declares no sources -->
834bf215546Sopenharmony_ci    <desc>
835bf215546Sopenharmony_ci      General-purpose barrier. Must use slot #7. Must be paired with a
836bf215546Sopenharmony_ci      `.wait` flow on the instruction.
837bf215546Sopenharmony_ci    </desc>
838bf215546Sopenharmony_ci    <slot/> <!-- the slot is still an operand even though the description fixes it to slot #7 -->
839bf215546Sopenharmony_ci  </ins>
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci  <group name="CSEL" title="Floating-point conditional select" dests="1" unit="CVT"> <!-- floating-point half of CSEL; a separate group of the same name covers the integer variants -->
842bf215546Sopenharmony_ci    <ins name="CSEL.f32" opcode="0x154"/> <!-- the two variants differ only in name and opcode; the children below are shared -->
843bf215546Sopenharmony_ci    <ins name="CSEL.v2f16" opcode="0x155"/>
844bf215546Sopenharmony_ci    <desc>
845bf215546Sopenharmony_ci      Evaluates the given condition and outputs either the true source or the
846bf215546Sopenharmony_ci      false source.
847bf215546Sopenharmony_ci    </desc>
848bf215546Sopenharmony_ci    <cmp/>
849bf215546Sopenharmony_ci    <src float="true">Left value to compare</src> <!-- sources 1 and 2 feed the comparison; sources 3 and 4 are the candidate results -->
850bf215546Sopenharmony_ci    <src float="true">Right value to compare</src>
851bf215546Sopenharmony_ci    <src float="true">Return value if true</src>
852bf215546Sopenharmony_ci    <src float="true">Return value if false</src>
853bf215546Sopenharmony_ci  </group>
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci  <group name="CSEL" title="Integer conditional select" dests="1" unit="CVT"> <!-- integer half of CSEL: unsigned and signed variants in 32-bit and 2x16-bit forms -->
856bf215546Sopenharmony_ci    <ins name="CSEL.u32" opcode="0x150"/>
857bf215546Sopenharmony_ci    <ins name="CSEL.v2u16" opcode="0x151"/>
858bf215546Sopenharmony_ci    <ins name="CSEL.s32" opcode="0x158"/> <!-- the signed opcodes are the unsigned ones plus 8 -->
859bf215546Sopenharmony_ci    <ins name="CSEL.v2s16" opcode="0x159"/>
860bf215546Sopenharmony_ci    <desc>
861bf215546Sopenharmony_ci      Evaluates the given condition and outputs either the true source or the
862bf215546Sopenharmony_ci      false source.
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci      Valhall lacks integer minimum/maximum instructions. `CSEL` instructions
865bf215546Sopenharmony_ci      with tied operands form the canonical implementations of these
866bf215546Sopenharmony_ci      instructions. Similarly, the integer $\text{sign}$ function is canonically
867bf215546Sopenharmony_ci      implemented with a pair of `CSEL` instructions.
868bf215546Sopenharmony_ci    </desc>
869bf215546Sopenharmony_ci    <cmp/>
870bf215546Sopenharmony_ci    <src>Left value to compare</src> <!-- same four-source layout as the floating-point group, without the float flag -->
871bf215546Sopenharmony_ci    <src>Right value to compare</src>
872bf215546Sopenharmony_ci    <src>Return value if true</src>
873bf215546Sopenharmony_ci    <src>Return value if false</src>
874bf215546Sopenharmony_ci  </group>
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_ci  <ins name="LD_VAR_SPECIAL" title="Load special varying" opcode="0x56" unit="V"> <!-- has no desc element; the comment on the index immediate below is the only description of what is loaded -->
877bf215546Sopenharmony_ci    <sr write="true"/> <!-- staging register marked as written, so the loaded value lands here -->
878bf215546Sopenharmony_ci    <sr_count/>
879bf215546Sopenharmony_ci    <vecsize/>
880bf215546Sopenharmony_ci    <regfmt/>
881bf215546Sopenharmony_ci    <sample/>
882bf215546Sopenharmony_ci    <update/>
883bf215546Sopenharmony_ci    <slot/>
884bf215546Sopenharmony_ci    <src/> <!-- one unlabelled source; its role is not stated here -->
885bf215546Sopenharmony_ci    <imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
886bf215546Sopenharmony_ci  </ins>
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_ci  <group name="LD_VAR_BUF_IMM" title="Load immediate varying" unit="V"> <!-- immediate-indexed form: the varying is selected by the index immediate at the end of the group -->
889bf215546Sopenharmony_ci    <desc>Interpolates a given varying from hardware buffer</desc>
890bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF_IMM.f32" opcode="0x5C"/> <!-- 32-bit and 16-bit variants on consecutive opcodes -->
891bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF_IMM.f16" opcode="0x5D"/>
892bf215546Sopenharmony_ci    <slot/>
893bf215546Sopenharmony_ci    <vecsize/>
894bf215546Sopenharmony_ci    <source_format/> <!-- takes a source format, where the software-buffer LD_VAR loads take a register format -->
895bf215546Sopenharmony_ci    <sample/>
896bf215546Sopenharmony_ci    <update/>
897bf215546Sopenharmony_ci    <sr write="true"/>
898bf215546Sopenharmony_ci    <sr_count/>
899bf215546Sopenharmony_ci    <src/> <!-- one unlabelled source; its role is not stated here -->
900bf215546Sopenharmony_ci    <imm name="index" start="16" size="8"/> <!-- 8-bit immediate -->
901bf215546Sopenharmony_ci  </group>
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci  <group name="LD_VAR_BUF" title="Load indirect varying" unit="V"> <!-- indirect form: same modifiers as LD_VAR_BUF_IMM, with a second source in place of the index immediate -->
904bf215546Sopenharmony_ci    <desc>Interpolates a given varying from hardware buffer</desc>
905bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF.f32" opcode="0x6C"/> <!-- opcodes are those of LD_VAR_BUF_IMM plus 0x10 -->
906bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF.f16" opcode="0x6D"/>
907bf215546Sopenharmony_ci    <slot/>
908bf215546Sopenharmony_ci    <vecsize/>
909bf215546Sopenharmony_ci    <source_format/>
910bf215546Sopenharmony_ci    <sample/>
911bf215546Sopenharmony_ci    <update/>
912bf215546Sopenharmony_ci    <sr write="true"/>
913bf215546Sopenharmony_ci    <sr_count/>
914bf215546Sopenharmony_ci    <src/> <!-- both sources are unlabelled; presumably the second carries the varying index, confirm -->
915bf215546Sopenharmony_ci    <src/>
916bf215546Sopenharmony_ci  </group>
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci  <ins name="LD_VAR" title="Load indirect varying" unit="V" opcode="0x64"> <!-- single instruction rather than a group; the result type comes from the regfmt modifier -->
919bf215546Sopenharmony_ci    <desc>Interpolates a given varying from a software buffer</desc>
920bf215546Sopenharmony_ci    <slot/>
921bf215546Sopenharmony_ci    <vecsize/>
922bf215546Sopenharmony_ci    <regfmt/>
923bf215546Sopenharmony_ci    <sample/>
924bf215546Sopenharmony_ci    <update/>
925bf215546Sopenharmony_ci    <sr write="true"/>
926bf215546Sopenharmony_ci    <sr_count/>
927bf215546Sopenharmony_ci    <src/> <!-- first source is unlabelled; its role is not stated here -->
928bf215546Sopenharmony_ci    <src>Varying index and table</src> <!-- index and table share one source; the packing is not described here -->
929bf215546Sopenharmony_ci  </ins>
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci  <ins name="LD_VAR_IMM" title="Load immediate varying" unit="V" opcode="0x54"> <!-- immediate counterpart of LD_VAR: table and index are immediates instead of a second source -->
932bf215546Sopenharmony_ci    <desc>Interpolates a given varying from a software buffer</desc>
933bf215546Sopenharmony_ci    <slot/>
934bf215546Sopenharmony_ci    <vecsize/>
935bf215546Sopenharmony_ci    <regfmt/>
936bf215546Sopenharmony_ci    <sample/>
937bf215546Sopenharmony_ci    <update/>
938bf215546Sopenharmony_ci    <sr write="true"/>
939bf215546Sopenharmony_ci    <sr_count/>
940bf215546Sopenharmony_ci    <src/> <!-- one unlabelled source; its role is not stated here -->
941bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/> <!-- 4-bit table field; the index field starts right where it ends (8 + 4 = 12) -->
942bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/>
943bf215546Sopenharmony_ci  </ins>
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci  <ins name="LD_VAR_FLAT" title="Load indirect varying" unit="V" opcode="0x55"> <!-- flat variant: described as a fetch, and it declares no sample or update modifier -->
946bf215546Sopenharmony_ci    <desc>Fetches a given varying from a software buffer</desc>
947bf215546Sopenharmony_ci    <slot/>
948bf215546Sopenharmony_ci    <vecsize/>
949bf215546Sopenharmony_ci    <regfmt/>
950bf215546Sopenharmony_ci    <sr write="true"/>
951bf215546Sopenharmony_ci    <sr_count/>
952bf215546Sopenharmony_ci    <src>Varying index and table</src> <!-- the only source; LD_VAR has an additional unlabelled source before this one -->
953bf215546Sopenharmony_ci  </ins>
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci  <ins name="LD_VAR_FLAT_IMM" title="Load immediate varying" unit="V" opcode="0x41"> <!-- immediate flat fetch: no sources at all, only the table and index immediates -->
956bf215546Sopenharmony_ci    <desc>Fetches a given varying from a software buffer</desc>
957bf215546Sopenharmony_ci    <slot/>
958bf215546Sopenharmony_ci    <vecsize/>
959bf215546Sopenharmony_ci    <regfmt/>
960bf215546Sopenharmony_ci    <sr write="true"/>
961bf215546Sopenharmony_ci    <sr_count/>
962bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/> <!-- same table and index field positions as LD_VAR_IMM -->
963bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/>
964bf215546Sopenharmony_ci  </ins>
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci  <ins name="LD_ATTR_IMM" title="Load immediate attribute" opcode="0x66" opcode2="0" unit="LS"> <!-- shares opcode 0x66 with LD_TEX_IMM; opcode2 tells the two apart -->
967bf215546Sopenharmony_ci    <desc>
968bf215546Sopenharmony_ci      Load `vecsize` components from the attribute descriptor at entry `index`
969bf215546Sopenharmony_ci      of resource table `table` at index (vertex ID, instance ID), converting
970bf215546Sopenharmony_ci      to the specified register format.
971bf215546Sopenharmony_ci    </desc>
972bf215546Sopenharmony_ci    <sr_count/>
973bf215546Sopenharmony_ci    <vecsize/>
974bf215546Sopenharmony_ci    <regfmt/>
975bf215546Sopenharmony_ci    <slot/>
976bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
977bf215546Sopenharmony_ci    <sr write="true"/>
978bf215546Sopenharmony_ci    <src>Vertex ID</src>
979bf215546Sopenharmony_ci    <src>Instance ID</src>
980bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- declared before table although table has the lower start value (16) -->
981bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/>
982bf215546Sopenharmony_ci  </ins>
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci  <ins name="LD_ATTR" title="Load indirect attribute" opcode="0x76" opcode2="0" unit="LS"> <!-- indirect form of LD_ATTR_IMM: opcode 0x76 instead of 0x66, with a third source in place of the two immediates -->
985bf215546Sopenharmony_ci    <desc>
986bf215546Sopenharmony_ci      Load `vecsize` components from the attribute descriptor at the specified
987bf215546Sopenharmony_ci      location at index (vertex ID, instance ID), converting
988bf215546Sopenharmony_ci      to the specified register format.
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_ci      The index must not diverge within a warp.
991bf215546Sopenharmony_ci    </desc>
992bf215546Sopenharmony_ci    <sr_count/>
993bf215546Sopenharmony_ci    <vecsize/>
994bf215546Sopenharmony_ci    <regfmt/>
995bf215546Sopenharmony_ci    <slot/>
996bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
997bf215546Sopenharmony_ci    <sr write="true"/>
998bf215546Sopenharmony_ci    <src>Vertex ID</src>
999bf215546Sopenharmony_ci    <src>Instance ID</src>
1000bf215546Sopenharmony_ci    <src>Index and table</src> <!-- the operand that the description says must not diverge within a warp -->
1001bf215546Sopenharmony_ci  </ins>
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_ci  <ins name="LD_TEX_IMM" title="Load immediate texture" opcode="0x66" opcode2="1" unit="LS"> <!-- shares opcode 0x66 with LD_ATTR_IMM; opcode2="1" selects the texture form -->
1004bf215546Sopenharmony_ci    <desc>
1005bf215546Sopenharmony_ci      Load `vecsize` components from the texture descriptor at entry `index`
1006bf215546Sopenharmony_ci      of resource table `table`, converting
1007bf215546Sopenharmony_ci      to the specified register format.
1008bf215546Sopenharmony_ci    </desc>
1009bf215546Sopenharmony_ci    <sr_count/>
1010bf215546Sopenharmony_ci    <vecsize/>
1011bf215546Sopenharmony_ci    <regfmt/>
1012bf215546Sopenharmony_ci    <slot/>
1013bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
1014bf215546Sopenharmony_ci    <sr write="true"/>
1015bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src> <!-- two coordinates share each source; 16:16 presumably means two 16-bit halves, confirm -->
1016bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
1017bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- declared before table although table has the lower start value (16) -->
1018bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/>
1019bf215546Sopenharmony_ci  </ins>
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci  <ins name="LD_TEX" title="Load indirect texture" opcode="0x76" opcode2="1" unit="LS"> <!-- indirect form of LD_TEX_IMM; shares opcode 0x76 with LD_ATTR and differs in opcode2 -->
1022bf215546Sopenharmony_ci    <desc>
1023bf215546Sopenharmony_ci      Load `vecsize` components from the texture descriptor at the specified
1024bf215546Sopenharmony_ci      location at index, converting
1025bf215546Sopenharmony_ci      to the specified register format.
1026bf215546Sopenharmony_ci    </desc>
1027bf215546Sopenharmony_ci    <sr_count/>
1028bf215546Sopenharmony_ci    <vecsize/>
1029bf215546Sopenharmony_ci    <regfmt/>
1030bf215546Sopenharmony_ci    <slot/>
1031bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
1032bf215546Sopenharmony_ci    <sr write="true"/>
1033bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src> <!-- two coordinates share each source; 16:16 presumably means two 16-bit halves, confirm -->
1034bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
1035bf215546Sopenharmony_ci    <src>Index and table</src> <!-- replaces the index and table immediates of LD_TEX_IMM -->
1036bf215546Sopenharmony_ci  </ins>
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci  <ins name="LEA_ATTR_IMM" title="Load effective address of image texel" opcode="0x67" opcode2="0" unit="LS"> <!-- NOTE(review): the title is identical to LEA_TEX_IMM's, yet the description is about an attribute; confirm whether the title should say attribute -->
1039bf215546Sopenharmony_ci    <desc>
1040bf215546Sopenharmony_ci      Load the effective address of an attribute specified with the
1041bf215546Sopenharmony_ci      given immediate index. Returns three staging registers: the low/high
1042bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor.
1043bf215546Sopenharmony_ci    </desc>
1044bf215546Sopenharmony_ci    <slot/>
1045bf215546Sopenharmony_ci    <sr_count/>
1046bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
1047bf215546Sopenharmony_ci    <sr write="true"/> <!-- receives the three result registers listed in the description -->
1048bf215546Sopenharmony_ci    <src>Vertex index</src>
1049bf215546Sopenharmony_ci    <src>Instance index</src>
1050bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/> <!-- two adjacent 4-bit fields: table at 16, index at 20 -->
1051bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/>
1052bf215546Sopenharmony_ci  </ins>
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci  <ins name="LEA_ATTR" title="Load effective address of image texel" opcode="0x77" opcode2="0" unit="LS"> <!-- NOTE(review): the title speaks of an image texel, yet the description is about an attribute; confirm whether the title should say attribute -->
1055bf215546Sopenharmony_ci    <desc>
1056bf215546Sopenharmony_ci      Load the effective address of an attribute specified with the
1057bf215546Sopenharmony_ci      given index. Returns three staging registers: the low/high
1058bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor.
1059bf215546Sopenharmony_ci    </desc>
1060bf215546Sopenharmony_ci    <vecsize/> <!-- NOTE(review): the immediate form LEA_ATTR_IMM declares no vecsize; confirm the difference is intended -->
1061bf215546Sopenharmony_ci    <slot/>
1062bf215546Sopenharmony_ci    <sr_count/>
1063bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 looks out of range for a 64-bit instruction word; presumably a placeholder because the modifier is implied, confirm -->
1064bf215546Sopenharmony_ci    <sr write="true"/> <!-- receives the three result registers listed in the description -->
1065bf215546Sopenharmony_ci    <src>Vertex index</src>
1066bf215546Sopenharmony_ci    <src>Instance index</src>
1067bf215546Sopenharmony_ci    <src>Attribute index and table</src> <!-- replaces the table and index immediates of LEA_ATTR_IMM -->
1068bf215546Sopenharmony_ci  </ins>
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci  <ins name="LEA_TEX_IMM" title="Load effective address of image texel" opcode="0x67" opcode2="1" unit="LS">
    <!-- Immediate form of LEA_TEX: the table and index are encoded as
         immediates instead of being read from a third source. Shares opcode
         0x67 with LEA_ATTR_IMM; opcode2 tells the two apart. -->
1071bf215546Sopenharmony_ci    <desc>
1072bf215546Sopenharmony_ci      Load the effective address of a texel from the image specified with the
1073bf215546Sopenharmony_ci      given immediate index. Returns three staging registers: the low/high
1074bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor. The format
1075bf215546Sopenharmony_ci      of the internal conversion descriptor is compatible with Bifrost but
1076bf215546Sopenharmony_ci      omits the register format, as this is specified with the ST_CVT
1077bf215546Sopenharmony_ci      instruction on Valhall.
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci      Coordinates are specified as 16-bit integers, packed into 32-bit sources.
1080bf215546Sopenharmony_ci    </desc>
1081bf215546Sopenharmony_ci    <slot/>
1082bf215546Sopenharmony_ci    <sr_count/>
1083bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/>
1084bf215546Sopenharmony_ci    <sr write="true"/>
1085bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src>
1086bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
1087bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/>
1088bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/>
1089bf215546Sopenharmony_ci  </ins>
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci  <ins name="LEA_TEX" title="Load effective address of image texel" opcode="0x77" opcode2="1" unit="LS">
    <!-- Register-indexed form: the image index and table come from the third
         source. Both coordinate sources hold a packed 16:16 pair in a full
         32-bit source, so neither carries a size override (this matches
         LEA_TEX_IMM). -->
1092bf215546Sopenharmony_ci    <desc>
1093bf215546Sopenharmony_ci      Load the effective address of a texel from the image specified with the
1094bf215546Sopenharmony_ci      given index. Returns three staging registers: the low/high
1095bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor. The format
1096bf215546Sopenharmony_ci      of the internal conversion descriptor is compatible with Bifrost but
1097bf215546Sopenharmony_ci      omits the register format, as this is specified with the ST_CVT
1098bf215546Sopenharmony_ci      instruction on Valhall.
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_ci      Coordinates are specified as 16-bit integers, packed into 32-bit sources.
1101bf215546Sopenharmony_ci    </desc>
1102bf215546Sopenharmony_ci    <vecsize/>
1103bf215546Sopenharmony_ci    <slot/>
1104bf215546Sopenharmony_ci    <sr_count/>
1105bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/>
1106bf215546Sopenharmony_ci    <sr write="true"/>
1107bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src>
1108bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
1109bf215546Sopenharmony_ci    <src>Index and table</src>
1110bf215546Sopenharmony_ci  </ins>
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i8" title="Global memory load" opcode="0x6a" opcode2="0" unit="LS">
    <!-- 8-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 0). As in
         the other LD_BUFFER variants, the first source is a 32-bit byte
         offset, not a 64-bit address as in LOAD. -->
1113bf215546Sopenharmony_ci    <desc>
1114bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1115bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1116bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1117bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1118bf215546Sopenharmony_ci      the mode descriptor.
1119bf215546Sopenharmony_ci    </desc>
1120bf215546Sopenharmony_ci    <sr write="true"/>
1121bf215546Sopenharmony_ci    <sr_count/>
1122bf215546Sopenharmony_ci    <mod name="load_lane_8_bit" start="36" size="3"/>
1123bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1124bf215546Sopenharmony_ci    <slot/>
1125bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1126bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1127bf215546Sopenharmony_ci  </ins>
1128bf215546Sopenharmony_ci
1129bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i16" title="Global memory load" opcode="0x6a" opcode2="1" unit="LS">
    <!-- 16-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 1); lane
         selection uses the load_lane_16_bit modifier. -->
1130bf215546Sopenharmony_ci    <desc>
1131bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1132bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1133bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1134bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1135bf215546Sopenharmony_ci      the mode descriptor.
1136bf215546Sopenharmony_ci    </desc>
1137bf215546Sopenharmony_ci    <sr write="true"/>
1138bf215546Sopenharmony_ci    <sr_count/>
1139bf215546Sopenharmony_ci    <mod name="load_lane_16_bit" start="36" size="3"/>
1140bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1141bf215546Sopenharmony_ci    <slot/>
1142bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1143bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1144bf215546Sopenharmony_ci  </ins>
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i24" title="Global memory load" opcode="0x6a" opcode2="2" unit="LS">
    <!-- 24-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 2); lane
         selection uses the load_lane_24_bit modifier. -->
1147bf215546Sopenharmony_ci    <desc>
1148bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1149bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1150bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1151bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1152bf215546Sopenharmony_ci      the mode descriptor.
1153bf215546Sopenharmony_ci    </desc>
1154bf215546Sopenharmony_ci    <sr write="true"/>
1155bf215546Sopenharmony_ci    <sr_count/>
1156bf215546Sopenharmony_ci    <mod name="load_lane_24_bit" start="36" size="3"/>
1157bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1158bf215546Sopenharmony_ci    <slot/>
1159bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1160bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1161bf215546Sopenharmony_ci  </ins>
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i32" title="Global memory load" opcode="0x6a" opcode2="3" unit="LS">
    <!-- 32-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 3); lane
         selection uses the load_lane_32_bit modifier. -->
1164bf215546Sopenharmony_ci    <desc>
1165bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1166bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1167bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1168bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1169bf215546Sopenharmony_ci      the mode descriptor.
1170bf215546Sopenharmony_ci    </desc>
1171bf215546Sopenharmony_ci    <sr write="true"/>
1172bf215546Sopenharmony_ci    <sr_count/>
1173bf215546Sopenharmony_ci    <mod name="load_lane_32_bit" start="36" size="3"/>
1174bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1175bf215546Sopenharmony_ci    <slot/>
1176bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1177bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1178bf215546Sopenharmony_ci  </ins>
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i48" title="Global memory load" opcode="0x6a" opcode2="4" unit="LS">
    <!-- 48-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 4); lane
         selection uses the load_lane_48_bit modifier. -->
1181bf215546Sopenharmony_ci    <desc>
1182bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1183bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1184bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1185bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1186bf215546Sopenharmony_ci      the mode descriptor.
1187bf215546Sopenharmony_ci    </desc>
1188bf215546Sopenharmony_ci    <sr write="true"/>
1189bf215546Sopenharmony_ci    <sr_count/>
1190bf215546Sopenharmony_ci    <mod name="load_lane_48_bit" start="36" size="3"/>
1191bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1192bf215546Sopenharmony_ci    <slot/>
1193bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1194bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1195bf215546Sopenharmony_ci  </ins>
1196bf215546Sopenharmony_ci
1197bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i64" title="Global memory load" opcode="0x6a" opcode2="5" unit="LS">
    <!-- 64-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 5); lane
         selection uses the load_lane_64_bit modifier. -->
1198bf215546Sopenharmony_ci    <desc>
1199bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1200bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1201bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1202bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1203bf215546Sopenharmony_ci      the mode descriptor.
1204bf215546Sopenharmony_ci    </desc>
1205bf215546Sopenharmony_ci    <sr write="true"/>
1206bf215546Sopenharmony_ci    <sr_count/>
1207bf215546Sopenharmony_ci    <mod name="load_lane_64_bit" start="36" size="3"/>
1208bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1209bf215546Sopenharmony_ci    <slot/>
1210bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1211bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1212bf215546Sopenharmony_ci  </ins>
1213bf215546Sopenharmony_ci
1214bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i96" title="Global memory load" opcode="0x6a" opcode2="6" unit="LS">
    <!-- 96-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 6); lane
         selection uses the load_lane_96_bit modifier. -->
1215bf215546Sopenharmony_ci    <desc>
1216bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1217bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1218bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1219bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1220bf215546Sopenharmony_ci      the mode descriptor.
1221bf215546Sopenharmony_ci    </desc>
1222bf215546Sopenharmony_ci    <sr write="true"/>
1223bf215546Sopenharmony_ci    <sr_count/>
1224bf215546Sopenharmony_ci    <mod name="load_lane_96_bit" start="36" size="3"/>
1225bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1226bf215546Sopenharmony_ci    <slot/>
1227bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1228bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1229bf215546Sopenharmony_ci  </ins>
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i128" title="Global memory load" opcode="0x6a" opcode2="7" unit="LS">
    <!-- 128-bit member of the LD_BUFFER family (opcode 0x6a, opcode2 7); lane
         selection uses the load_lane_128_bit modifier. -->
1232bf215546Sopenharmony_ci    <desc>
1233bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
1234bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
1235bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
1236bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
1237bf215546Sopenharmony_ci      the mode descriptor.
1238bf215546Sopenharmony_ci    </desc>
1239bf215546Sopenharmony_ci    <sr write="true"/>
1240bf215546Sopenharmony_ci    <sr_count/>
1241bf215546Sopenharmony_ci    <mod name="load_lane_128_bit" start="36" size="3"/>
1242bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1243bf215546Sopenharmony_ci    <slot/>
1244bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
1245bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
1246bf215546Sopenharmony_ci  </ins>
1247bf215546Sopenharmony_ci
1248bf215546Sopenharmony_ci  <ins name="LEA_BUF_IMM" title="Load buffer effective address" opcode="0x5E" unit="LS">
    <!-- Has a single source (the linear ID); the buffer is selected by the
         "table" and "index" immediates. Unlike the other *_IMM address
         instructions in this file, the immediates here start at 8 and the
         index is 8 wide. -->
1249bf215546Sopenharmony_ci    <desc>
1250bf215546Sopenharmony_ci      Load effective address of a buffer with an immediate offset added.
1251bf215546Sopenharmony_ci    </desc>
1252bf215546Sopenharmony_ci    <sr write="true"/>
1253bf215546Sopenharmony_ci    <sr_count/>
1254bf215546Sopenharmony_ci    <slot/>
1255bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/>
1256bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/>
1257bf215546Sopenharmony_ci    <src>Linear ID</src>
1258bf215546Sopenharmony_ci  </ins>
1259bf215546Sopenharmony_ci
1260bf215546Sopenharmony_ci  <ins name="LOAD.i8" title="Global memory load" opcode="0x60" opcode2="0" unit="LS">
    <!-- 8-bit member of the LOAD family (opcode 0x60, opcode2 0). Takes a
         64-bit address source plus a signed immediate offset. -->
1261bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1262bf215546Sopenharmony_ci    <sr write="true"/>
1263bf215546Sopenharmony_ci    <memory_access/>
1264bf215546Sopenharmony_ci    <sr_count/>
1265bf215546Sopenharmony_ci    <mod name="load_lane_8_bit" start="36" size="3"/>
1266bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1267bf215546Sopenharmony_ci    <slot/>
1268bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1269bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1270bf215546Sopenharmony_ci  </ins>
1271bf215546Sopenharmony_ci
1272bf215546Sopenharmony_ci  <ins name="LOAD.i16" title="Global memory load" opcode="0x60" opcode2="1" unit="LS">
    <!-- 16-bit member of the LOAD family (opcode 0x60, opcode2 1). Takes a
         64-bit address source plus a signed immediate offset. -->
1273bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1274bf215546Sopenharmony_ci    <sr write="true"/>
1275bf215546Sopenharmony_ci    <memory_access/>
1276bf215546Sopenharmony_ci    <sr_count/>
1277bf215546Sopenharmony_ci    <mod name="load_lane_16_bit" start="36" size="3"/>
1278bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1279bf215546Sopenharmony_ci    <slot/>
1280bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1281bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1282bf215546Sopenharmony_ci  </ins>
1283bf215546Sopenharmony_ci
1284bf215546Sopenharmony_ci  <ins name="LOAD.i24" title="Global memory load" opcode="0x60" opcode2="2" unit="LS">
    <!-- 24-bit member of the LOAD family (opcode 0x60, opcode2 2). Takes a
         64-bit address source plus a signed immediate offset. -->
1285bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1286bf215546Sopenharmony_ci    <sr write="true"/>
1287bf215546Sopenharmony_ci    <memory_access/>
1288bf215546Sopenharmony_ci    <sr_count/>
1289bf215546Sopenharmony_ci    <mod name="load_lane_24_bit" start="36" size="3"/>
1290bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1291bf215546Sopenharmony_ci    <slot/>
1292bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1293bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1294bf215546Sopenharmony_ci  </ins>
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci  <ins name="LOAD.i32" title="Global memory load" opcode="0x60" opcode2="3" unit="LS">
    <!-- 32-bit member of the LOAD family (opcode 0x60, opcode2 3). Takes a
         64-bit address source plus a signed immediate offset. -->
1297bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1298bf215546Sopenharmony_ci    <sr write="true"/>
1299bf215546Sopenharmony_ci    <memory_access/>
1300bf215546Sopenharmony_ci    <sr_count/>
1301bf215546Sopenharmony_ci    <mod name="load_lane_32_bit" start="36" size="3"/>
1302bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1303bf215546Sopenharmony_ci    <slot/>
1304bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1305bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1306bf215546Sopenharmony_ci  </ins>
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci  <ins name="LOAD.i48" title="Global memory load" opcode="0x60" opcode2="4" unit="LS">
    <!-- 48-bit member of the LOAD family (opcode 0x60, opcode2 4). Takes a
         64-bit address source plus a signed immediate offset. -->
1309bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1310bf215546Sopenharmony_ci    <sr write="true"/>
1311bf215546Sopenharmony_ci    <memory_access/>
1312bf215546Sopenharmony_ci    <sr_count/>
1313bf215546Sopenharmony_ci    <mod name="load_lane_48_bit" start="36" size="3"/>
1314bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1315bf215546Sopenharmony_ci    <slot/>
1316bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1317bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1318bf215546Sopenharmony_ci  </ins>
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_ci  <ins name="LOAD.i64" title="Global memory load" opcode="0x60" opcode2="5" unit="LS">
    <!-- 64-bit member of the LOAD family (opcode 0x60, opcode2 5). Takes a
         64-bit address source plus a signed immediate offset. -->
1321bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1322bf215546Sopenharmony_ci    <sr write="true"/>
1323bf215546Sopenharmony_ci    <memory_access/>
1324bf215546Sopenharmony_ci    <sr_count/>
1325bf215546Sopenharmony_ci    <mod name="load_lane_64_bit" start="36" size="3"/>
1326bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1327bf215546Sopenharmony_ci    <slot/>
1328bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1329bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1330bf215546Sopenharmony_ci  </ins>
1331bf215546Sopenharmony_ci
1332bf215546Sopenharmony_ci  <ins name="LOAD.i96" title="Global memory load" opcode="0x60" opcode2="6" unit="LS">
    <!-- 96-bit member of the LOAD family (opcode 0x60, opcode2 6). Takes a
         64-bit address source plus a signed immediate offset. -->
1333bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1334bf215546Sopenharmony_ci    <sr write="true"/>
1335bf215546Sopenharmony_ci    <memory_access/>
1336bf215546Sopenharmony_ci    <sr_count/>
1337bf215546Sopenharmony_ci    <mod name="load_lane_96_bit" start="36" size="3"/>
1338bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1339bf215546Sopenharmony_ci    <slot/>
1340bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1341bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1342bf215546Sopenharmony_ci  </ins>
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci  <ins name="LOAD.i128" title="Global memory load" opcode="0x60" opcode2="7" unit="LS">
    <!-- 128-bit member of the LOAD family (opcode 0x60, opcode2 7). Takes a
         64-bit address source plus a signed immediate offset. -->
1345bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
1346bf215546Sopenharmony_ci    <sr write="true"/>
1347bf215546Sopenharmony_ci    <memory_access/>
1348bf215546Sopenharmony_ci    <sr_count/>
1349bf215546Sopenharmony_ci    <mod name="load_lane_128_bit" start="36" size="3"/>
1350bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/>
1351bf215546Sopenharmony_ci    <slot/>
1352bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src>
1353bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1354bf215546Sopenharmony_ci  </ins>
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci  <group name="STORE" title="Global memory store" opcode="0x61" unit="LS">
    <!-- Store counterpart of the LOAD family: one group under opcode 0x61
         with opcode2 0x0 to 0x7 selecting the width (i8 to i128). The staging
         register is read rather than written, and the address and signed
         offset are declared the same way as for LOAD. -->
1357bf215546Sopenharmony_ci    <desc>Stores to main memory</desc>
1358bf215546Sopenharmony_ci    <sr read="true"/>
1359bf215546Sopenharmony_ci    <ins name="STORE.i8" opcode2="0x0"/>
1360bf215546Sopenharmony_ci    <ins name="STORE.i16" opcode2="0x1"/>
1361bf215546Sopenharmony_ci    <ins name="STORE.i24" opcode2="0x2"/>
1362bf215546Sopenharmony_ci    <ins name="STORE.i32" opcode2="0x3"/>
1363bf215546Sopenharmony_ci    <ins name="STORE.i48" opcode2="0x4"/>
1364bf215546Sopenharmony_ci    <ins name="STORE.i64" opcode2="0x5"/>
1365bf215546Sopenharmony_ci    <ins name="STORE.i96" opcode2="0x6"/>
1366bf215546Sopenharmony_ci    <ins name="STORE.i128" opcode2="0x7"/>
1367bf215546Sopenharmony_ci    <sr_count/>
1368bf215546Sopenharmony_ci    <memory_access/>
1369bf215546Sopenharmony_ci    <slot/>
1370bf215546Sopenharmony_ci    <src size="64">Address to store to after adding offset</src>
1371bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/>
1372bf215546Sopenharmony_ci  </group>
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci  <ins name="ST_CVT" title="Store with conversion" opcode="0x71" unit="LS">
    <!-- Converting store: reads the staging register and takes a 64-bit
         address, an unsigned immediate offset and a conversion descriptor.
         memory_access is declared here as an explicit modifier (start 37,
         size 3) rather than with the <memory_access/> shorthand used by LOAD
         and STORE.
         NOTE(review): confirm that position and width against the
         memory_access enum. -->
1375bf215546Sopenharmony_ci    <desc>
1376bf215546Sopenharmony_ci      Store to memory with data conversion. The address to store to is given in
1377bf215546Sopenharmony_ci      the first source, which must be a 64-bit register (a pair of 32-bit
1378bf215546Sopenharmony_ci      registers). The other source is the conversion descriptor used for the store.
1379bf215546Sopenharmony_ci
1380bf215546Sopenharmony_ci      Used with LEA_TEX_IMM to implement image stores.
1381bf215546Sopenharmony_ci    </desc>
1382bf215546Sopenharmony_ci    <slot/>
1383bf215546Sopenharmony_ci    <mod name="memory_access" start="37" size="3"/>
1384bf215546Sopenharmony_ci    <vecsize/>
1385bf215546Sopenharmony_ci    <regfmt/>
1386bf215546Sopenharmony_ci    <sr read="true"/>
1387bf215546Sopenharmony_ci    <sr_count/>
1388bf215546Sopenharmony_ci    <src size="64">64-bit address to store to</src>
1389bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
1390bf215546Sopenharmony_ci    <src>Internal conversion descriptor</src>
1391bf215546Sopenharmony_ci  </ins>
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_ci  <ins name="LD_TILE" title="Load from tilebuffer" opcode="0x78" unit="NONE">
    <!-- Tilebuffer read: writes the staging register and takes the same three
         sources as ST_TILE (pixel indices, coverage mask, conversion
         descriptor). -->
1394bf215546Sopenharmony_ci    <desc>
1395bf215546Sopenharmony_ci      Loads a given render target, specified in the pixel indices descriptor, at
1396bf215546Sopenharmony_ci      a given location and sample, and convert to the format specified in the
1397bf215546Sopenharmony_ci      internal conversion descriptor. Used to implement EXT_framebuffer_fetch
1398bf215546Sopenharmony_ci      and internally in blend shaders.
1399bf215546Sopenharmony_ci    </desc>
1400bf215546Sopenharmony_ci    <sr write="true"/>
1401bf215546Sopenharmony_ci    <sr_count/>
1402bf215546Sopenharmony_ci    <vecsize/>
1403bf215546Sopenharmony_ci    <regfmt/>
1404bf215546Sopenharmony_ci    <slot/>
1405bf215546Sopenharmony_ci    <src>Pixel indices descriptor</src>
1406bf215546Sopenharmony_ci    <src>Coverage mask</src>
1407bf215546Sopenharmony_ci    <src>Conversion descriptor</src>
1408bf215546Sopenharmony_ci  </ins>
1409bf215546Sopenharmony_ci
1410bf215546Sopenharmony_ci  <ins name="ST_TILE" title="Store to tilebuffer" opcode="0x79" unit="NONE">
    <!-- Tilebuffer write: reads the staging register and takes the same three
         sources as LD_TILE (pixel indices, coverage mask, conversion
         descriptor). -->
1411bf215546Sopenharmony_ci    <desc>
1412bf215546Sopenharmony_ci      Store to given render target, specified in the pixel indices descriptor, at
1413bf215546Sopenharmony_ci      a given location and sample, and convert to the format specified in the
1414bf215546Sopenharmony_ci      internal conversion descriptor. Used internally in blend shaders.
1415bf215546Sopenharmony_ci    </desc>
1416bf215546Sopenharmony_ci    <sr read="true"/>
1417bf215546Sopenharmony_ci    <sr_count/>
1418bf215546Sopenharmony_ci    <vecsize/>
1419bf215546Sopenharmony_ci    <regfmt/>
1420bf215546Sopenharmony_ci    <slot/>
1421bf215546Sopenharmony_ci    <src>Pixel indices descriptor</src>
1422bf215546Sopenharmony_ci    <src>Coverage mask</src>
1423bf215546Sopenharmony_ci    <src>Conversion descriptor</src>
1424bf215546Sopenharmony_ci  </ins>
1425bf215546Sopenharmony_ci
1426bf215546Sopenharmony_ci  <ins name="BLEND" title="Blend render target" opcode="0x7F" unit="NONE">
    <!-- Takes a 64-bit blend descriptor and a sample coverage source, plus an
         8-wide "target" immediate used as the branch offset described below.
         NOTE(review): the description mentions a coverage mask stored to a
         second set of staging registers, but only a read staging register is
         declared here; confirm whether a write declaration is intended. -->
1427bf215546Sopenharmony_ci    <desc>
1428bf215546Sopenharmony_ci      Blends a given render target. This loads the API-specified blend state for
1429bf215546Sopenharmony_ci      the render target from the first source. Blend descriptors are available
1430bf215546Sopenharmony_ci      as special immediates. It then reads the colour to be blended from the
1431bf215546Sopenharmony_ci      first staging register, with the specified vector size and register format
1432bf215546Sopenharmony_ci      as desired. The resulting coverage mask is stored to the second set of
1433bf215546Sopenharmony_ci      staging registers.
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci      In the fixed-function path, `BLEND` sends the colour to the blender to be
1436bf215546Sopenharmony_ci      written to the tilebuffer. Then, if the instruction's flow control
1437bf215546Sopenharmony_ci      specifies termination, the fragment program is ended. If it does not
1438bf215546Sopenharmony_ci      specify termination, `BLEND` acts as a relative branch, branching with the
1439bf215546Sopenharmony_ci      offset specified as `target`. This allows the subsequent instructions to
1440bf215546Sopenharmony_ci      be skipped when fixed-function blending is used. Note this implicit branch
1441bf215546Sopenharmony_ci      can never introduce divergence, so `.reconverge` is not required.
1442bf215546Sopenharmony_ci
1443bf215546Sopenharmony_ci      In the blend shader path, `BLEND` ignores the specified flow control and
1444bf215546Sopenharmony_ci      does not branch to the specified offset. Instead, execution continues
1445bf215546Sopenharmony_ci      normally with the next instruction. The compiler should insert code for
1446bf215546Sopenharmony_ci      calling a blend shader after the `BLEND` instruction unless it is known
1447bf215546Sopenharmony_ci      that a blend shader will never be required.
1448bf215546Sopenharmony_ci
1449bf215546Sopenharmony_ci      The indirection is required to support both fixed-function and blend
1450bf215546Sopenharmony_ci      shaders efficiently and without shader variants.
1451bf215546Sopenharmony_ci    </desc>
1452bf215546Sopenharmony_ci    <sr read="true"/>
1453bf215546Sopenharmony_ci    <src size="64">Blend descriptor</src>
1454bf215546Sopenharmony_ci    <src>Sample coverage</src>
1455bf215546Sopenharmony_ci    <imm name="target" start="8" size="8"/>
1456bf215546Sopenharmony_ci    <slot/>
1457bf215546Sopenharmony_ci    <sr_count/>
1458bf215546Sopenharmony_ci    <vecsize/>
1459bf215546Sopenharmony_ci    <regfmt/>
1460bf215546Sopenharmony_ci  </ins>
1461bf215546Sopenharmony_ci
1462bf215546Sopenharmony_ci  <ins name="ATEST" title="Alpha test" opcode="0x7D" unit="NONE">
    <!-- Writes the updated coverage mask to the staging register. Three
         source slots are declared; the third is left empty and has no
         description. -->
1463bf215546Sopenharmony_ci    <desc>
1464bf215546Sopenharmony_ci      Does alpha-to-coverage testing, updating the sample coverage mask. ATEST
1465bf215546Sopenharmony_ci      does not do an implicit discard. It should be executed before the first
1466bf215546Sopenharmony_ci      ZS_EMIT or BLEND instruction.
1467bf215546Sopenharmony_ci    </desc>
1468bf215546Sopenharmony_ci    <sr write="true">Updated coverage mask</sr>
1469bf215546Sopenharmony_ci    <src>Input coverage mask</src>
1470bf215546Sopenharmony_ci    <src swizzle="true">Alpha value (render target 0)</src>
1471bf215546Sopenharmony_ci    <src/>
1472bf215546Sopenharmony_ci    <sr_count/>
1473bf215546Sopenharmony_ci  </ins>
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci  <ins name="ZS_EMIT" title="Depth/stencil write" opcode="0x7E" unit="NONE">
    <!-- The "z" and "stencil" flag modifiers choose which of the depth and
         stencil sources are written out; the updated coverage mask goes to
         the staging register. -->
1476bf215546Sopenharmony_ci    <desc>
1477bf215546Sopenharmony_ci      Programmatically writes out depth, stencil, or both, depending on which
1478bf215546Sopenharmony_ci      modifiers are set. Used to implement gl_FragDepth and gl_FragStencil.
1479bf215546Sopenharmony_ci    </desc>
1480bf215546Sopenharmony_ci    <mod name="z" start="25" size="1"/>
1481bf215546Sopenharmony_ci    <mod name="stencil" start="24" size="1"/>
1482bf215546Sopenharmony_ci    <sr write="true">Updated coverage mask</sr>
1483bf215546Sopenharmony_ci    <src>Depth value</src>
1484bf215546Sopenharmony_ci    <src>Stencil value</src>
1485bf215546Sopenharmony_ci    <src>Input coverage mask</src>
1486bf215546Sopenharmony_ci    <sr_count/>
1487bf215546Sopenharmony_ci    <slot/>
1488bf215546Sopenharmony_ci  </ins>
1489bf215546Sopenharmony_ci
1490bf215546Sopenharmony_ci  <group name="CONVERT" title="Data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- Same-width integer-to-float conversions (v2x16 to v2f16, 32-bit int
         to f32). In each pair the unsigned opcode2 is the signed one plus
         0x10. All members take a round mode and a widenable source. -->
1491bf215546Sopenharmony_ci    <desc>
1492bf215546Sopenharmony_ci      Performs the given data conversion. Note that floating-point rounding is
1493bf215546Sopenharmony_ci      handled via the same hardware and therefore shares an encoding. Round mode
1494bf215546Sopenharmony_ci      is specified where it makes sense.
1495bf215546Sopenharmony_ci    </desc>
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci    <ins name="V2S16_TO_V2F16" opcode2="0x7"/>
1498bf215546Sopenharmony_ci
1499bf215546Sopenharmony_ci    <ins name="S32_TO_F32" opcode2="0x9"/>
1500bf215546Sopenharmony_ci
1501bf215546Sopenharmony_ci    <ins name="V2U16_TO_V2F16" opcode2="0x17"/>
1502bf215546Sopenharmony_ci
1503bf215546Sopenharmony_ci    <ins name="U32_TO_F32" opcode2="0x19"/>
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_ci    <roundmode/>
1506bf215546Sopenharmony_ci    <src widen="true">Value to convert</src>
1507bf215546Sopenharmony_ci  </group>
1508bf215546Sopenharmony_ci
1509bf215546Sopenharmony_ci  <group name="CONVERT" title="16->32 integer data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- Conversions from a 16-bit integer source to a 32-bit integer or
         float. No round mode is declared for this group; the source is
         16-bit with a swizzle. -->
1510bf215546Sopenharmony_ci    <desc>
1511bf215546Sopenharmony_ci      Performs the given data conversion.
1512bf215546Sopenharmony_ci    </desc>
1513bf215546Sopenharmony_ci
1514bf215546Sopenharmony_ci    <ins name="S16_TO_S32" opcode2="0x4"/>
1515bf215546Sopenharmony_ci    <ins name="S16_TO_F32" opcode2="0x5"/>
1516bf215546Sopenharmony_ci    <ins name="U16_TO_U32" opcode2="0x14"/>
1517bf215546Sopenharmony_ci    <ins name="U16_TO_F32" opcode2="0x15"/>
1518bf215546Sopenharmony_ci
1519bf215546Sopenharmony_ci    <src swizzle="true" size="16">Value to convert</src>
1520bf215546Sopenharmony_ci  </group>
1521bf215546Sopenharmony_ci
1522bf215546Sopenharmony_ci  <group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- 32-bit float to 32-bit integer conversions; they take a round mode,
         and the source supports abs/neg modifiers. -->
1523bf215546Sopenharmony_ci    <desc>Performs the given data conversion.</desc>
1524bf215546Sopenharmony_ci    <ins name="F32_TO_S32" opcode2="0xC"/>
1525bf215546Sopenharmony_ci    <ins name="F32_TO_U32" opcode2="0x1C"/>
1526bf215546Sopenharmony_ci    <roundmode/>
1527bf215546Sopenharmony_ci    <src absneg="true">Value to convert</src>
1528bf215546Sopenharmony_ci  </group>
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci  <group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- 16-bit float source variants of the float-to-int conversions. This
         group has the same title as the F32 group elsewhere in this file but
         declares a 16-bit source with a swizzle. -->
1531bf215546Sopenharmony_ci    <desc>Performs the given data conversion.</desc>
1532bf215546Sopenharmony_ci    <ins name="V2F16_TO_V2S16" opcode2="0xE"/>
1533bf215546Sopenharmony_ci    <ins name="V2F16_TO_V2U16" opcode2="0x1E"/>
1534bf215546Sopenharmony_ci    <ins name="F16_TO_S32" opcode2="0xA"/>
1535bf215546Sopenharmony_ci    <ins name="F16_TO_U32" opcode2="0x1A"/>
1536bf215546Sopenharmony_ci    <roundmode/>
1537bf215546Sopenharmony_ci    <src swizzle="true" absneg="true" size="16">Value to convert</src>
1538bf215546Sopenharmony_ci  </group>
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci  <ins name="F16_TO_F32" title="16-bit float to 32-bit float conversion" dests="1" opcode="0x90" opcode2="0xB" unit="CVT">
    <!-- Standalone member of the opcode 0x90 conversion family. The 16-bit
         source uses a lane attribute (lane="28") instead of a swizzle. -->
1541bf215546Sopenharmony_ci    <desc>Converts up with the specified round mode.</desc>
1542bf215546Sopenharmony_ci    <roundmode/>
1543bf215546Sopenharmony_ci    <src lane="28" size="16" absneg="true">Value to convert</src>
1544bf215546Sopenharmony_ci  </ins>
1545bf215546Sopenharmony_ci
1546bf215546Sopenharmony_ci  <group name="CONVERT" title="8-bit to 32-bit data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- Conversions from an 8-bit integer source to a 32-bit integer or
         float. The source is 8-bit with a lane attribute (lane="28"); no
         round mode is declared. -->
1547bf215546Sopenharmony_ci    <desc>
1548bf215546Sopenharmony_ci      Performs the given data conversion.
1549bf215546Sopenharmony_ci    </desc>
1550bf215546Sopenharmony_ci
1551bf215546Sopenharmony_ci    <ins name="S8_TO_S32" opcode2="0x0"/>
1552bf215546Sopenharmony_ci    <ins name="S8_TO_F32" opcode2="0x1"/>
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_ci    <ins name="U8_TO_U32" opcode2="0x10"/>
1555bf215546Sopenharmony_ci    <ins name="U8_TO_F32" opcode2="0x11"/>
1556bf215546Sopenharmony_ci
1557bf215546Sopenharmony_ci    <src lane="28" size="8">Value to convert</src>
1558bf215546Sopenharmony_ci  </group>
1559bf215546Sopenharmony_ci
1560bf215546Sopenharmony_ci  <group name="CONVERT" title="8-bit to 16-bit data conversions" dests="1" opcode="0x90" unit="CVT">
    <!-- Two-component 8-bit to 16-bit conversions under opcode 0x90. As in
         the 8-bit to 32-bit group, unsigned opcode2 values are the signed
         ones plus 0x10 (0x2 and 0x12, 0x3 and 0x13). The 8-bit source is
         flagged halfswizzle rather than carrying a lane attribute. -->
1561bf215546Sopenharmony_ci    <desc>
1562bf215546Sopenharmony_ci      Performs the given data conversion.
1563bf215546Sopenharmony_ci    </desc>
1564bf215546Sopenharmony_ci
1565bf215546Sopenharmony_ci    <ins name="V2S8_TO_V2S16" opcode2="0x2"/>
1566bf215546Sopenharmony_ci    <ins name="V2S8_TO_V2F16" opcode2="0x3"/>
1567bf215546Sopenharmony_ci
1568bf215546Sopenharmony_ci    <ins name="V2U8_TO_V2U16" opcode2="0x12"/>
1569bf215546Sopenharmony_ci    <ins name="V2U8_TO_V2F16" opcode2="0x13"/>
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci    <src halfswizzle="true" size="8">Value to convert</src>
1572bf215546Sopenharmony_ci  </group>
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_ci  <group name="FROUND" title="Floating-point rounding" dests="1" opcode="0x90" unit="CVT">
    <!-- Rounding variants that live under the same opcode (0x90) as the
         CONVERT groups; opcode2 0xD is the f32 form and 0xF the v2f16 form.
         The source is flagged swizzle and absneg, and a roundmode element is
         present.
         NOTE(review): the source label says "Value to convert" although the
         operation rounds; confirm whether the wording is intentional. -->
1575bf215546Sopenharmony_ci    <desc>
1576bf215546Sopenharmony_ci      Performs the given rounding, using the convert unit.
1577bf215546Sopenharmony_ci    </desc>
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci    <ins name="FROUND.f32" opcode2="0xD"/>
1580bf215546Sopenharmony_ci    <ins name="FROUND.v2f16" opcode2="0xF"/>
1581bf215546Sopenharmony_ci
1582bf215546Sopenharmony_ci    <roundmode/>
1583bf215546Sopenharmony_ci    <src swizzle="true" absneg="true">Value to convert</src>
1584bf215546Sopenharmony_ci  </group>
1585bf215546Sopenharmony_ci
1586bf215546Sopenharmony_ci  <ins name="MOV.i32" title="Register move" dests="1" opcode="0x91" opcode2="0x0" unit="CVT">
    <!-- First entry under opcode 0x91 (opcode2 0x0). One destination and one
         source; the source declares no modifiers. -->
1587bf215546Sopenharmony_ci    <desc>Canonical register-to-register move.</desc>
1588bf215546Sopenharmony_ci    <src/>
1589bf215546Sopenharmony_ci  </ins>
1590bf215546Sopenharmony_ci
1591bf215546Sopenharmony_ci  <ins name="CLZ.u32" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x4" unit="CVT">
    <!-- 32-bit form. The three CLZ entries (u32, v2u16, v4u8) occupy
         consecutive opcode2 values 0x4 to 0x6 under opcode 0x91. -->
1592bf215546Sopenharmony_ci    <desc>
1593bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
1594bf215546Sopenharmony_ci    </desc>
1595bf215546Sopenharmony_ci    <src/>
1596bf215546Sopenharmony_ci  </ins>
1597bf215546Sopenharmony_ci
1598bf215546Sopenharmony_ci  <ins name="CLZ.v2u16" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x5" unit="CVT">
    <!-- Two-component 16-bit form of CLZ (opcode2 0x5); otherwise declared
         exactly like CLZ.u32, with a single unmodified source. -->
1599bf215546Sopenharmony_ci    <desc>
1600bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
1601bf215546Sopenharmony_ci    </desc>
1602bf215546Sopenharmony_ci    <src/>
1603bf215546Sopenharmony_ci  </ins>
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci  <ins name="CLZ.v4u8" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x6" unit="CVT">
    <!-- Four-component 8-bit form of CLZ (opcode2 0x6); otherwise declared
         exactly like CLZ.u32, with a single unmodified source. -->
1606bf215546Sopenharmony_ci    <desc>
1607bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
1608bf215546Sopenharmony_ci    </desc>
1609bf215546Sopenharmony_ci    <src/>
1610bf215546Sopenharmony_ci  </ins>
1611bf215546Sopenharmony_ci
1612bf215546Sopenharmony_ci  <ins name="IABS.s32" title="Absolute value" dests="1" opcode="0x91" opcode2="0x8" unit="CVT">
    <!-- 32-bit integer absolute value (opcode 0x91, opcode2 0x8). The source
         is flagged widen. The desc below documents how a 64-bit abs can be
         assembled from other instructions, not this instruction itself. -->
1613bf215546Sopenharmony_ci    <desc>
1614bf215546Sopenharmony_ci      64-bit abs may be constructed in 4 instructions (5 clocks) by checking the
1615bf215546Sopenharmony_ci      sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with
1616bf215546Sopenharmony_ci      `IADD.s64` and `LSHIFT_XOR.i32` on each half.
1617bf215546Sopenharmony_ci    </desc>
1618bf215546Sopenharmony_ci    <src widen="true"/>
1619bf215546Sopenharmony_ci  </ins>
1620bf215546Sopenharmony_ci
1621bf215546Sopenharmony_ci  <ins name="IABS.v2s16" title="Absolute value" dests="1" opcode="0x91" opcode2="0x9" unit="CVT">
    <!-- Two-component 16-bit absolute value (opcode2 0x9). No desc element
         is provided; the source is flagged widen like IABS.s32. -->
1622bf215546Sopenharmony_ci    <src widen="true"/>
1623bf215546Sopenharmony_ci  </ins>
1624bf215546Sopenharmony_ci
1625bf215546Sopenharmony_ci  <ins name="IABS.v4s8" title="Absolute value" dests="1" opcode="0x91" opcode2="0xa" unit="CVT">
    <!-- Four-component 8-bit absolute value (opcode2 0xa). Unlike the s32
         and v2s16 forms, the source here does not set widen. -->
1626bf215546Sopenharmony_ci    <src/>
1627bf215546Sopenharmony_ci  </ins>
1628bf215546Sopenharmony_ci
1629bf215546Sopenharmony_ci  <ins name="POPCOUNT.i32" title="Population count" dests="1" opcode="0x91" opcode2="0xC" unit="SFU">
    <!-- Opcode 0x91, opcode2 0xC. Declared on the SFU unit, whereas the
         MOV, CLZ and IABS entries under the same opcode are declared on
         CVT. Takes one unmodified source. -->
1630bf215546Sopenharmony_ci    <desc>
1631bf215546Sopenharmony_ci      Only available as 32-bit. Smaller bitsizes require explicit conversions.
1632bf215546Sopenharmony_ci      64-bit popcount may be constructed in 3 clocks by separate 32-bit
1633bf215546Sopenharmony_ci      popcounts of each half and a 32-bit add, which is guaranteed not to
1634bf215546Sopenharmony_ci      overflow.
1635bf215546Sopenharmony_ci    </desc>
1636bf215546Sopenharmony_ci    <src/>
1637bf215546Sopenharmony_ci  </ins>
1638bf215546Sopenharmony_ci
1639bf215546Sopenharmony_ci  <ins name="BITREV.i32" title="Bitwise reverse" dests="1" opcode="0x91" opcode2="0xD" unit="SFU">
    <!-- Opcode 0x91, opcode2 0xD, SFU unit. One destination, one unmodified
         source. -->
1640bf215546Sopenharmony_ci    <desc>
1641bf215546Sopenharmony_ci      Only available as 32-bit. Other bitsizes may be derived with swizzles.
1642bf215546Sopenharmony_ci    </desc>
1643bf215546Sopenharmony_ci    <src/>
1644bf215546Sopenharmony_ci  </ins>
1645bf215546Sopenharmony_ci
1646bf215546Sopenharmony_ci  <ins name="NOT_OLD.i32" title="Bitwise complement" dests="1" opcode="0x91" opcode2="0xE" unit="SFU">
    <!-- 32-bit complement at opcode 0x91, opcode2 0xE on the SFU unit. The
         64-bit sibling NOT_OLD.i64 keeps opcode2 0xE but uses opcode 0x191. -->
1647bf215546Sopenharmony_ci    <desc>
1648bf215546Sopenharmony_ci      For fully featured bitwise operation, see the shift opcodes.
1649bf215546Sopenharmony_ci    </desc>
1650bf215546Sopenharmony_ci    <src/>
1651bf215546Sopenharmony_ci  </ins>
1652bf215546Sopenharmony_ci
1653bf215546Sopenharmony_ci  <ins name="NOT_OLD.i64" title="Bitwise complement" dests="1" opcode="0x191" opcode2="0xE" unit="SFU">
    <!-- 64-bit complement. Same opcode2 (0xE) as NOT_OLD.i32, but the opcode
         is 0x191 instead of 0x91. -->
1654bf215546Sopenharmony_ci    <desc>
1655bf215546Sopenharmony_ci      For fully featured bitwise operation, see the shift opcodes.
1656bf215546Sopenharmony_ci    </desc>
1657bf215546Sopenharmony_ci    <src/>
1658bf215546Sopenharmony_ci  </ins>
1659bf215546Sopenharmony_ci
1660bf215546Sopenharmony_ci  <ins name="WMASK" title="Warp mask" dests="1" opcode="0x95" unit="CVT">
    <!-- Opcode 0x95 with no opcode2 declared. Takes one unmodified source
         and includes the subgroup element. -->
1661bf215546Sopenharmony_ci    <desc>
1662bf215546Sopenharmony_ci      Returns the mask of lanes ever active within the warp (subgroup), such
1663bf215546Sopenharmony_ci      that the source is nonzero. The number of work-items in a subgroup is
1664bf215546Sopenharmony_ci      given as the popcount of this value with a nonzero input.
1665bf215546Sopenharmony_ci
1666bf215546Sopenharmony_ci      An `all()` subgroup operation may be constructed as `WMASK` of the input
1667bf215546Sopenharmony_ci      compared for equality with `WMASK` of a nonzero value.
1668bf215546Sopenharmony_ci
1669bf215546Sopenharmony_ci      An `any()` subgroup operation may be constructed as `WMASK` of the input
1670bf215546Sopenharmony_ci      compared against zero.
1671bf215546Sopenharmony_ci    </desc>
1672bf215546Sopenharmony_ci    <src/>
1673bf215546Sopenharmony_ci    <subgroup/>
1674bf215546Sopenharmony_ci  </ins>
1675bf215546Sopenharmony_ci
1676bf215546Sopenharmony_ci  <group name="FREXP" title="Fraction/exponent extract" dests="1" opcode="0x99" unit="CVT">
    <!-- Four variants under opcode 0x99: FREXPM (mantissa) and FREXPE
         (exponent), each in f32 and v2f16 form, with opcode2 0 to 3 written
         without a 0x prefix. Two one-bit modifiers are declared, sqrt at
         start 24 and log at start 25. The source is flagged float and
         swizzle. -->
1677bf215546Sopenharmony_ci    <ins name="FREXPM.f32" opcode2="0"/>
1678bf215546Sopenharmony_ci    <ins name="FREXPM.v2f16" opcode2="1"/>
1679bf215546Sopenharmony_ci    <ins name="FREXPE.f32" opcode2="2"/>
1680bf215546Sopenharmony_ci    <ins name="FREXPE.v2f16" opcode2="3"/>
1681bf215546Sopenharmony_ci    <desc>
1682bf215546Sopenharmony_ci      Breaks up the floating-point input into its fractional (mantissa) and
1683bf215546Sopenharmony_ci      exponent parts. By default, this is compatible with the `frexp()` function
1684bf215546Sopenharmony_ci      in APIs. With the log/sqrt modifiers, the floating point format is
1685bf215546Sopenharmony_ci      adjusted to be compatible with Valhall's argument reduction for logarithm
1686bf215546Sopenharmony_ci      and square root computation respectively.
1687bf215546Sopenharmony_ci    </desc>
1688bf215546Sopenharmony_ci    <mod name="sqrt" start="24" size="1"/>
1689bf215546Sopenharmony_ci    <mod name="log" start="25" size="1"/>
1690bf215546Sopenharmony_ci    <src float="true" swizzle="true"/>
1691bf215546Sopenharmony_ci  </group>
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_ci  <group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
    <!-- Floating-point special functions under opcode 0x9C. The opcode2
         values are written without a 0x prefix (presumably decimal; confirm
         against the parser). Values 4, 5, 6 and 13 are not used here because
         the second SFU group below assigns them; 7 and 9 are assigned by
         neither group. The source is flagged float, swizzle and absneg. -->
1694bf215546Sopenharmony_ci    <ins name="FRCP.f32" opcode2="0"/>
1695bf215546Sopenharmony_ci    <ins name="FRCP.f16" opcode2="1"/>
1696bf215546Sopenharmony_ci    <ins name="FRSQ.f32" opcode2="2"/>
1697bf215546Sopenharmony_ci    <ins name="FRSQ.f16" opcode2="3"/>
1698bf215546Sopenharmony_ci    <ins name="FLOGD.f32" opcode2="8"/>
1699bf215546Sopenharmony_ci    <ins name="FPCLASS.f32" opcode2="10"/>
1700bf215546Sopenharmony_ci    <ins name="FPCLASS.f16" opcode2="11"/>
1701bf215546Sopenharmony_ci    <ins name="FLOG_TABLE.f32" opcode2="12"/>
1702bf215546Sopenharmony_ci    <ins name="FRCP_APPROX.f32" opcode2="14"/>
1703bf215546Sopenharmony_ci    <ins name="FRSQ_APPROX.f32" opcode2="15"/>
1704bf215546Sopenharmony_ci    <desc>
1705bf215546Sopenharmony_ci      Performs a given special function. The floating-point reciprocal (`FRCP`)
1706bf215546Sopenharmony_ci      and reciprocal square root (`FRSQ`) instructions may be freely used as-is.
1707bf215546Sopenharmony_ci      The logarithm instruction (`FLOGD.f32`) requires an argument
1708bf215546Sopenharmony_ci      reduction. See the transcendentals section for more information. Like the
1709bf215546Sopenharmony_ci      Bifrost op, `FRSQ_APPROX.f32` does an implicit `FREXPM.f32.sqrt` on the
1710bf215546Sopenharmony_ci      source.
1711bf215546Sopenharmony_ci    </desc>
1712bf215546Sopenharmony_ci    <src float="true" swizzle="true" absneg="true"/>
1713bf215546Sopenharmony_ci  </group>
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci  <group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
    <!-- Second group under opcode 0x9C, holding the table and offset
         lookups. It is kept apart from the float SFU group above because the
         source here is a plain src with no float, swizzle or absneg flags.
         It uses opcode2 values 4, 5, 6 and 13. -->
1716bf215546Sopenharmony_ci    <ins name="FSIN_TABLE.u6" opcode2="4"/>
1717bf215546Sopenharmony_ci    <ins name="FCOS_TABLE.u6" opcode2="5"/>
1718bf215546Sopenharmony_ci    <ins name="FSINCOS_OFFSET.u6" opcode2="6"/>
1719bf215546Sopenharmony_ci    <ins name="FEXP_TABLE.u4" opcode2="13"/>
1720bf215546Sopenharmony_ci    <desc>
1721bf215546Sopenharmony_ci      Performs a given special function. The trigonometric tables
1722bf215546Sopenharmony_ci      (`FSIN_TABLE.u6` and `FCOS_TABLE.u6`) are crude, requiring both an
1723bf215546Sopenharmony_ci      argument reduction and postprocessing.
1724bf215546Sopenharmony_ci    </desc>
1725bf215546Sopenharmony_ci    <src/>
1726bf215546Sopenharmony_ci  </group>
1727bf215546Sopenharmony_ci
1728bf215546Sopenharmony_ci  <group name="FADD" title="Floating-point add" dests="1" opcode2="0" unit="FMA">
    <!-- Here the arrangement is the reverse of the CONVERT groups: opcode2
         (0) is fixed on the group and each variant supplies its own opcode
         (0xA4 for f32, 0xA5 for v2f16). Two sources, both flagged absneg and
         swizzle, plus a clamp element. Declared on the FMA unit. -->
1729bf215546Sopenharmony_ci    <ins name="FADD.f32" opcode="0xA4"/>
1730bf215546Sopenharmony_ci    <ins name="FADD.v2f16" opcode="0xA5"/>
1731bf215546Sopenharmony_ci    <desc>$A + B$</desc>
1732bf215546Sopenharmony_ci    <clamp/>
1733bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
1734bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
1735bf215546Sopenharmony_ci  </group>
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_ci  <group name="FMIN" title="Floating-point minimum" dests="1" opcode2="2" unit="CVT">
    <!-- Same opcodes as FADD (0xA4 f32, 0xA5 v2f16) with opcode2 2, but
         declared on the CVT unit rather than FMA. Sources and clamp match
         FADD. -->
1738bf215546Sopenharmony_ci    <ins name="FMIN.f32" opcode="0xA4"/>
1739bf215546Sopenharmony_ci    <ins name="FMIN.v2f16" opcode="0xA5"/>
1740bf215546Sopenharmony_ci    <desc>$\min \{ A, B \}$</desc>
1741bf215546Sopenharmony_ci    <clamp/>
1742bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
1743bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
1744bf215546Sopenharmony_ci  </group>
1745bf215546Sopenharmony_ci
1746bf215546Sopenharmony_ci  <group name="FMAX" title="Floating-point maximum" dests="1" opcode2="3" unit="CVT">
    <!-- Counterpart of FMIN: identical opcodes, sources, clamp and unit,
         with opcode2 3 instead of 2. -->
1747bf215546Sopenharmony_ci    <ins name="FMAX.f32" opcode="0xA4"/>
1748bf215546Sopenharmony_ci    <ins name="FMAX.v2f16" opcode="0xA5"/>
1749bf215546Sopenharmony_ci    <desc>$\max \{ A, B \}$</desc>
1750bf215546Sopenharmony_ci    <clamp/>
1751bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
1752bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
1753bf215546Sopenharmony_ci  </group>
1754bf215546Sopenharmony_ci
1755bf215546Sopenharmony_ci  <group name="V2F32_TO_V2F16" title="Vectorized floating-point conversion" dests="1" opcode2="4" unit="CVT">
    <!-- A group with a single variant (opcode 0xA5, opcode2 4). Two sources
         flagged absneg only (no swizzle), plus clamp and roundmode
         elements. -->
1756bf215546Sopenharmony_ci    <ins name="V2F32_TO_V2F16" opcode="0xA5"/>
1757bf215546Sopenharmony_ci    <desc>
1758bf215546Sopenharmony_ci      Given a pair of 32-bit floats, output a pair of 16-bit floats packed into
1759bf215546Sopenharmony_ci      a 32-bit destination.
1760bf215546Sopenharmony_ci    </desc>
1761bf215546Sopenharmony_ci    <clamp/>
1762bf215546Sopenharmony_ci    <roundmode/>
1763bf215546Sopenharmony_ci    <src absneg="true">A</src>
1764bf215546Sopenharmony_ci    <src absneg="true">B</src>
1765bf215546Sopenharmony_ci  </group>
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci  <group name="LDEXP" title="Floating-point rescaling" dests="1" opcode2="6" unit="FMA">
    <!-- Opcodes 0xA4 (f32) and 0xA5 (v2f16) with opcode2 6 on the FMA unit.
         The first source (A) is flagged absneg and swizzle; the second
         source is an unlabelled plain src, which the desc refers to as B.
         The trailing comments record hardware features that this entry does
         not yet describe. -->
1768bf215546Sopenharmony_ci    <ins name="LDEXP.f32" opcode="0xA4"/>
1769bf215546Sopenharmony_ci    <ins name="LDEXP.v2f16" opcode="0xA5"/>
1770bf215546Sopenharmony_ci    <desc>
1771bf215546Sopenharmony_ci      Computes $A \cdot 2^B$ by adding B to the exponent of A. Used to calculate
1772bf215546Sopenharmony_ci      various special functions, particularly base-2 exponents. Special case
1773bf215546Sopenharmony_ci      handling differs from an actual floating-point multiply, so this should
1774bf215546Sopenharmony_ci      not be used outside fixed instruction sequences.
1775bf215546Sopenharmony_ci    </desc>
1776bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
1777bf215546Sopenharmony_ci    <src/>
1778bf215546Sopenharmony_ci    <roundmode/> <!-- Also has rtna -->
1779bf215546Sopenharmony_ci    <!-- Also has infinity handling for arctan -->
1780bf215546Sopenharmony_ci  </group>
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci  <ins name="FEXP.f32" title="Floating-point exponent" dests="1" opcode="0xA4" opcode2="8" unit="SFU">
    <!-- Opcode 0xA4, opcode2 8, SFU unit. Two sources: the fixed-point form
         of the argument (no modifiers) and the original float (flagged
         absneg). A clamp element is present. -->
1783bf215546Sopenharmony_ci    <desc>
1784bf215546Sopenharmony_ci      Calculates the base-2 exponent of an argument specified as an 8:24
1785bf215546Sopenharmony_ci      fixed-point value. The original argument is passed as well for correct
1786bf215546Sopenharmony_ci      handling of special cases.
1787bf215546Sopenharmony_ci    </desc>
1788bf215546Sopenharmony_ci    <clamp/>
1789bf215546Sopenharmony_ci    <src>Input as 8:24 fixed-point</src>
1790bf215546Sopenharmony_ci    <src absneg="true">Input as 32-bit float</src>
1791bf215546Sopenharmony_ci  </ins>
1792bf215546Sopenharmony_ci
1793bf215546Sopenharmony_ci  <ins name="FADD_LSCALE.f32" title="Floating-point add with logarithm scale" dests="1" opcode="0xA4" opcode2="9" unit="FMA">
    <!-- Opcode 0xA4, opcode2 9, FMA unit. Two sources flagged absneg (no
         swizzle) and a clamp element. -->
1794bf215546Sopenharmony_ci    <desc>
1795bf215546Sopenharmony_ci      Performs a floating-point addition specialized for logarithm computation.
1796bf215546Sopenharmony_ci    </desc>
1797bf215546Sopenharmony_ci    <clamp/>
1798bf215546Sopenharmony_ci    <src absneg="true">A</src>
1799bf215546Sopenharmony_ci    <src absneg="true">B</src>
1800bf215546Sopenharmony_ci  </ins>
1801bf215546Sopenharmony_ci
1802bf215546Sopenharmony_ci  <ins name="FATAN_ASSIST.f32" title="ATAN calculation helper" dests="1" opcode="0xA4" opcode2="14" unit="SFU">
    <!-- Opcode 0xA4, opcode2 14, SFU unit. The one-bit `second` modifier
         (start 24) switches between the two forms described in the desc.
         Both sources are plain, with no modifiers. -->
1803bf215546Sopenharmony_ci    <desc>
1804bf215546Sopenharmony_ci      Used for `atan2()` implementation. Destination is two 16-bit
1805bf215546Sopenharmony_ci      values (int and float) for the first form, and a single 32-bit float when
1806bf215546Sopenharmony_ci      `.second` is set (indicating the FATAN_TABLE.f32 instruction).
1807bf215546Sopenharmony_ci    </desc>
1808bf215546Sopenharmony_ci    <mod name="second" start="24" size="1"/>
1809bf215546Sopenharmony_ci    <src>A</src>
1810bf215546Sopenharmony_ci    <src>B</src>
1811bf215546Sopenharmony_ci  </ins>
1812bf215546Sopenharmony_ci
1813bf215546Sopenharmony_ci  <group name="IADD" title="Integer addition" dests="1" opcode2="0" unit="CVT">
    <!-- Eight typed variants share opcode2 0 and differ by opcode. Both
         sources are flagged widen and a saturate element is present. An
         IADD.s32 entry at opcode 0x1A0 is kept commented out below.
         NOTE(review): the desc names `IADD.v2i16`, but the variants defined
         here are IADD.v2u16 and IADD.v2s16; confirm which one is meant. -->
1814bf215546Sopenharmony_ci    <desc>
1815bf215546Sopenharmony_ci      $A + B$ with optional saturation.
1816bf215546Sopenharmony_ci
1817bf215546Sopenharmony_ci      As Valhall lacks swizzle instructions, `IADD.v2i16` with zero is the
1818bf215546Sopenharmony_ci      canonical lowering for swizzles.
1819bf215546Sopenharmony_ci    </desc>
1820bf215546Sopenharmony_ci    <ins name="IADD.u32" opcode="0xA0"/>
1821bf215546Sopenharmony_ci    <ins name="IADD.v2u16" opcode="0xA1"/>
1822bf215546Sopenharmony_ci    <ins name="IADD.v4u8" opcode="0xA2"/>
1823bf215546Sopenharmony_ci    <ins name="IADD.s32" opcode="0xA8"/>
1824bf215546Sopenharmony_ci    <ins name="IADD.v2s16" opcode="0xA9"/>
1825bf215546Sopenharmony_ci    <ins name="IADD.v4s8" opcode="0x1A2"/>
1826bf215546Sopenharmony_ci    <ins name="IADD.u64" opcode="0x1A3"/>
1827bf215546Sopenharmony_ci    <ins name="IADD.s64" opcode="0x1AB"/>
1828bf215546Sopenharmony_ci    <!-- <ins name="IADD.s32" opcode="0x1A0"/> -->
1829bf215546Sopenharmony_ci    <src widen="true">A</src>
1830bf215546Sopenharmony_ci    <src widen="true">B</src>
1831bf215546Sopenharmony_ci    <saturate/>
1832bf215546Sopenharmony_ci  </group>
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci  <ins name="MKVEC.v2i16" title="Make 16-bit vector" dests="1" opcode="0xA1" opcode2="0x5" unit="CVT">
    <!-- Opcode 0xA1 (the opcode the integer groups use for their v2u16
         variants) with opcode2 0x5. Both sources are flagged swizzle. -->
1835bf215546Sopenharmony_ci    <desc>Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)`</desc>
1836bf215546Sopenharmony_ci    <src swizzle="true">A</src>
1837bf215546Sopenharmony_ci    <src swizzle="true">B</src>
1838bf215546Sopenharmony_ci  </ins>
1839bf215546Sopenharmony_ci
1840bf215546Sopenharmony_ci  <group name="ISUB" title="Integer subtract" dests="1" opcode2="1" unit="CVT">
    <!-- Mirrors IADD: the same eight opcodes and type suffixes, the same
         widen sources and saturate element, with opcode2 1 instead of 0. -->
1841bf215546Sopenharmony_ci    <ins name="ISUB.u32" opcode="0xA0"/>
1842bf215546Sopenharmony_ci    <ins name="ISUB.v2u16" opcode="0xA1"/>
1843bf215546Sopenharmony_ci    <ins name="ISUB.v4u8" opcode="0xA2"/>
1844bf215546Sopenharmony_ci    <ins name="ISUB.s32" opcode="0xA8"/>
1845bf215546Sopenharmony_ci    <ins name="ISUB.v2s16" opcode="0xA9"/>
1846bf215546Sopenharmony_ci    <ins name="ISUB.v4s8" opcode="0x1A2"/>
1847bf215546Sopenharmony_ci    <ins name="ISUB.u64" opcode="0x1A3"/>
1848bf215546Sopenharmony_ci    <ins name="ISUB.s64" opcode="0x1AB"/>
1849bf215546Sopenharmony_ci    <desc>$A - B$ with optional saturation</desc>
1850bf215546Sopenharmony_ci    <src widen="true">A</src>
1851bf215546Sopenharmony_ci    <src widen="true">B</src>
1852bf215546Sopenharmony_ci    <saturate/>
1853bf215546Sopenharmony_ci  </group>
1854bf215546Sopenharmony_ci
1855bf215546Sopenharmony_ci  <group name="SEG_ADD" title="Segment addition" dests="1" opcode2="6" unit="CVT">
    <!-- A group with one variant, SEG_ADD.u64 at opcode 0x1A3 (the opcode
         also used by IADD.u64), selected by opcode2 6. Two one-bit
         modifiers: neg at start 38 and preserve_null at start 39. Only the
         second source is flagged widen. -->
1856bf215546Sopenharmony_ci    <desc>
1857bf215546Sopenharmony_ci      Similar to SHADDX, but especially used for loading offsets into
1858bf215546Sopenharmony_ci      WLS. Usually this is only required for atomic operations, which cannot
1859bf215546Sopenharmony_ci      directly use wls_pointer as an address.
1860bf215546Sopenharmony_ci
1861bf215546Sopenharmony_ci      .neg indicates SEG_SUB instead.
1862bf215546Sopenharmony_ci    </desc>
1863bf215546Sopenharmony_ci    <ins name="SEG_ADD.u64" opcode="0x1A3"/>
1864bf215546Sopenharmony_ci    <mod name="neg" start="38" size="1"/>
1865bf215546Sopenharmony_ci    <mod name="preserve_null" start="39" size="1"/>
1866bf215546Sopenharmony_ci    <src>A</src>
1867bf215546Sopenharmony_ci    <src widen="true">B</src>
1868bf215546Sopenharmony_ci  </group>
1869bf215546Sopenharmony_ci
1870bf215546Sopenharmony_ci  <group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" opcode2="7" unit="CVT">
    <!-- Unsigned and signed 64-bit variants (opcodes 0x1A3 and 0x1AB) with
         opcode2 7. The shift amount is an immediate field declared with
         start 20 and size 3. Source A is plain; source B is flagged
         widen. -->
1871bf215546Sopenharmony_ci    <desc>
1872bf215546Sopenharmony_ci      Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
1873bf215546Sopenharmony_ci      64-bit value A. These instructions accelerate address arithmetic, but may
1874bf215546Sopenharmony_ci      be used in full generality for 64-bit integer arithmetic.
1875bf215546Sopenharmony_ci    </desc>
1876bf215546Sopenharmony_ci    <ins name="SHADDX.u64" opcode="0x1A3"/>
1877bf215546Sopenharmony_ci    <ins name="SHADDX.s64" opcode="0x1AB"/>
1878bf215546Sopenharmony_ci    <imm name="shift" start="20" size="3"/>
1879bf215546Sopenharmony_ci    <src>A</src>
1880bf215546Sopenharmony_ci    <src widen="true">B</src>
1881bf215546Sopenharmony_ci  </group>
1882bf215546Sopenharmony_ci
1883bf215546Sopenharmony_ci  <group name="IMUL" title="Integer multiply" dests="1" opcode2="0x0A" unit="SFU">
    <!-- Seven variants with opcode2 0x0A on the SFU unit. The 0x1A3 variant
         is spelled IMULD.u64, and there is no entry at 0x1AB. An IMUL.s32
         entry at opcode 0x1A0 is kept commented out. Both sources are
         flagged widen and a saturate element is present.
         NOTE(review): the desc refers to IMUL.u32, while the 0xA0 variant
         defined here is named IMUL.i32; confirm the intended name. -->
1884bf215546Sopenharmony_ci    <ins name="IMUL.i32" opcode="0xA0"/>
1885bf215546Sopenharmony_ci    <ins name="IMUL.v2i16" opcode="0xA1"/>
1886bf215546Sopenharmony_ci    <ins name="IMUL.v4i8" opcode="0xA2"/>
1887bf215546Sopenharmony_ci    <ins name="IMUL.s32" opcode="0xA8"/>
1888bf215546Sopenharmony_ci    <ins name="IMUL.v2s16" opcode="0xA9"/>
1889bf215546Sopenharmony_ci    <ins name="IMUL.v4s8" opcode="0x1A2"/>
1890bf215546Sopenharmony_ci    <ins name="IMULD.u64" opcode="0x1A3"/>
1891bf215546Sopenharmony_ci    <!-- <ins name="IMUL.s32" opcode="0x1A0"/> -->
1892bf215546Sopenharmony_ci    <desc>
1893bf215546Sopenharmony_ci      $A \cdot B$ with optional saturation. Note the multipliers can only handle up to
1894bf215546Sopenharmony_ci      32-bit by 32-bit multiplies. The 64-bit "multiply" acts like IMUL.u32 but
1895bf215546Sopenharmony_ci      additionally writes the high half of the product to the high half of the
1896bf215546Sopenharmony_ci      64-bit destination. Along with IADD.u32 and IADD.u64, this allows the
1897bf215546Sopenharmony_ci      construction of a 64-bit multiply in 5 instructions (6 clocks).
1898bf215546Sopenharmony_ci    </desc>
1899bf215546Sopenharmony_ci    <src widen="true">A</src>
1900bf215546Sopenharmony_ci    <src widen="true">B</src>
1901bf215546Sopenharmony_ci    <saturate/>
1902bf215546Sopenharmony_ci  </group>
1903bf215546Sopenharmony_ci
1904bf215546Sopenharmony_ci  <group name="HADD" title="Integer half-add" dests="1" opcode2="0x0B" unit="CVT">
    <!-- Six variants with opcode2 0x0B; unlike IADD and ISUB, no 64-bit
         forms are listed. The one-bit `rhadd` modifier (start 30) selects
         the rounding form described in the desc. Both sources are flagged
         widen. -->
1905bf215546Sopenharmony_ci    <ins name="HADD.u32" opcode="0xA0"/>
1906bf215546Sopenharmony_ci    <ins name="HADD.v2u16" opcode="0xA1"/>
1907bf215546Sopenharmony_ci    <ins name="HADD.v4u8" opcode="0xA2"/>
1908bf215546Sopenharmony_ci    <ins name="HADD.s32" opcode="0xA8"/>
1909bf215546Sopenharmony_ci    <ins name="HADD.v2s16" opcode="0xA9"/>
1910bf215546Sopenharmony_ci    <ins name="HADD.v4s8" opcode="0x1A2"/>
1911bf215546Sopenharmony_ci    <mod name="rhadd" start="30" size="1"/>
1912bf215546Sopenharmony_ci    <src widen="true">A</src>
1913bf215546Sopenharmony_ci    <src widen="true">B</src>
1914bf215546Sopenharmony_ci    <desc>
1915bf215546Sopenharmony_ci      $(A + B) \gg 1$ without intermediate overflow, corresponding to `hadd()` in
1916bf215546Sopenharmony_ci      OpenCL. With the `.rhadd` modifier set, it instead calculates
1917bf215546Sopenharmony_ci      $(A + B + 1) \gg 1$ corresponding to `rhadd()` in OpenCL.
1918bf215546Sopenharmony_ci    </desc>
1919bf215546Sopenharmony_ci  </group>
1920bf215546Sopenharmony_ci
1921bf215546Sopenharmony_ci  <group name="CLPER" title="Cross-lane permute" dests="1" opcode2="0xF" unit="SFU">
    <!-- Eight variants with opcode2 0xF on the SFU unit, using the same
         opcode set as IADD. A CLPER.s32 entry at opcode 0x1A0 is kept
         commented out. Source A is plain and source B is flagged widen. The
         group also includes subgroup, lane_op and inactive_result
         elements. -->
1922bf215546Sopenharmony_ci    <ins name="CLPER.i32" opcode="0xA0"/>
1923bf215546Sopenharmony_ci    <ins name="CLPER.v2u16" opcode="0xA1"/>
1924bf215546Sopenharmony_ci    <ins name="CLPER.v4u8" opcode="0xA2"/>
1925bf215546Sopenharmony_ci    <ins name="CLPER.s32" opcode="0xA8"/>
1926bf215546Sopenharmony_ci    <ins name="CLPER.v2s16" opcode="0xA9"/>
1927bf215546Sopenharmony_ci    <ins name="CLPER.v4s8" opcode="0x1A2"/>
1928bf215546Sopenharmony_ci    <ins name="CLPER.u64" opcode="0x1A3"/>
1929bf215546Sopenharmony_ci    <ins name="CLPER.s64" opcode="0x1AB"/>
1930bf215546Sopenharmony_ci    <!-- <ins name="CLPER.s32" opcode="0x1A0"/> -->
1931bf215546Sopenharmony_ci    <desc>
1932bf215546Sopenharmony_ci      Selects the value of A in the subgroup lane given by B. This implements
1933bf215546Sopenharmony_ci      subgroup broadcasts. It may be used as a primitive for screen space
1934bf215546Sopenharmony_ci      derivatives in fragment shaders.
1935bf215546Sopenharmony_ci    </desc>
1936bf215546Sopenharmony_ci    <src>A</src>
1937bf215546Sopenharmony_ci    <src widen="true">B</src>
1938bf215546Sopenharmony_ci    <subgroup/>
1939bf215546Sopenharmony_ci    <lane_op/>
1940bf215546Sopenharmony_ci    <inactive_result/>
1941bf215546Sopenharmony_ci  </group>
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_ci  <group name="FMA" title="Fused floating-point multiply add" dests="1" unit="FMA">
    <!-- Three-source group. No opcode2 is declared; the variants are
         distinguished by opcode alone (0xB2 for f32, 0xB3 for v2f16). All
         three sources are flagged absneg and swizzle, and a clamp element is
         present. -->
1944bf215546Sopenharmony_ci    <ins name="FMA.f32" opcode="0xB2"/>
1945bf215546Sopenharmony_ci    <ins name="FMA.v2f16" opcode="0xB3"/>
1946bf215546Sopenharmony_ci    <desc>$A \cdot B + C$</desc>
1947bf215546Sopenharmony_ci    <clamp/>
1948bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
1949bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
1950bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">C</src>
1951bf215546Sopenharmony_ci  </group>
1952bf215546Sopenharmony_ci
1953bf215546Sopenharmony_ci  <group name="LSHIFT_AND" title="Left shift and bitwise AND" dests="1" opcode2="0x100" unit="SFU">
    <!-- One of six shift-and-logic groups sharing opcodes 0xB4, 0xB5, 0xB6
         and 0x1B7. opcode2 0x100 selects left shift with AND; the
         right-shift counterpart uses 0x000. Sources in order: A (widen), the
         8-bit shift amount (lanes), and B (not). The `left` modifier is
         declared with implied="true", and a not_result element is
         present. -->
1954bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.i32" opcode="0xB4"/>
1955bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.v2i16" opcode="0xB5"/>
1956bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.v4i8" opcode="0xB6"/>
1957bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.i64" opcode="0x1B7"/>
1958bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
1959bf215546Sopenharmony_ci    <desc>
1960bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise ANDs it with the
1961bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
1962bf215546Sopenharmony_ci    </desc>
1963bf215546Sopenharmony_ci    <not_result/>
1964bf215546Sopenharmony_ci    <src widen="true">A</src>
1965bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
1966bf215546Sopenharmony_ci    <src not="true">B</src>
1967bf215546Sopenharmony_ci  </group>
1968bf215546Sopenharmony_ci
1969bf215546Sopenharmony_ci  <group name="RSHIFT_AND" title="Right shift and bitwise AND" dests="1" opcode2="0x000" unit="SFU">
    <!-- Right-shift counterpart of LSHIFT_AND: same opcodes and sources,
         opcode2 0x000 instead of 0x100, plus a one-bit `signed` modifier at
         start 34 that selects an arithmetic shift (see desc).
         NOTE(review): the implied `left` modifier is declared here exactly
         as in the LSHIFT groups; confirm that it is intended on a
         right-shift group. -->
1970bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.i32" opcode="0xB4"/>
1971bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.v2i16" opcode="0xB5"/>
1972bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.v4i8" opcode="0xB6"/>
1973bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.i64" opcode="0x1B7"/>
1974bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
1975bf215546Sopenharmony_ci    <desc>
1976bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise ANDs it with the
1977bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
1978bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
1979bf215546Sopenharmony_ci      it performs an unsigned right shift.
1980bf215546Sopenharmony_ci    </desc>
1981bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
1982bf215546Sopenharmony_ci    <not_result/>
1983bf215546Sopenharmony_ci    <src widen="true">A</src>
1984bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
1985bf215546Sopenharmony_ci    <src not="true">B</src>
1986bf215546Sopenharmony_ci  </group>
1987bf215546Sopenharmony_ci
1988bf215546Sopenharmony_ci  <group name="LSHIFT_OR" title="Left shift and bitwise OR" dests="1" opcode2="0x101" unit="SFU">
    <!-- Left shift with OR: opcode2 0x101, one above LSHIFT_AND (0x100) and
         0x100 above RSHIFT_OR (0x001). Opcodes, sources, the implied `left`
         modifier and not_result match LSHIFT_AND. -->
1989bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.i32" opcode="0xB4"/>
1990bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.v2i16" opcode="0xB5"/>
1991bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.v4i8" opcode="0xB6"/>
1992bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.i64" opcode="0x1B7"/>
1993bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
1994bf215546Sopenharmony_ci    <desc>
1995bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise ORs it with the
1996bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
1997bf215546Sopenharmony_ci    </desc>
1998bf215546Sopenharmony_ci    <not_result/>
1999bf215546Sopenharmony_ci    <src widen="true">A</src>
2000bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
2001bf215546Sopenharmony_ci    <src not="true">B</src>
2002bf215546Sopenharmony_ci  </group>
2003bf215546Sopenharmony_ci
2004bf215546Sopenharmony_ci  <group name="RSHIFT_OR" title="Right shift and bitwise OR" dests="1" opcode2="0x001" unit="SFU">
    <!-- Right shift with OR: opcode2 0x001, with the same opcodes and
         sources as the other shift groups, plus a one-bit `signed` modifier
         at start 34 that selects an arithmetic shift (see desc). -->
2005bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.i32" opcode="0xB4"/>
2006bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.v2i16" opcode="0xB5"/>
2007bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.v4i8" opcode="0xB6"/>
2008bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.i64" opcode="0x1B7"/>
2009bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
2010bf215546Sopenharmony_ci    <desc>
2011bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise ORs it with the
2012bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
2013bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
2014bf215546Sopenharmony_ci      it performs an unsigned right shift.
2015bf215546Sopenharmony_ci    </desc>
2016bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
2017bf215546Sopenharmony_ci    <not_result/>
2018bf215546Sopenharmony_ci    <src widen="true">A</src>
2019bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
2020bf215546Sopenharmony_ci    <src not="true">B</src>
2021bf215546Sopenharmony_ci  </group>
2022bf215546Sopenharmony_ci
2023bf215546Sopenharmony_ci  <group name="LSHIFT_XOR" title="Left shift and bitwise XOR" dests="1" opcode2="0x102" unit="SFU">
    <!-- Left shift with XOR: opcode2 0x102, following LSHIFT_AND (0x100) and
         LSHIFT_OR (0x101). Opcodes, sources, the implied `left` modifier and
         not_result match the other left-shift groups. -->
2024bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.i32" opcode="0xB4"/>
2025bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.v2i16" opcode="0xB5"/>
2026bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.v4i8" opcode="0xB6"/>
2027bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.i64" opcode="0x1B7"/>
2028bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
2029bf215546Sopenharmony_ci    <desc>
2030bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise XORs it with the
2031bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
2032bf215546Sopenharmony_ci    </desc>
2033bf215546Sopenharmony_ci    <not_result/>
2034bf215546Sopenharmony_ci    <src widen="true">A</src>
2035bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
2036bf215546Sopenharmony_ci    <src not="true">B</src>
2037bf215546Sopenharmony_ci  </group>
2038bf215546Sopenharmony_ci
2039bf215546Sopenharmony_ci  <group name="RSHIFT_XOR" title="Right shift and bitwise XOR" dests="1" opcode2="0x002" unit="SFU">
    <!-- Right shift with XOR: opcode2 0x002, following RSHIFT_AND (0x000)
         and RSHIFT_OR (0x001). Adds the one-bit `signed` modifier at start
         34, like the other right-shift groups.
         NOTE(review): the implied `left` modifier is declared here exactly
         as in the LSHIFT groups; confirm that it is intended on a
         right-shift group. -->
2040bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.i32" opcode="0xB4"/>
2041bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.v2i16" opcode="0xB5"/>
2042bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.v4i8" opcode="0xB6"/>
2043bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.i64" opcode="0x1B7"/>
2044bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
2045bf215546Sopenharmony_ci    <desc>
2046bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise XORs it with the
2047bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
2048bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
2049bf215546Sopenharmony_ci      it performs an unsigned right shift.
2050bf215546Sopenharmony_ci    </desc>
2051bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
2052bf215546Sopenharmony_ci    <not_result/>
2053bf215546Sopenharmony_ci    <src widen="true">A</src>
2054bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
2055bf215546Sopenharmony_ci    <src not="true">B</src>
2056bf215546Sopenharmony_ci  </group>
2057bf215546Sopenharmony_ci
2058bf215546Sopenharmony_ci  <ins name="MUX.i32" title="Mux" dests="1" opcode="0xB8" unit="SFU">
2059bf215546Sopenharmony_ci    <desc>
2060bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
2061bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
2062bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
2063bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates
2064bf215546Sopenharmony_ci      `(A &amp; mask) | (B &amp; ~mask)`.
2065bf215546Sopenharmony_ci    </desc>
2066bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
2067bf215546Sopenharmony_ci    <src>A</src>
2068bf215546Sopenharmony_ci    <src>B</src>
2069bf215546Sopenharmony_ci    <src>Mask</src>
2070bf215546Sopenharmony_ci  </ins>
2071bf215546Sopenharmony_ci
2072bf215546Sopenharmony_ci  <ins name="MUX.v2i16" title="Mux" dests="1" opcode="0xB9" unit="SFU">
2073bf215546Sopenharmony_ci    <desc>
2074bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
2075bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
2076bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
2077bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.v2i16.bit A, B, mask` calculates
2078bf215546Sopenharmony_ci      `(A &amp; mask) | (B &amp; ~mask)`.
2079bf215546Sopenharmony_ci    </desc>
2080bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
2081bf215546Sopenharmony_ci    <src swizzle="true">A</src>
2082bf215546Sopenharmony_ci    <src swizzle="true">B</src>
2083bf215546Sopenharmony_ci    <src swizzle="true">Mask</src>
2084bf215546Sopenharmony_ci  </ins>
2085bf215546Sopenharmony_ci
2086bf215546Sopenharmony_ci  <ins name="MUX.v4i8" title="Mux" dests="1" opcode="0xBA" unit="SFU">
2087bf215546Sopenharmony_ci    <desc>
2088bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
2089bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
2090bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
2091bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.v4i8.bit A, B, mask` calculates
2092bf215546Sopenharmony_ci      `(A &amp; mask) | (B &amp; ~mask)`.
2093bf215546Sopenharmony_ci    </desc>
2094bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
2095bf215546Sopenharmony_ci    <src>A</src>
2096bf215546Sopenharmony_ci    <src>B</src>
2097bf215546Sopenharmony_ci    <src>Mask</src>
2098bf215546Sopenharmony_ci  </ins>
2099bf215546Sopenharmony_ci
2100bf215546Sopenharmony_ci  <ins name="CUBE_SSEL" title="Cube S-coordinate select" dests="1" opcode="0xBC" opcode2="0" unit="SFU">
    <!-- Shares opcode 0xBC with CUBE_TSEL; opcode2 is 0 here and 1 there.
         Sources are Z, X, face index (CUBE_TSEL takes Y, Z, face index). -->
2101bf215546Sopenharmony_ci    <desc>During a cube map transform, select the S coordinate given a selected face.</desc>
2102bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
2103bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
2104bf215546Sopenharmony_ci    <src>Cube face index</src>
2105bf215546Sopenharmony_ci  </ins>
2106bf215546Sopenharmony_ci
2107bf215546Sopenharmony_ci  <ins name="CUBE_TSEL" title="Cube T-coordinate select" dests="1" opcode="0xBC" opcode2="1" unit="SFU">
    <!-- Shares opcode 0xBC with CUBE_SSEL; opcode2 is 1 here and 0 there.
         Sources are Y, Z, face index (CUBE_SSEL takes Z, X, face index). -->
2108bf215546Sopenharmony_ci    <desc>During a cube map transform, select the T coordinate given a selected face.</desc>
2109bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
2110bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
2111bf215546Sopenharmony_ci    <src>Cube face index</src>
2112bf215546Sopenharmony_ci  </ins>
2113bf215546Sopenharmony_ci
2114bf215546Sopenharmony_ci  <ins name="MKVEC.v2i8" title="Make 8-bit vector" dests="1" opcode="0xBD" unit="CVT">
2115bf215546Sopenharmony_ci    <desc>
2116bf215546Sopenharmony_ci      Calculates $A | (B \ll 8) | (CD \ll 16)$ for 8-bit A and B and 16-bit CD.
2117bf215546Sopenharmony_ci
2118bf215546Sopenharmony_ci      To implement `(uchar4) (A, B, C, D)` in full generality, use the sequence
2119bf215546Sopenharmony_ci      `MKVEC.v2i8 CD, C, D, #0; MKVEC.v2i8 out, A, B, CD`
2120bf215546Sopenharmony_ci
2121bf215546Sopenharmony_ci      `MKVEC.v2i8` also allows zero extending arbitrary 8-bit lanes. For
2122bf215546Sopenharmony_ci      example, to extend `r0.b3` to `r1`, use `MKVEC.v2i8 r1, r0.b3, 0x0.b0, 0x0`.
2123bf215546Sopenharmony_ci    </desc>
    <!-- A and B are marked lane="true"; CD is not. Presumably this is what
         permits byte-lane selectors such as `r0.b3` in the example above;
         TODO confirm against the consumer of this file. -->
2124bf215546Sopenharmony_ci    <src lane="true">A</src>
2125bf215546Sopenharmony_ci    <src lane="true">B</src>
2126bf215546Sopenharmony_ci    <src>CD</src>
2127bf215546Sopenharmony_ci  </ins>
2128bf215546Sopenharmony_ci
2129bf215546Sopenharmony_ci  <ins name="CUBEFACE1" title="Cube map transform step 1" dests="1" opcode="0xC0" unit="SFU">
    <!-- Declares the same three X/Y/Z sources, each with absneg="true", as
         CUBEFACE2 (opcode 0xC1). -->
2130bf215546Sopenharmony_ci    <desc>Select the maximum absolute value of its arguments.</desc>
2131bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
2132bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
2133bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
2134bf215546Sopenharmony_ci  </ins>
2135bf215546Sopenharmony_ci
2136bf215546Sopenharmony_ci  <ins name="CUBEFACE2" title="Cube map transform step 2" dests="1" opcode="0xC1" unit="SFU">
    <!-- Declares the same three X/Y/Z sources, each with absneg="true", as
         CUBEFACE1 (opcode 0xC0). -->
2137bf215546Sopenharmony_ci    <desc>Select the cube face index corresponding to the arguments.</desc>
2138bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
2139bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
2140bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
2141bf215546Sopenharmony_ci  </ins>
2142bf215546Sopenharmony_ci
2143bf215546Sopenharmony_ci  <group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2" unit="FMA">
    <!-- NOTE(review): the formula in the description gives the lane domain as
         {0, ..., 255}^4, which matches the unsigned variant; presumably
         IDP.v4s8 operates on signed lanes. Confirm. -->
2144bf215546Sopenharmony_ci    <desc>
2145bf215546Sopenharmony_ci      8-bit integer dot product between 4 channel vectors, intended for machine
2146bf215546Sopenharmony_ci      learning. Available in both unsigned and signed variants, controlling
2147bf215546Sopenharmony_ci      sign-extension/zero-extension behaviour to the final 32-bit destination.
2148bf215546Sopenharmony_ci      Saturation is available. Corresponds to the `cl_arm_integer_dot_product_*`
2149bf215546Sopenharmony_ci      family of OpenCL extensions. Not for actual use, just for completeness.
2150bf215546Sopenharmony_ci      Instead, use your platform's neural accelerator.
2151bf215546Sopenharmony_ci
2152bf215546Sopenharmony_ci      For $A, B \in \{ 0, \ldots, 255 \}^4$ and $\text{Accumulator} \in
2153bf215546Sopenharmony_ci      \mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally
2154bf215546Sopenharmony_ci      saturates.
2155bf215546Sopenharmony_ci    </desc>
    <!-- Both variants use the group's opcode 0xC2; opcode2 tells them apart. -->
2156bf215546Sopenharmony_ci    <ins name="IDP.v4s8" opcode2="0"/>
2157bf215546Sopenharmony_ci    <ins name="IDP.v4u8" opcode2="1"/>
2158bf215546Sopenharmony_ci    <src>A</src>
2159bf215546Sopenharmony_ci    <src>B</src>
2160bf215546Sopenharmony_ci    <src>Accumulator</src>
2161bf215546Sopenharmony_ci    <saturate/>
2162bf215546Sopenharmony_ci  </group>
2163bf215546Sopenharmony_ci
2164bf215546Sopenharmony_ci  <group name="ICMP" title="Unsigned integer compare" dests="1" unit="CVT">
    <!-- NOTE(review): the signed integer compare group later in this file is
         also named "ICMP"; confirm that no consumer keys on the group name. -->
2165bf215546Sopenharmony_ci    <desc>
2166bf215546Sopenharmony_ci      Evaluates the given condition, does a logical and/or with the condition in
2167bf215546Sopenharmony_ci      the result source, and returns it in the given result type (integer
2168bf215546Sopenharmony_ci      one, integer minus one, or floating-point one). The third source is useful
2169bf215546Sopenharmony_ci      for chaining together conditions without intermediate bitwise arithmetic;
2170bf215546Sopenharmony_ci      when this is not desired, tie it to zero and use the OR combine mode (do
2171bf215546Sopenharmony_ci      not set the `.and` modifier).
2172bf215546Sopenharmony_ci
2173bf215546Sopenharmony_ci      The sequence modifier `.seq` is used to construct 64-bit compares in 2
2174bf215546Sopenharmony_ci      `ICMP.u32` instructions, in conjunction with the `u1` result type on the
2175bf215546Sopenharmony_ci      low half, the `m1` result type on the high half, and the result of the low
2176bf215546Sopenharmony_ci      half comparison passed as the third source. For comparisons other than
2177bf215546Sopenharmony_ci      64-bit, do not set the `.seq` modifier and do not use the `u1` result
2178bf215546Sopenharmony_ci      type.
2179bf215546Sopenharmony_ci    </desc>
2180bf215546Sopenharmony_ci    <ins name="ICMP.u32" opcode="0xF0"/>
2181bf215546Sopenharmony_ci    <ins name="ICMP.v2u16" opcode="0xF1"/>
2182bf215546Sopenharmony_ci    <ins name="ICMP.v4u8" opcode="0xF2"/>
2183bf215546Sopenharmony_ci    <cmp/>
2184bf215546Sopenharmony_ci    <result_type/>
2185bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
2186bf215546Sopenharmony_ci    <mod name="seq" start="25" size="1"/>
2187bf215546Sopenharmony_ci    <src widen="true">A</src>
2188bf215546Sopenharmony_ci    <src widen="true">B</src>
2189bf215546Sopenharmony_ci    <src>C</src>
2190bf215546Sopenharmony_ci  </group>
2191bf215546Sopenharmony_ci
2192bf215546Sopenharmony_ci  <group name="FCMP" title="Floating-point compare" dests="1" unit="CVT">
2193bf215546Sopenharmony_ci    <desc>
2194bf215546Sopenharmony_ci      Evaluates the given condition, does a logical and/or with the condition in
2195bf215546Sopenharmony_ci      the result source, and returns it in the given result type (integer
2196bf215546Sopenharmony_ci      one, integer minus one, or floating-point one). The third source is useful
2197bf215546Sopenharmony_ci      for chaining together conditions without intermediate bitwise arithmetic;
2198bf215546Sopenharmony_ci      when this is not desired, tie it to zero and use the OR combine mode (do
2199bf215546Sopenharmony_ci      not set the `.and` modifier).
2200bf215546Sopenharmony_ci    </desc>
2201bf215546Sopenharmony_ci    <ins name="FCMP.f32" opcode="0xF4"/>
2202bf215546Sopenharmony_ci    <ins name="FCMP.v2f16" opcode="0xF5"/>
2203bf215546Sopenharmony_ci    <cmp/>
2204bf215546Sopenharmony_ci    <result_type/>
    <!-- Unlike the ICMP groups in this file, no `seq` modifier is declared. -->
2205bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
2206bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
2207bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
2208bf215546Sopenharmony_ci    <src>C</src>
2209bf215546Sopenharmony_ci  </group>
2210bf215546Sopenharmony_ci
2211bf215546Sopenharmony_ci  <group name="ICMP" title="Signed integer compare" dests="1" unit="CVT">
    <!-- NOTE(review): the unsigned integer compare group earlier in this file
         is also named "ICMP"; confirm that no consumer keys on the group
         name. -->
2212bf215546Sopenharmony_ci    <desc>
2213bf215546Sopenharmony_ci      Evaluates the given condition, does a logical and/or with the condition in
2214bf215546Sopenharmony_ci      the result source, and returns it in the given result type (integer
2215bf215546Sopenharmony_ci      one, integer minus one, or floating-point one). The third source is useful
2216bf215546Sopenharmony_ci      for chaining together conditions without intermediate bitwise arithmetic;
2217bf215546Sopenharmony_ci      when this is not desired, tie it to zero and use the OR combine mode (do
2218bf215546Sopenharmony_ci      not set the `.and` modifier).
2219bf215546Sopenharmony_ci
2220bf215546Sopenharmony_ci      The sequence modifier `.seq` is used to construct signed 64-bit compares
2221bf215546Sopenharmony_ci      in 1 `ICMP.u32` and 1 `ICMP.s32` instruction, in conjunction with the `u1`
2222bf215546Sopenharmony_ci      result type on the low half, the `m1` result type on the high half, and
2223bf215546Sopenharmony_ci      the result of the low half comparison passed as the third source. For
2224bf215546Sopenharmony_ci      comparisons other than 64-bit, do not set the `.seq` modifier and do not
2225bf215546Sopenharmony_ci      use the `u1` result type.
2226bf215546Sopenharmony_ci    </desc>
2227bf215546Sopenharmony_ci    <ins name="ICMP.s32" opcode="0xF8"/>
2228bf215546Sopenharmony_ci    <ins name="ICMP.v2s16" opcode="0xF9"/>
2229bf215546Sopenharmony_ci    <ins name="ICMP.v4s8" opcode="0xFA"/>
2230bf215546Sopenharmony_ci    <cmp/>
2231bf215546Sopenharmony_ci    <result_type/>
2232bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
2233bf215546Sopenharmony_ci    <mod name="seq" start="25" size="1"/>
2234bf215546Sopenharmony_ci    <src widen="true">A</src>
2235bf215546Sopenharmony_ci    <src widen="true">B</src>
2236bf215546Sopenharmony_ci    <src>C</src>
2237bf215546Sopenharmony_ci  </group>
2238bf215546Sopenharmony_ci
2239bf215546Sopenharmony_ci  <ins name="IADD_IMM.i32" title="Integer addition with immediate" dests="1" opcode="0x110" unit="CVT">
2240bf215546Sopenharmony_ci    <desc>
2241bf215546Sopenharmony_ci      Adds an arbitrary 32-bit immediate embedded within the instruction stream.
2242bf215546Sopenharmony_ci      If no modifiers are required, this is preferred to `IADD.i32` with a
2243bf215546Sopenharmony_ci      constant accessed as a uniform. However, if the constant is available
2244bf215546Sopenharmony_ci      inline, `IADD.i32` is preferred.
2245bf215546Sopenharmony_ci
2246bf215546Sopenharmony_ci      `IADD_IMM.i32` with the source tied to zero is the canonical immediate move.
2247bf215546Sopenharmony_ci    </desc>
2248bf215546Sopenharmony_ci    <src>A</src>
2249bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
2250bf215546Sopenharmony_ci  </ins>
2251bf215546Sopenharmony_ci
2252bf215546Sopenharmony_ci  <ins name="IADD_IMM.v2i16" title="Integer addition with immediate" dests="1" opcode="0x111" unit="CVT">
2253bf215546Sopenharmony_ci    <desc>
2254bf215546Sopenharmony_ci      Adds an arbitrary pair of 16-bit immediates embedded within the
2255bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
2256bf215546Sopenharmony_ci      `IADD.v2i16` with a constant accessed as a uniform. However, if the
2257bf215546Sopenharmony_ci      constant is available inline, `IADD.v2i16` is preferred. Adding only a
2258bf215546Sopenharmony_ci      single 16-bit constant requires replication of the constant.
2259bf215546Sopenharmony_ci    </desc>
2260bf215546Sopenharmony_ci    <src>A</src>
    <!-- The pair of 16-bit immediates is carried in this single 32-bit field. -->
2261bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
2262bf215546Sopenharmony_ci  </ins>
2263bf215546Sopenharmony_ci
2264bf215546Sopenharmony_ci  <ins name="IADD_IMM.v4i8" title="Integer addition with immediate" dests="1" opcode="0x112" unit="CVT">
2265bf215546Sopenharmony_ci    <desc>
2266bf215546Sopenharmony_ci      Adds an arbitrary quad of 8-bit immediates embedded within the
2267bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
2268bf215546Sopenharmony_ci      `IADD.v4i8` with a constant accessed as a uniform. However, if the
2269bf215546Sopenharmony_ci      constant is available inline, `IADD.v4i8` is preferred. Adding only a
2270bf215546Sopenharmony_ci      single 8-bit constant requires replication of the constant.
2271bf215546Sopenharmony_ci    </desc>
2272bf215546Sopenharmony_ci    <src>A</src>
    <!-- The quad of 8-bit immediates is carried in this single 32-bit field. -->
2273bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
2274bf215546Sopenharmony_ci  </ins>
2275bf215546Sopenharmony_ci
2276bf215546Sopenharmony_ci  <ins name="FADD_IMM.f32" title="Floating-point addition with immediate" dests="1" opcode="0x114" unit="FMA">
2277bf215546Sopenharmony_ci    <desc>
2278bf215546Sopenharmony_ci      Adds an arbitrary 32-bit immediate embedded within the instruction stream.
2279bf215546Sopenharmony_ci      If no modifiers are required, this is preferred to `FADD.f32` with a
2280bf215546Sopenharmony_ci      constant accessed as a uniform. However, if the constant is available
2281bf215546Sopenharmony_ci      inline, `FADD.f32` is preferred.
2282bf215546Sopenharmony_ci    </desc>
    <!-- NOTE(review): FADD_IMM.v2f16 marks its source float="true" but this
         instruction does not; confirm whether the difference is intended. -->
2283bf215546Sopenharmony_ci    <src>A</src>
2284bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
2285bf215546Sopenharmony_ci  </ins>
2286bf215546Sopenharmony_ci
2287bf215546Sopenharmony_ci  <ins name="FADD_IMM.v2f16" title="Floating-point addition with immediate" dests="1" opcode="0x115" unit="FMA">
2288bf215546Sopenharmony_ci    <desc>
2289bf215546Sopenharmony_ci      Adds an arbitrary pair of 16-bit immediates embedded within the
2290bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
2291bf215546Sopenharmony_ci      `FADD.v2f16` with a constant accessed as a uniform. However, if the
2292bf215546Sopenharmony_ci      constant is available inline, `FADD.v2f16` is preferred. Adding only a
2293bf215546Sopenharmony_ci      single 16-bit constant requires replication of the constant.
2294bf215546Sopenharmony_ci    </desc>
2295bf215546Sopenharmony_ci    <src float="true">A</src>
    <!-- The pair of 16-bit immediates is carried in this single 32-bit field. -->
2296bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
2297bf215546Sopenharmony_ci  </ins>
2298bf215546Sopenharmony_ci
2299bf215546Sopenharmony_ci  <ins name="ATOM1_RETURN.i32" title="Atomic operations on memory with 1" opcode="0x69" opcode2="3" unit="LS">
    <!-- Shares opcode 0x69 with ATOM1_RETURN.i64; opcode2 is 3 here and 5
         there. Uses atom_opc_1 rather than the atom_opc of the ATOM
         instructions. -->
2300bf215546Sopenharmony_ci    <slot/>
2301bf215546Sopenharmony_ci    <sr_count/>
2302bf215546Sopenharmony_ci    <atom_opc_1/>
2303bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2304bf215546Sopenharmony_ci
2305bf215546Sopenharmony_ci    <!-- The staging register write is optional: for ATOM1.i32 it is omitted, in which case sr_count must be 0 -->
2306bf215546Sopenharmony_ci    <sr write="true"/>
2307bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2308bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2309bf215546Sopenharmony_ci  </ins>
2310bf215546Sopenharmony_ci
2311bf215546Sopenharmony_ci  <ins name="ATOM1_RETURN.i64" title="Atomic operations on memory with 1" opcode="0x69" opcode2="5" unit="LS">
    <!-- Shares opcode 0x69 with ATOM1_RETURN.i32; opcode2 is 5 here and 3
         there. Uses atom_opc_1 rather than the atom_opc of the ATOM
         instructions. -->
2312bf215546Sopenharmony_ci    <slot/>
2313bf215546Sopenharmony_ci    <sr_count/>
2314bf215546Sopenharmony_ci    <atom_opc_1/>
2315bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci    <!-- The staging register write is optional: for ATOM1.i64 it is omitted, in which case sr_count must be 0 -->
2318bf215546Sopenharmony_ci    <sr write="true"/>
2319bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2320bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2321bf215546Sopenharmony_ci  </ins>
2322bf215546Sopenharmony_ci
2323bf215546Sopenharmony_ci  <ins name="ATOM.i32" title="Atomic operations on memory" opcode="0x68" opcode2="3" unit="LS">
    <!-- Shares opcode 0x68 with ATOM.i64; opcode2 is 3 here and 5 there.
         Only a read staging register is declared, and no dests attribute is
         given, unlike the ATOM_RETURN instructions which also declare a
         write staging register. -->
2324bf215546Sopenharmony_ci    <slot/>
2325bf215546Sopenharmony_ci    <sr_count/>
2326bf215546Sopenharmony_ci    <atom_opc/>
2327bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2328bf215546Sopenharmony_ci
2329bf215546Sopenharmony_ci    <sr read="true"/>
2330bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2331bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2332bf215546Sopenharmony_ci  </ins>
2333bf215546Sopenharmony_ci
2334bf215546Sopenharmony_ci  <ins name="ATOM.i64" title="Atomic operations on memory" opcode="0x68" opcode2="5" unit="LS">
    <!-- Shares opcode 0x68 with ATOM.i32; opcode2 is 5 here and 3 there.
         Only a read staging register is declared, and no dests attribute is
         given, unlike the ATOM_RETURN instructions which also declare a
         write staging register. -->
2335bf215546Sopenharmony_ci    <slot/>
2336bf215546Sopenharmony_ci    <sr_count/>
2337bf215546Sopenharmony_ci    <atom_opc/>
2338bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2339bf215546Sopenharmony_ci
2340bf215546Sopenharmony_ci    <sr read="true"/>
2341bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2342bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2343bf215546Sopenharmony_ci  </ins>
2344bf215546Sopenharmony_ci
2345bf215546Sopenharmony_ci  <ins name="ATOM_RETURN.i32" title="Atomic operations on memory" opcode="0x120" opcode2="3" unit="LS">
    <!-- Shares opcode 0x120 with ATOM_RETURN.i64; opcode2 is 3 here and 5
         there. -->
2346bf215546Sopenharmony_ci    <slot/>
2347bf215546Sopenharmony_ci    <sr_count/>
2348bf215546Sopenharmony_ci    <sr_write_count/>
2349bf215546Sopenharmony_ci
2350bf215546Sopenharmony_ci    <!-- Only valid with .xchg to implement ACMPXCHG -->
2351bf215546Sopenharmony_ci    <mod name="compare" start="26" size="1"/>
2352bf215546Sopenharmony_ci
2353bf215546Sopenharmony_ci    <atom_opc/>
2354bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2355bf215546Sopenharmony_ci
    <!-- Two staging registers are declared: a write one (flags="false")
         followed by a read one (flags="rw"). -->
2356bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2357bf215546Sopenharmony_ci    <sr read="true" flags="rw"/>
2358bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2359bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2360bf215546Sopenharmony_ci  </ins>
2361bf215546Sopenharmony_ci
2362bf215546Sopenharmony_ci  <ins name="ATOM_RETURN.i64" title="Atomic operations on memory" opcode="0x120" opcode2="5" unit="LS">
    <!-- Shares opcode 0x120 with ATOM_RETURN.i32; opcode2 is 5 here and 3
         there. -->
2363bf215546Sopenharmony_ci    <slot/>
2364bf215546Sopenharmony_ci    <sr_count/>
2365bf215546Sopenharmony_ci    <sr_write_count/>
    <!-- Same modifier as on ATOM_RETURN.i32, where it is documented as only
         valid with .xchg to implement ACMPXCHG -->
2366bf215546Sopenharmony_ci    <mod name="compare" start="26" size="1"/>
2367bf215546Sopenharmony_ci    <atom_opc/>
2368bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
2369bf215546Sopenharmony_ci
    <!-- Two staging registers are declared: a write one (flags="false")
         followed by a read one (flags="rw"). -->
2370bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2371bf215546Sopenharmony_ci    <sr read="true" flags="rw"/>
2372bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
2373bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
2374bf215546Sopenharmony_ci  </ins>
2375bf215546Sopenharmony_ci
2376bf215546Sopenharmony_ci  <ins name="TEX_FETCH" title="Texel fetch" opcode="0x125" unit="T">
2377bf215546Sopenharmony_ci    <desc>Unfiltered texturing instruction.</desc>
2378bf215546Sopenharmony_ci    <slot/>
2379bf215546Sopenharmony_ci    <skip/>
2380bf215546Sopenharmony_ci    <register_type/>
2381bf215546Sopenharmony_ci    <register_width/>
2382bf215546Sopenharmony_ci    <write_mask/>
2383bf215546Sopenharmony_ci    <dimension/>
2384bf215546Sopenharmony_ci    <wide_indices/>
2385bf215546Sopenharmony_ci    <array_enable/>
2386bf215546Sopenharmony_ci    <texel_offset/>
2387bf215546Sopenharmony_ci
2388bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
2389bf215546Sopenharmony_ci    <sr_count/>
2390bf215546Sopenharmony_ci    <sr_write_count/>
2391bf215546Sopenharmony_ci
2392bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2393bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
2394bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2395bf215546Sopenharmony_ci  </ins>
2396bf215546Sopenharmony_ci
2397bf215546Sopenharmony_ci  <ins name="TEX_SINGLE" title="Texture load" opcode="0x128" unit="T">
2398bf215546Sopenharmony_ci    <desc>Ordinary texturing instruction using a sampler.</desc>
    <!-- Modifier list matches TEX_FETCH with shadow and lod_mode added. -->
2399bf215546Sopenharmony_ci    <slot/>
2400bf215546Sopenharmony_ci    <skip/>
2401bf215546Sopenharmony_ci    <register_type/>
2402bf215546Sopenharmony_ci    <register_width/>
2403bf215546Sopenharmony_ci    <write_mask/>
2404bf215546Sopenharmony_ci    <dimension/>
2405bf215546Sopenharmony_ci    <wide_indices/>
2406bf215546Sopenharmony_ci    <array_enable/>
2407bf215546Sopenharmony_ci    <texel_offset/>
2408bf215546Sopenharmony_ci    <shadow/>
2409bf215546Sopenharmony_ci    <lod_mode/>
2410bf215546Sopenharmony_ci
2411bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
2412bf215546Sopenharmony_ci    <sr_count/>
2413bf215546Sopenharmony_ci    <sr_write_count/>
2414bf215546Sopenharmony_ci
2415bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2416bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
2417bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2418bf215546Sopenharmony_ci  </ins>
2419bf215546Sopenharmony_ci
2420bf215546Sopenharmony_ci  <ins name="TEX_GATHER" title="Texel gather" opcode="0x129" unit="T">
2421bf215546Sopenharmony_ci    <desc>Texture gather instruction.</desc>
    <!-- Modifier list matches TEX_SINGLE except that integer_coordinates and
         fetch_component are declared and lod_mode is not. -->
2422bf215546Sopenharmony_ci    <slot/>
2423bf215546Sopenharmony_ci    <skip/>
2424bf215546Sopenharmony_ci    <register_type/>
2425bf215546Sopenharmony_ci    <register_width/>
2426bf215546Sopenharmony_ci    <write_mask/>
2427bf215546Sopenharmony_ci    <dimension/>
2428bf215546Sopenharmony_ci    <wide_indices/>
2429bf215546Sopenharmony_ci    <array_enable/>
2430bf215546Sopenharmony_ci    <texel_offset/>
2431bf215546Sopenharmony_ci    <integer_coordinates/>
2432bf215546Sopenharmony_ci    <fetch_component/>
2433bf215546Sopenharmony_ci    <shadow/>
2434bf215546Sopenharmony_ci
2435bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
2436bf215546Sopenharmony_ci    <sr_count/>
2437bf215546Sopenharmony_ci    <sr_write_count/>
2438bf215546Sopenharmony_ci
2439bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2440bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
2441bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2442bf215546Sopenharmony_ci  </ins>
2443bf215546Sopenharmony_ci
2444bf215546Sopenharmony_ci  <ins name="TEX_DUAL" title="Dual texture" opcode="0x12F" unit="T">
2445bf215546Sopenharmony_ci    <desc>Pair of texture instructions.</desc>
    <!-- Modifier list matches TEX_SINGLE with secondary_register_width
         declared explicitly; the other TEX_* instructions only carry a
         comment saying to leave that field as 0. -->
2446bf215546Sopenharmony_ci    <slot/>
2447bf215546Sopenharmony_ci    <skip/>
2448bf215546Sopenharmony_ci    <register_type/>
2449bf215546Sopenharmony_ci    <register_width/>
2450bf215546Sopenharmony_ci    <secondary_register_width/>
2451bf215546Sopenharmony_ci    <write_mask/>
2452bf215546Sopenharmony_ci    <dimension/>
2453bf215546Sopenharmony_ci    <wide_indices/>
2454bf215546Sopenharmony_ci    <array_enable/>
2455bf215546Sopenharmony_ci    <texel_offset/>
2456bf215546Sopenharmony_ci    <shadow/>
2457bf215546Sopenharmony_ci    <lod_mode/>
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_ci    <sr_count/>
2460bf215546Sopenharmony_ci    <sr_write_count/>
2461bf215546Sopenharmony_ci
2462bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
2463bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
2464bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2465bf215546Sopenharmony_ci  </ins>
2466bf215546Sopenharmony_ci
2467bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_SINGLE" title="Fused varying-texturing" opcode="0x130" unit="VT">
2468bf215546Sopenharmony_ci    <desc>
2469bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2470bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2471bf215546Sopenharmony_ci    </desc>
2472bf215546Sopenharmony_ci    <slot/>
2473bf215546Sopenharmony_ci    <skip/>
2474bf215546Sopenharmony_ci    <sample_and_update/>
2475bf215546Sopenharmony_ci    <register_type/>
2476bf215546Sopenharmony_ci    <vartex_register_width/>
2477bf215546Sopenharmony_ci    <dimension/>
2478bf215546Sopenharmony_ci    <array_enable/>
2479bf215546Sopenharmony_ci    <shadow/>
2480bf215546Sopenharmony_ci    <lod_mode/>
2481bf215546Sopenharmony_ci
    <!-- Unlike the TEX_* instructions, only sr_write_count and a write
         staging register are declared (no sr_count, no read staging
         register); the varying offset is passed as a second source. -->
2482bf215546Sopenharmony_ci    <sr_write_count/>
2483bf215546Sopenharmony_ci
2484bf215546Sopenharmony_ci    <sr write="true"/>
2485bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2486bf215546Sopenharmony_ci    <src>Varying offset</src>
2487bf215546Sopenharmony_ci  </ins>
2488bf215546Sopenharmony_ci
2489bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_GATHER" title="Fused varying-texturing" opcode="0x131" unit="VT">
2490bf215546Sopenharmony_ci    <desc>
2491bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2492bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2493bf215546Sopenharmony_ci    </desc>
    <!-- Modifier list matches VAR_TEX_BUF_SINGLE except that
         integer_coordinates and fetch_component are declared and lod_mode
         is not. -->
2494bf215546Sopenharmony_ci    <slot/>
2495bf215546Sopenharmony_ci    <skip/>
2496bf215546Sopenharmony_ci    <sample_and_update/>
2497bf215546Sopenharmony_ci    <register_type/>
2498bf215546Sopenharmony_ci    <vartex_register_width/>
2499bf215546Sopenharmony_ci    <dimension/>
2500bf215546Sopenharmony_ci    <array_enable/>
2501bf215546Sopenharmony_ci    <integer_coordinates/>
2502bf215546Sopenharmony_ci    <fetch_component/>
2503bf215546Sopenharmony_ci    <shadow/>
2504bf215546Sopenharmony_ci
2505bf215546Sopenharmony_ci    <sr_write_count/>
2506bf215546Sopenharmony_ci
2507bf215546Sopenharmony_ci    <sr write="true"/>
2508bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2509bf215546Sopenharmony_ci    <src>Varying offset</src>
2510bf215546Sopenharmony_ci  </ins>
2511bf215546Sopenharmony_ci
2512bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_GRADIENT" title="Fused varying-texturing" opcode="0x132" unit="VT">
2513bf215546Sopenharmony_ci    <desc>
2514bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2515bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2516bf215546Sopenharmony_ci    </desc>
    <!-- Modifier list matches VAR_TEX_BUF_SINGLE except that
         lod_bias_disable and lod_clamp_disable are declared in place of
         lod_mode. -->
2517bf215546Sopenharmony_ci    <slot/>
2518bf215546Sopenharmony_ci    <skip/>
2519bf215546Sopenharmony_ci    <sample_and_update/>
2520bf215546Sopenharmony_ci    <register_type/>
2521bf215546Sopenharmony_ci    <vartex_register_width/>
2522bf215546Sopenharmony_ci    <dimension/>
2523bf215546Sopenharmony_ci    <array_enable/>
2524bf215546Sopenharmony_ci    <shadow/>
2525bf215546Sopenharmony_ci    <lod_bias_disable/>
2526bf215546Sopenharmony_ci    <lod_clamp_disable/>
2527bf215546Sopenharmony_ci
2528bf215546Sopenharmony_ci    <sr_write_count/>
2529bf215546Sopenharmony_ci
2530bf215546Sopenharmony_ci    <sr write="true"/>
2531bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2532bf215546Sopenharmony_ci    <src>Varying offset</src>
2533bf215546Sopenharmony_ci  </ins>
2534bf215546Sopenharmony_ci
2535bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_DUAL" title="Fused varying-texturing" opcode="0x137" unit="VT">
2536bf215546Sopenharmony_ci    <desc>
2537bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2538bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
2539bf215546Sopenharmony_ci    </desc>
    <!-- Declares the same modifiers, staging register and sources as
         VAR_TEX_BUF_SINGLE; only the opcode and description differ. -->
2540bf215546Sopenharmony_ci    <slot/>
2541bf215546Sopenharmony_ci    <skip/>
2542bf215546Sopenharmony_ci    <sample_and_update/>
2543bf215546Sopenharmony_ci    <register_type/>
2544bf215546Sopenharmony_ci    <vartex_register_width/>
2545bf215546Sopenharmony_ci    <dimension/>
2546bf215546Sopenharmony_ci    <array_enable/>
2547bf215546Sopenharmony_ci    <shadow/>
2548bf215546Sopenharmony_ci    <lod_mode/>
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci    <sr_write_count/>
2551bf215546Sopenharmony_ci
2552bf215546Sopenharmony_ci    <sr write="true"/>
2553bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2554bf215546Sopenharmony_ci    <src>Varying offset</src>
2555bf215546Sopenharmony_ci  </ins>
2556bf215546Sopenharmony_ci
2557bf215546Sopenharmony_ci  <ins name="VAR_TEX_SINGLE" title="Fused varying-texturing" opcode="0x138" unit="VT">
    <!--
      VAR_TEX_SINGLE: opcode 0x138 on the VT unit. Per the description it
      behaves like an LD_VAR_IMM_F32 varying load followed by TEX. It
      declares the same modifiers and operands as VAR_TEX_DUAL (0x13F),
      including lod_mode.
    -->
2558bf215546Sopenharmony_ci    <desc>
2559bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2560bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2561bf215546Sopenharmony_ci    </desc>
    <!--
      Modifiers, declared as empty elements.
      NOTE(review): the order of these elements is presumably significant to
      whatever consumes this file; keep it unchanged unless confirmed otherwise.
    -->
2562bf215546Sopenharmony_ci    <slot/>
2563bf215546Sopenharmony_ci    <skip/>
2564bf215546Sopenharmony_ci    <sample_and_update/>
2565bf215546Sopenharmony_ci    <register_type/>
2566bf215546Sopenharmony_ci    <vartex_register_width/>
2567bf215546Sopenharmony_ci    <dimension/>
2568bf215546Sopenharmony_ci    <array_enable/>
2569bf215546Sopenharmony_ci    <shadow/>
2570bf215546Sopenharmony_ci    <lod_mode/>
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci    <sr_write_count/>
2573bf215546Sopenharmony_ci
    <!--
      One sr entry flagged as written (presumably the staging register that
      receives the result; confirm), then two explicit sources: a 64-bit
      image reference and a varying offset with no explicit size.
    -->
2574bf215546Sopenharmony_ci    <sr write="true"/>
2575bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2576bf215546Sopenharmony_ci    <src>Varying offset</src>
2577bf215546Sopenharmony_ci  </ins>
2578bf215546Sopenharmony_ci
2579bf215546Sopenharmony_ci  <ins name="VAR_TEX_GATHER" title="Fused varying-texturing" opcode="0x139" unit="VT">
    <!--
      VAR_TEX_GATHER: opcode 0x139 on the VT unit. Compared with
      VAR_TEX_SINGLE (0x138) it adds the integer_coordinates and
      fetch_component modifiers and declares no lod_mode.

      NOTE(review): the description below names plain TEX, word for word the
      same as VAR_TEX_SINGLE, even though the modifier list differs. This
      looks copy-pasted; confirm which texture instruction is meant before
      relying on it.
    -->
2580bf215546Sopenharmony_ci    <desc>
2581bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2582bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2583bf215546Sopenharmony_ci    </desc>
    <!--
      Modifiers, declared as empty elements. Here shadow comes after
      integer_coordinates and fetch_component, whereas in the sibling
      VAR_TEX definitions it directly follows array_enable.
      NOTE(review): element order is presumably significant to the consumer
      of this file; keep it unchanged unless confirmed otherwise.
    -->
2584bf215546Sopenharmony_ci    <slot/>
2585bf215546Sopenharmony_ci    <skip/>
2586bf215546Sopenharmony_ci    <sample_and_update/>
2587bf215546Sopenharmony_ci    <register_type/>
2588bf215546Sopenharmony_ci    <vartex_register_width/>
2589bf215546Sopenharmony_ci    <dimension/>
2590bf215546Sopenharmony_ci    <array_enable/>
2591bf215546Sopenharmony_ci    <integer_coordinates/>
2592bf215546Sopenharmony_ci    <fetch_component/>
2593bf215546Sopenharmony_ci    <shadow/>
2594bf215546Sopenharmony_ci
2595bf215546Sopenharmony_ci    <sr_write_count/>
2596bf215546Sopenharmony_ci
    <!--
      One sr entry flagged as written (presumably the staging register that
      receives the result; confirm), then two explicit sources: a 64-bit
      image reference and a varying offset with no explicit size.
    -->
2597bf215546Sopenharmony_ci    <sr write="true"/>
2598bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2599bf215546Sopenharmony_ci    <src>Varying offset</src>
2600bf215546Sopenharmony_ci  </ins>
2601bf215546Sopenharmony_ci
2602bf215546Sopenharmony_ci  <ins name="VAR_TEX_GRADIENT" title="Fused varying-texturing" opcode="0x13A" unit="VT">
    <!--
      VAR_TEX_GRADIENT: opcode 0x13A on the VT unit. Where VAR_TEX_SINGLE
      (0x138) declares lod_mode, this definition declares lod_bias_disable
      and lod_clamp_disable instead.

      NOTE(review): the description below names plain TEX, word for word the
      same as VAR_TEX_SINGLE, even though the LOD modifiers differ. This
      looks copy-pasted; confirm which texture instruction is meant before
      relying on it.
    -->
2603bf215546Sopenharmony_ci    <desc>
2604bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2605bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
2606bf215546Sopenharmony_ci    </desc>
    <!--
      Modifiers, declared as empty elements.
      NOTE(review): the order of these elements is presumably significant to
      whatever consumes this file; keep it unchanged unless confirmed otherwise.
    -->
2607bf215546Sopenharmony_ci    <slot/>
2608bf215546Sopenharmony_ci    <skip/>
2609bf215546Sopenharmony_ci    <sample_and_update/>
2610bf215546Sopenharmony_ci    <register_type/>
2611bf215546Sopenharmony_ci    <vartex_register_width/>
2612bf215546Sopenharmony_ci    <dimension/>
2613bf215546Sopenharmony_ci    <array_enable/>
2614bf215546Sopenharmony_ci    <shadow/>
2615bf215546Sopenharmony_ci    <lod_bias_disable/>
2616bf215546Sopenharmony_ci    <lod_clamp_disable/>
2617bf215546Sopenharmony_ci
2618bf215546Sopenharmony_ci    <sr_write_count/>
2619bf215546Sopenharmony_ci
    <!--
      One sr entry flagged as written (presumably the staging register that
      receives the result; confirm), then two explicit sources: a 64-bit
      image reference and a varying offset with no explicit size.
    -->
2620bf215546Sopenharmony_ci    <sr write="true"/>
2621bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2622bf215546Sopenharmony_ci    <src>Varying offset</src>
2623bf215546Sopenharmony_ci  </ins>
2624bf215546Sopenharmony_ci
2625bf215546Sopenharmony_ci  <ins name="VAR_TEX_DUAL" title="Fused varying-texturing" opcode="0x13F" unit="VT">
    <!--
      VAR_TEX_DUAL: opcode 0x13F on the VT unit. Per the description it
      behaves like an LD_VAR_IMM_F32 varying load followed by TEX_DUAL. Its
      child elements match VAR_TEX_BUF_DUAL (0x137) one for one. The opcode
      jumps from 0x13A (VAR_TEX_GRADIENT) to 0x13F here; no definitions for
      0x13B through 0x13E appear in between.
    -->
2626bf215546Sopenharmony_ci    <desc>
2627bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
2628bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
2629bf215546Sopenharmony_ci    </desc>
    <!--
      Modifiers, declared as empty elements.
      NOTE(review): the order of these elements is presumably significant to
      whatever consumes this file; keep it unchanged unless confirmed otherwise.
    -->
2630bf215546Sopenharmony_ci    <slot/>
2631bf215546Sopenharmony_ci    <skip/>
2632bf215546Sopenharmony_ci    <sample_and_update/>
2633bf215546Sopenharmony_ci    <register_type/>
2634bf215546Sopenharmony_ci    <vartex_register_width/>
2635bf215546Sopenharmony_ci    <dimension/>
2636bf215546Sopenharmony_ci    <array_enable/>
2637bf215546Sopenharmony_ci    <shadow/>
2638bf215546Sopenharmony_ci    <lod_mode/>
2639bf215546Sopenharmony_ci
2640bf215546Sopenharmony_ci    <sr_write_count/>
2641bf215546Sopenharmony_ci
    <!--
      One sr entry flagged as written (presumably the staging register that
      receives the result; confirm), then two explicit sources: a 64-bit
      image reference and a varying offset with no explicit size.
    -->
2642bf215546Sopenharmony_ci    <sr write="true"/>
2643bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
2644bf215546Sopenharmony_ci    <src>Varying offset</src>
2645bf215546Sopenharmony_ci  </ins>
2646bf215546Sopenharmony_ci
2647bf215546Sopenharmony_ci  <ins name="FMA_RSCALE.f32" title="Fused floating-point multiply add with exponent bias" dests="1" opcode="0x160" unit="FMA">
    <!--
      First of the four FMA_RSCALE definitions (opcodes 0x160 to 0x163). All
      four declare one destination, the clamp modifier and the same four
      sources; they differ in name, title, opcode and description.
    -->
2648bf215546Sopenharmony_ci    <desc>
2649bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. Used in
2650bf215546Sopenharmony_ci      special transcendental function sequences. It should not be used for
2651bf215546Sopenharmony_ci      general code as its special case handling differs from two back-to-back
2652bf215546Sopenharmony_ci      `FMA.f32` operations. Equivalent to `FMA.f32` back-to-back with
2653bf215546Sopenharmony_ci      `LDEXP.f32`.
2654bf215546Sopenharmony_ci    </desc>
2655bf215546Sopenharmony_ci    <clamp/>
    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
2656bf215546Sopenharmony_ci    <src absneg="true">A</src>
2657bf215546Sopenharmony_ci    <src absneg="true">B</src>
2658bf215546Sopenharmony_ci    <src absneg="true">C</src>
2659bf215546Sopenharmony_ci    <src>D</src>
2660bf215546Sopenharmony_ci  </ins>
2661bf215546Sopenharmony_ci
2662bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_N.f32" title="Fused floating-point multiply add with exponent bias and zero override" dests="1" opcode="0x161" unit="FMA">
    <!--
      Zero-override variant of FMA_RSCALE.f32 (0x160): same destination
      count, clamp modifier and sources. Per the description, the product is
      forced to zero when either A or B is zero.
    -->
2663bf215546Sopenharmony_ci    <desc>
2664bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
2665bf215546Sopenharmony_ci      = 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an
2666bf215546Sopenharmony_ci      ordinary multiply would return NaN. Used in special transcendental
2667bf215546Sopenharmony_ci      function sequences. It should not be used for general code as its special
2668bf215546Sopenharmony_ci      case handling differs from two back-to-back `FMA.f32` operations.
2669bf215546Sopenharmony_ci      Equivalent to `FMA.f32` back-to-back with `LDEXP.f32`.
2670bf215546Sopenharmony_ci    </desc>
2671bf215546Sopenharmony_ci    <clamp/>
    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
2672bf215546Sopenharmony_ci    <src absneg="true">A</src>
2673bf215546Sopenharmony_ci    <src absneg="true">B</src>
2674bf215546Sopenharmony_ci    <src absneg="true">C</src>
2675bf215546Sopenharmony_ci    <src>D</src>
2676bf215546Sopenharmony_ci  </ins>
2677bf215546Sopenharmony_ci
2678bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_LEFT.f32" title="Fused floating-point multiply add with exponent bias and asymmetric zero handling" dests="1" opcode="0x162" unit="FMA">
    <!--
      Asymmetric-zero variant of FMA_RSCALE.f32 (0x160): same destination
      count, clamp modifier and sources. Per the description, the product is
      replaced by A when either A or B is zero.
    -->
2679bf215546Sopenharmony_ci    <desc>
2680bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
2681bf215546Sopenharmony_ci      = 0$ or $B = 0$, the multiply is treated as $A$ even if an
2682bf215546Sopenharmony_ci      ordinary multiply would return NaN. Used in special transcendental
2683bf215546Sopenharmony_ci      function sequences. It should not be used for general code as its special
2684bf215546Sopenharmony_ci      case handling differs from two back-to-back `FMA.f32` operations.
2685bf215546Sopenharmony_ci      Equivalent to `FMA.f32` back-to-back with `LDEXP.f32`.
2686bf215546Sopenharmony_ci    </desc>
2687bf215546Sopenharmony_ci    <clamp/>
    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
2688bf215546Sopenharmony_ci    <src absneg="true">A</src>
2689bf215546Sopenharmony_ci    <src absneg="true">B</src>
2690bf215546Sopenharmony_ci    <src absneg="true">C</src>
2691bf215546Sopenharmony_ci    <src>D</src>
2692bf215546Sopenharmony_ci  </ins>
2693bf215546Sopenharmony_ci
2694bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_SCALE16.f32" title="Fused floating-point multiply add with 16-bit exponent bias" dests="1" opcode="0x163" unit="FMA">
    <!--
      16-bit-bias variant of FMA_RSCALE.f32 (0x160): same destination count,
      clamp modifier and sources. Per the description, D is interpreted as a
      16-bit value; the D source element itself carries no size attribute.
    -->
2695bf215546Sopenharmony_ci    <desc>
2696bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D,
2697bf215546Sopenharmony_ci      interpreted as a 16-bit value. Used in special transcendental function
2698bf215546Sopenharmony_ci      sequences. It should not be used for general code as its special case
2699bf215546Sopenharmony_ci      handling differs from two back-to-back `FMA.f32` operations.  Equivalent
2700bf215546Sopenharmony_ci      to `FMA.f32` back-to-back with `LDEXP.f32`.
2701bf215546Sopenharmony_ci    </desc>
2702bf215546Sopenharmony_ci    <clamp/>
    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
2703bf215546Sopenharmony_ci    <src absneg="true">A</src>
2704bf215546Sopenharmony_ci    <src absneg="true">B</src>
2705bf215546Sopenharmony_ci    <src absneg="true">C</src>
2706bf215546Sopenharmony_ci    <src>D</src>
2707bf215546Sopenharmony_ci  </ins>
2708bf215546Sopenharmony_ci
2709bf215546Sopenharmony_ci</valhall>
2710