bifrost/valhall/ISA.xml

bf215546Sopenharmony_ci<!--
bf215546Sopenharmony_ci  Copyright (C) 2021 Collabora Ltd.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  Permission is hereby granted, free of charge, to any person obtaining a
bf215546Sopenharmony_ci  copy of this software and associated documentation files (the "Software"),
bf215546Sopenharmony_ci  to deal in the Software without restriction, including without limitation
bf215546Sopenharmony_ci  the rights to use, copy, modify, merge, publish, distribute, sublicense,
bf215546Sopenharmony_ci  and/or sell copies of the Software, and to permit persons to whom the
bf215546Sopenharmony_ci  Software is furnished to do so, subject to the following conditions:
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  The above copyright notice and this permission notice (including the next
bf215546Sopenharmony_ci  paragraph) shall be included in all copies or substantial portions of the
bf215546Sopenharmony_ci  Software.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bf215546Sopenharmony_ci  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bf215546Sopenharmony_ci  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
bf215546Sopenharmony_ci  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bf215546Sopenharmony_ci  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
bf215546Sopenharmony_ci  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
bf215546Sopenharmony_ci  SOFTWARE.
bf215546Sopenharmony_ci-->
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci<valhall>
bf215546Sopenharmony_ci  <lut name="Immediates">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      These immediates are accessible in (almost) any instruction, provided the
bf215546Sopenharmony_ci      immediate mode is kept to the default. They optimize for the most common
bf215546Sopenharmony_ci      immediate values; any immediate listed here may be used without taking up
bf215546Sopenharmony_ci      a uniform slot or a register. Most integer instructions can access
bf215546Sopenharmony_ci      separate half-words and individual bytes via swizzles on the source.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <constant desc="Zero">0x00000000</constant>
bf215546Sopenharmony_ci    <constant desc="All ones; integer $-1$">0xFFFFFFFF</constant>
bf215546Sopenharmony_ci    <constant desc="Maximum integer; floating-point NaN">0x7FFFFFFF</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(-2, -3, -4, -5)$">0xFAFCFDFE</constant> <!-- NOTE(review): read low byte first, the top byte 0xFA is -6 as a signed 8-bit value, but desc lists -5; confirm which of value or desc is correct -->
bf215546Sopenharmony_ci    <constant desc="16-bit integer $2^8$">0x01000000</constant>
bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(0, 32, 0, 128)$">0x80002000</constant>
bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(48, 80, 96, 112)$">0x70605030</constant>
bf215546Sopenharmony_ci    <constant desc="Multiples of 16 $(144, 160, 176, 192)$">0xC0B0A090</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(0, 1, 2, 3)$">0x03020100</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(4, 5, 6, 7)$">0x07060504</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(8, 9, 10, 11)$">0x0B0A0908</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(12, 13, 14, 15)$">0x0F0E0D0C</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(16, 17, 18, 19)$">0x13121110</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(20, 21, 22, 23)$">0x17161514</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(24, 25, 26, 27)$">0x1B1A1918</constant>
bf215546Sopenharmony_ci    <constant desc="Integers $(28, 29, 30, 31)$">0x1F1E1D1C</constant>
bf215546Sopenharmony_ci    <constant desc="Float $1.0$">0x3F800000</constant>
bf215546Sopenharmony_ci    <constant desc="Float $0.1$">0x3DCCCCCD</constant>
bf215546Sopenharmony_ci    <constant desc="Float $1 / \pi$">0x3EA2F983</constant>
bf215546Sopenharmony_ci    <constant desc="Float $\log(2)$">0x3F317218</constant>
bf215546Sopenharmony_ci    <constant desc="Float $\pi$">0x40490FDB</constant>
bf215546Sopenharmony_ci    <constant desc="Float $0.0$">0x00000000</constant>
bf215546Sopenharmony_ci    <constant desc="Float $65535.0 = 2^{16} - 1$">0x477FFF00</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $(255.0, 256.0) = (2^8 - 1, 2^8)$">0x5C005BF8</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $0.1 = 1 / 10$">0x2E660000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $0.25 = 2^{-2}$">0x34000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $0.5 = 2^{-1}$">0x38000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $1.0 = 2^0$">0x3C000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $2.0 = 2^1$">0x40000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $4.0 = 2^2$">0x44000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $8.0 = 2^3$">0x48000000</constant>
bf215546Sopenharmony_ci    <constant desc="Half-float $\pi$">0x42480000</constant>
bf215546Sopenharmony_ci  </lut>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Flow">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Every Valhall instruction can wait on dependency
bf215546Sopenharmony_ci      slots. A few special flows are available, specified in the instruction
bf215546Sopenharmony_ci      metadata from this enum. The `wait0126` flow is required to wait on
bf215546Sopenharmony_ci      dependency slot #6 and should be set on the instruction immediately
bf215546Sopenharmony_ci      preceding `ATEST`. The `wait` flow should be set for barriers.
bf215546Sopenharmony_ci      The `discard` flow only applies to fragment shaders and is used to
bf215546Sopenharmony_ci      terminate helper invocations; it should be set as early as possible after
bf215546Sopenharmony_ci      helper invocations are no longer needed as determined by data flow
bf215546Sopenharmony_ci      analysis. The `end` flow is used to terminate the shader, although it
bf215546Sopenharmony_ci      may be overloaded by the `BLEND` instruction.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The `reconverge` flow is required on any instruction immediately
bf215546Sopenharmony_ci      preceding a possible change to the mask of active threads in a subgroup.
bf215546Sopenharmony_ci      This includes all divergent branches, but it also includes the final
bf215546Sopenharmony_ci      instruction at the end of any basic block where the immediate successor
bf215546Sopenharmony_ci      (fallthrough) is the target of a divergent branch.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value name="None" default="true">none</value>
bf215546Sopenharmony_ci    <value name="Wait on slot 0">wait0</value>
bf215546Sopenharmony_ci    <value name="Wait on slot 1">wait1</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1">wait01</value>
bf215546Sopenharmony_ci    <value name="Wait on slot 2">wait2</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 0, 2">wait02</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 1, 2">wait12</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2">wait012</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2, 6">wait0126</value>
bf215546Sopenharmony_ci    <value name="Wait on slots 0, 1, 2, 6, 7">wait</value>
bf215546Sopenharmony_ci    <value name="Perform branch reconverge">reconverge</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value name="Terminate discarded threads">discard</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value name="Return from shader">end</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="FAU special page 0">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
bf215546Sopenharmony_ci      $32 + i$.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Warp ID and warps/core - 1">warp_id</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Bounding box maximum X/Y">framebuffer_size</value>
bf215546Sopenharmony_ci    <value desc="ATEST datum">atest_datum</value>
bf215546Sopenharmony_ci    <value desc="Sample positions">sample</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 0">blend_descriptor_0</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 1">blend_descriptor_1</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 2">blend_descriptor_2</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 3">blend_descriptor_3</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 4">blend_descriptor_4</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 5">blend_descriptor_5</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 6">blend_descriptor_6</value>
bf215546Sopenharmony_ci    <value desc="Blend descriptor 7">blend_descriptor_7</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="FAU special page 1">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
bf215546Sopenharmony_ci      $32 + i$.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Thread local storage base pointer">thread_local_pointer</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Workgroup local storage base pointer">workgroup_local_pointer</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Shader resource table base pointer">resource_table_pointer</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="FAU special page 3">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Situated between the immediates hard-coded in the hardware and the
bf215546Sopenharmony_ci      uniforms defined purely in software, Valhall has some special
bf215546Sopenharmony_ci      "constants" passing through data structures. These are encoded like the
bf215546Sopenharmony_ci      table of immediates, as if special constant $i$ were lookup table entry
bf215546Sopenharmony_ci      $32 + i$.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Lane ID">lane_id</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Core ID">core_id</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Program counter">program_counter</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Swizzles (8-bit)">
bf215546Sopenharmony_ci    <value default="true">b0123</value>
bf215546Sopenharmony_ci    <value>b3210</value>
bf215546Sopenharmony_ci    <value>b0101</value>
bf215546Sopenharmony_ci    <value>b2323</value>
bf215546Sopenharmony_ci    <value>b0000</value>
bf215546Sopenharmony_ci    <value>b1111</value>
bf215546Sopenharmony_ci    <value>b2222</value>
bf215546Sopenharmony_ci    <value>b3333</value>
bf215546Sopenharmony_ci    <value>b2301</value>
bf215546Sopenharmony_ci    <value>b1032</value>
bf215546Sopenharmony_ci    <value>b0011</value>
bf215546Sopenharmony_ci    <value>b2233</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Lanes (8-bit)">
bf215546Sopenharmony_ci    <desc>Used to select the 2 bytes for shifts of 16-bit vectors</desc>
bf215546Sopenharmony_ci    <value>b02</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value>b00</value>
bf215546Sopenharmony_ci    <value>b11</value>
bf215546Sopenharmony_ci    <value>b22</value>
bf215546Sopenharmony_ci    <value>b33</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value>b01</value>
bf215546Sopenharmony_ci    <value>b23</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Half-swizzles (8-bit)">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used to select the 2 bytes to convert for conversions from 8-bit vectors
bf215546Sopenharmony_ci      to 16-bit vectors
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value>b00</value>
bf215546Sopenharmony_ci    <value>b10</value>
bf215546Sopenharmony_ci    <value>b20</value>
bf215546Sopenharmony_ci    <value>b30</value>
bf215546Sopenharmony_ci    <value>b01</value>
bf215546Sopenharmony_ci    <value>b11</value>
bf215546Sopenharmony_ci    <value>b21</value>
bf215546Sopenharmony_ci    <value>b31</value>
bf215546Sopenharmony_ci    <value>b02</value>
bf215546Sopenharmony_ci    <value>b12</value>
bf215546Sopenharmony_ci    <value>b22</value>
bf215546Sopenharmony_ci    <value>b32</value>
bf215546Sopenharmony_ci    <value>b03</value>
bf215546Sopenharmony_ci    <value>b13</value>
bf215546Sopenharmony_ci    <value>b23</value>
bf215546Sopenharmony_ci    <value>b33</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Swizzles (16-bit)">
bf215546Sopenharmony_ci    <value>h00</value> <!-- 0,2 -->
bf215546Sopenharmony_ci    <value>h10</value>
bf215546Sopenharmony_ci    <value default="true">h01</value>
bf215546Sopenharmony_ci    <value>h11</value>
bf215546Sopenharmony_ci    <value>b00</value> <!-- 0,0 -->
bf215546Sopenharmony_ci    <value>b20</value> <!-- 1,1 -->
bf215546Sopenharmony_ci    <value>b02</value> <!-- 2,2 -->
bf215546Sopenharmony_ci    <value>b22</value> <!-- 3,3 -->
bf215546Sopenharmony_ci    <value>b11</value>
bf215546Sopenharmony_ci    <value>b31</value>
bf215546Sopenharmony_ci    <value>b13</value> <!-- 0,1 -->
bf215546Sopenharmony_ci    <value>b33</value> <!-- 2,3 -->
bf215546Sopenharmony_ci    <value>b01</value>
bf215546Sopenharmony_ci    <value>b23</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Swizzles (32-bit)">
bf215546Sopenharmony_ci    <value default="true">none</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value>h0</value>
bf215546Sopenharmony_ci    <value>h1</value>
bf215546Sopenharmony_ci    <value>b0</value>
bf215546Sopenharmony_ci    <value>b1</value>
bf215546Sopenharmony_ci    <value>b2</value>
bf215546Sopenharmony_ci    <value>b3</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Swizzles (64-bit)">
bf215546Sopenharmony_ci    <value default="true">none</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value>h0</value>
bf215546Sopenharmony_ci    <value>h1</value>
bf215546Sopenharmony_ci    <value>b0</value>
bf215546Sopenharmony_ci    <value>b1</value>
bf215546Sopenharmony_ci    <value>b2</value>
bf215546Sopenharmony_ci    <value>b3</value>
bf215546Sopenharmony_ci    <value>w0</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Lane (8-bit)" implied="true">
bf215546Sopenharmony_ci    <value>b0</value>
bf215546Sopenharmony_ci    <value>b1</value>
bf215546Sopenharmony_ci    <value>b2</value>
bf215546Sopenharmony_ci    <value>b3</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Combine">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used for the lane select of `BRANCHZ`. To use an 8-bit condition, a
bf215546Sopenharmony_ci      separate `ICMP` is required to cast to 16-bit.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value default="true">none</value>
bf215546Sopenharmony_ci    <value>h0</value>
bf215546Sopenharmony_ci    <value>h1</value>
bf215546Sopenharmony_ci    <value>and</value>
bf215546Sopenharmony_ci    <value>lowbits</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Lane (16-bit)" implied="true">
bf215546Sopenharmony_ci    <value>h0</value>
bf215546Sopenharmony_ci    <value>h1</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (8-bit)">
bf215546Sopenharmony_ci    <value default="true">b0</value>
bf215546Sopenharmony_ci    <value>b1</value>
bf215546Sopenharmony_ci    <value>b2</value>
bf215546Sopenharmony_ci    <value>b3</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 16-bit, low-half">h0</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 16-bit, high-half">h1</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 32-bit">w0</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value> <!-- d0 is the 64-bit destination, as in the 16-bit and 32-bit load lane enums -->
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (16-bit)">
bf215546Sopenharmony_ci    <value desc="Low half" default="true">h0</value>
bf215546Sopenharmony_ci    <value desc="High half">h1</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 32-bit">w0</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (24-bit)" implied="true">
bf215546Sopenharmony_ci    <value default="true">identity</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (32-bit)">
bf215546Sopenharmony_ci    <value default="true">w0</value>
bf215546Sopenharmony_ci    <value desc="Zero-extend to 64-bit">d0</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (48-bit)">
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value default="true">identity</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (64-bit)">
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value default="true">identity</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (96-bit)">
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value default="true">identity</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Load lane (128-bit)">
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value default="true">identity</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Round mode">
bf215546Sopenharmony_ci    <desc>Corresponds to IEEE 754 rounding modes</desc>
bf215546Sopenharmony_ci    <value desc="Round to nearest even" default="true">rte</value>
bf215546Sopenharmony_ci    <value desc="Round to positive infinity">rtp</value>
bf215546Sopenharmony_ci    <value desc="Round to negative infinity">rtn</value>
bf215546Sopenharmony_ci    <value desc="Round to zero">rtz</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Result type">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Comparison instructions like `FCMP` return a boolean but may encode this
bf215546Sopenharmony_ci      boolean in a variety of ways. `i1` gives an OpenGL style `0/1` boolean.
bf215546Sopenharmony_ci      `m1` gives a Direct3D style `0/~0` boolean. `f1` gives a floating-point
bf215546Sopenharmony_ci      `0.0f / 1.0f` boolean. Switching between these modes is useful to fold a
bf215546Sopenharmony_ci      boolean type convert into a comparison. `u1` is used internally to
bf215546Sopenharmony_ci      implement 64-bit comparisons.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Integer 1">i1</value>
bf215546Sopenharmony_ci    <value desc="Float 1">f1</value>
bf215546Sopenharmony_ci    <value desc="Minus 1">m1</value>
bf215546Sopenharmony_ci    <value desc="Low half of 64-bit compare">u1</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Widen">
bf215546Sopenharmony_ci    <value default="true">none</value>
bf215546Sopenharmony_ci    <value>h0</value>
bf215546Sopenharmony_ci    <value>h1</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Clamp">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Clamp applied to the destination of a floating-point instruction. Note the
bf215546Sopenharmony_ci      clamps may be decomposed as two independent bits for `clamp_0_inf` and
bf215546Sopenharmony_ci      `clamp_m1_1`, with `clamp_0_1` arising as the composition of `clamp_0_inf`
bf215546Sopenharmony_ci      and `clamp_m1_1` in either order.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Clamps are implemented per the SPIR-V specification:
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      $$\text{clamp} \; (x, \ell, h) = \min( \max( x, \ell ), h)$$
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The min/max functions return the other operand if one operand is NaN, and
bf215546Sopenharmony_ci      compare $-0 &lt; +0$. That means the following identities hold for Valhall
bf215546Sopenharmony_ci      clamps:
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      \begin{align*}
bf215546Sopenharmony_ci        \text{clamp}(-0.0, 0.0, 1.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci        \text{clamp}(-\text{NaN}, 0.0, 1.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci        \text{clamp}(\text{NaN}, 0.0, 1.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci        &amp; \\
bf215546Sopenharmony_ci        \text{clamp}(-0.0, -1.0, 1.0) &amp; = -0.0 \\
bf215546Sopenharmony_ci        \text{clamp}(\text{NaN}, -1.0, 1.0) &amp; = -1.0 \\
bf215546Sopenharmony_ci        \text{clamp}(-\text{NaN}, -1.0, 1.0) &amp; = -1.0 \\
bf215546Sopenharmony_ci        &amp; \\
bf215546Sopenharmony_ci        \max(\text{NaN}, 0.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci        \max(-\text{NaN}, 0.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci        \max(-0.0, 0.0) &amp; = +0.0 \\
bf215546Sopenharmony_ci      \end{align*}
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      This behaviour is consistent with the FMin/FMax/FClamp and
bf215546Sopenharmony_ci      NMin/NMax/NClamp rules prescribed by SPIR-V and governed by IEEE-754. As
bf215546Sopenharmony_ci      a consequence, substituting these clamps for equivalent minimum/maximum
bf215546Sopenharmony_ci      expressions is legal even with strict floating point rules.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value default="true" desc="Identity">none</value>
bf215546Sopenharmony_ci    <value desc="Clamp positive">clamp_0_inf</value>
bf215546Sopenharmony_ci    <value desc="Clamp to $[-1, 1]$">clamp_m1_1</value>
bf215546Sopenharmony_ci    <value desc="Clamp to $[0, 1]$">clamp_0_1</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Condition">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Condition code. Type must be inferred from the instruction. IEEE 754 total
bf215546Sopenharmony_ci      ordering only applies to floating point compares. "Not equal" and "greater
bf215546Sopenharmony_ci      than or less than" are distinguished by NaN behaviour conforming to
bf215546Sopenharmony_ci      the IEEE 754 specification.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Equal">eq</value>
bf215546Sopenharmony_ci    <value desc="Greater than">gt</value>
bf215546Sopenharmony_ci    <value desc="Greater than or equal">ge</value>
bf215546Sopenharmony_ci    <value desc="Not equal">ne</value>
bf215546Sopenharmony_ci    <value desc="Less than">lt</value>
bf215546Sopenharmony_ci    <value desc="Less than or equal">le</value>
bf215546Sopenharmony_ci    <value desc="Greater than or less than">gtlt</value>
bf215546Sopenharmony_ci    <value desc="Totally ordered">total</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Dimension">
bf215546Sopenharmony_ci    <desc>Texture dimension.</desc>
bf215546Sopenharmony_ci    <value desc="1D or buffer">1d</value>
bf215546Sopenharmony_ci    <value desc="2D or 2D array">2d</value>
bf215546Sopenharmony_ci    <value desc="3D or 3D array">3d</value>
bf215546Sopenharmony_ci    <value desc="Cube map or cube map array">cube</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="LOD mode">
bf215546Sopenharmony_ci    <desc>Level-of-detail selection mode in a texture instruction.</desc>
bf215546Sopenharmony_ci    <value desc="Set to zero">zero</value>
bf215546Sopenharmony_ci    <value desc="Computed based on neighboring fragments">computed</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Explicitly specified in a register">explicit</value>
bf215546Sopenharmony_ci    <value desc="Computed based on neighboring fragments added with bias in a register">computed_bias</value>
bf215546Sopenharmony_ci    <value desc="Derived from a gradient descriptor in registers">grdesc</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Register format">
bf215546Sopenharmony_ci    <desc>Format of data loaded to / stored from registers for general memory access.</desc>
bf215546Sopenharmony_ci    <value desc="32-bit type based on descriptor format">auto</value>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="32-bit floats">f32</value>
bf215546Sopenharmony_ci    <value desc="16-bit floats">f16</value>
bf215546Sopenharmony_ci    <value desc="32-bit signed integers">s32</value>
bf215546Sopenharmony_ci    <value desc="16-bit signed integers">s16</value>
bf215546Sopenharmony_ci    <value desc="32-bit unsigned integers">u32</value>
bf215546Sopenharmony_ci    <value desc="16-bit unsigned integers">u16</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Staging register count" implied="true">
bf215546Sopenharmony_ci    <value>sr0</value>
bf215546Sopenharmony_ci    <value>sr1</value>
bf215546Sopenharmony_ci    <value>sr2</value>
bf215546Sopenharmony_ci    <value>sr3</value>
bf215546Sopenharmony_ci    <value>sr4</value>
bf215546Sopenharmony_ci    <value>sr5</value>
bf215546Sopenharmony_ci    <value>sr6</value>
bf215546Sopenharmony_ci    <value>sr7</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Staging register write count" implied="true">
bf215546Sopenharmony_ci    <value>write1</value>
bf215546Sopenharmony_ci    <value>write2</value>
bf215546Sopenharmony_ci    <value>write3</value>
bf215546Sopenharmony_ci    <value>write4</value>
bf215546Sopenharmony_ci    <value>write5</value>
bf215546Sopenharmony_ci    <value>write6</value>
bf215546Sopenharmony_ci    <value>write7</value>
bf215546Sopenharmony_ci    <value>write8</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Write mask"> <!-- Every non-empty combination of the r, g, b and a channels. If list position is the encoding (TODO confirm), the order matches a 4-bit mask with r as the lowest bit. -->
bf215546Sopenharmony_ci    <reserved/> <!-- The empty-mask position is reserved. -->
bf215546Sopenharmony_ci    <value>r</value>
bf215546Sopenharmony_ci    <value>g</value>
bf215546Sopenharmony_ci    <value>rg</value>
bf215546Sopenharmony_ci    <value>b</value>
bf215546Sopenharmony_ci    <value>rb</value>
bf215546Sopenharmony_ci    <value>gb</value>
bf215546Sopenharmony_ci    <value>rgb</value>
bf215546Sopenharmony_ci    <value>a</value>
bf215546Sopenharmony_ci    <value>ra</value>
bf215546Sopenharmony_ci    <value>ga</value>
bf215546Sopenharmony_ci    <value>rga</value>
bf215546Sopenharmony_ci    <value>ba</value>
bf215546Sopenharmony_ci    <value>rba</value>
bf215546Sopenharmony_ci    <value>gba</value>
bf215546Sopenharmony_ci    <value default="true">rgba</value> <!-- Default: all four channels. -->
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Fetch component"> <!-- Selects one of the four colour channels; the gather4_ prefix suggests use by texture gather operations (TODO confirm). -->
bf215546Sopenharmony_ci    <value desc="Red">gather4_r</value>
bf215546Sopenharmony_ci    <value desc="Green">gather4_g</value>
bf215546Sopenharmony_ci    <value desc="Blue">gather4_b</value>
bf215546Sopenharmony_ci    <value desc="Alpha">gather4_a</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Register type"> <!-- NOTE(review): values here label themselves with name= while most enums in this file use desc=; confirm the parser treats both alike. -->
bf215546Sopenharmony_ci    <desc>Unsized type, part of a register format.</desc>
bf215546Sopenharmony_ci    <reserved/> <!-- First position is reserved; the three types follow it. -->
bf215546Sopenharmony_ci    <value name="Float">f</value>
bf215546Sopenharmony_ci    <value name="Unsigned">u</value>
bf215546Sopenharmony_ci    <value name="Signed">s</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Register width"> <!-- Two widths, 16 and 32; presumably measured in bits (TODO confirm). -->
bf215546Sopenharmony_ci    <desc>Untyped size, part of a register format.</desc>
bf215546Sopenharmony_ci    <value>16</value>
bf215546Sopenharmony_ci    <value>32</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Varying texture register width"> <!-- Four result layouts: two single widths followed by two dotted pairs. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Size of results for varying texture instructions. For dual 16-bit results
bf215546Sopenharmony_ci      use "16-bit".
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="16-bit">16</value>
bf215546Sopenharmony_ci    <value desc="32-bit">32</value>
bf215546Sopenharmony_ci    <value desc="16-bit, 32-bit">16.32</value> <!-- Dotted names list two result widths, as the desc attribute spells out. -->
bf215546Sopenharmony_ci    <value desc="32-bit, 32-bit">32.32</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Vector size"> <!-- Channel count modifier: scalar by default, or 2 to 4 channels. -->
bf215546Sopenharmony_ci    <desc>Number of channels loaded/stored for general memory access.</desc>
bf215546Sopenharmony_ci    <value default="true" desc="Scalar">none</value> <!-- Default: a single channel. -->
bf215546Sopenharmony_ci    <value desc="2 channels">v2</value>
bf215546Sopenharmony_ci    <value desc="3 channels">v3</value>
bf215546Sopenharmony_ci    <value desc="4 channels">v4</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Slot"> <!-- Eight positions: three general slots, four reserved positions, then slot7. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Dependency slot set on a message-passing instruction that writes to
bf215546Sopenharmony_ci      registers. Before reading the destination, a future instruction must wait
bf215546Sopenharmony_ci      on the specified slot. Slot #7 is for `BARRIER` instructions only.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Slot #0">slot0</value>
bf215546Sopenharmony_ci    <value desc="Slot #1">slot1</value>
bf215546Sopenharmony_ci    <value desc="Slot #2">slot2</value>
bf215546Sopenharmony_ci    <reserved/> <!-- The four positions between slot2 and slot7 are reserved. -->
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Slot #7">slot7</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Memory access"> <!-- Four hints; each desc names the hint and, in parentheses, an associated use. -->
bf215546Sopenharmony_ci    <desc>Memory access hint for a `LOAD` or `STORE` instruction.</desc>
bf215546Sopenharmony_ci    <value desc="No hint (global)" default="true">none</value> <!-- Default: no hint. -->
bf215546Sopenharmony_ci    <value desc="Internally streaming (position output)">istream</value>
bf215546Sopenharmony_ci    <value desc="Externally streaming (varying output)">estream</value>
bf215546Sopenharmony_ci    <value desc="Force access in discarded threads (thread local storage)">force</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Subgroup size"> <!-- Power-of-two subgroup sizes from 2 up to the full 16-thread warp. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Selects the effective subgroup size for subgroup operations. The hardware
bf215546Sopenharmony_ci      warps are sixteen threads on Valhall, but subdividing a warp may be useful
bf215546Sopenharmony_ci      for API requirements. In particular, derivatives may be calculated with
bf215546Sopenharmony_ci      quads (four threads).
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Two threads">subgroup2</value>
bf215546Sopenharmony_ci    <value desc="Four threads">subgroup4</value>
bf215546Sopenharmony_ci    <value desc="Eight threads">subgroup8</value>
bf215546Sopenharmony_ci    <value desc="Sixteen threads" default="true">subgroup16</value> <!-- Default: the whole warp. -->
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Lane operation"> <!-- NOTE(review): values here label themselves with name= while most enums in this file use desc=; confirm the parser treats both alike. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Acts as a modifier on the lane specifier for a `CLPER` instruction. The
bf215546Sopenharmony_ci      `accumulate` mode is required for efficient subgroup reductions.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value name="No operation" default="true">none</value> <!-- Default: lane specifier used unmodified. -->
bf215546Sopenharmony_ci    <value name="Exclusive-or">xor</value>
bf215546Sopenharmony_ci    <value name="Accumulate">accumulate</value>
bf215546Sopenharmony_ci    <value name="Shift">shift</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Inactive result"> <!-- Sixteen selectable 32-bit patterns; each name attribute gives the pattern in hexadecimal. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Accesses to inactive lanes (due to divergence) in a subgroup are generally
bf215546Sopenharmony_ci      undefined in APIs. However, the results of permuting with an inactive lane
bf215546Sopenharmony_ci      with `CLPER.i32` are well-defined in Valhall: they return one of the
bf215546Sopenharmony_ci      following values, as specified in the `CLPER.i32` instructions. Sometimes
bf215546Sopenharmony_ci      certain values enable small optimizations.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value name="0x00000000" default="true">zero</value> <!-- Default: all bits clear. -->
bf215546Sopenharmony_ci    <value name="0xFFFFFFFF">umax</value>
bf215546Sopenharmony_ci    <value name="0x00000001">i1</value>
bf215546Sopenharmony_ci    <value name="0x00010001">v2i1</value> <!-- The v2 and v4 prefixes repeat a 16-bit or 8-bit pattern across the word, as the hex values show. -->
bf215546Sopenharmony_ci    <value name="0x80000000">smin</value>
bf215546Sopenharmony_ci    <value name="0x7FFFFFFF">smax</value>
bf215546Sopenharmony_ci    <value name="0x80008000">v2smin</value>
bf215546Sopenharmony_ci    <value name="0x7FFF7FFF">v2smax</value>
bf215546Sopenharmony_ci    <value name="0x80808080">v4smin</value>
bf215546Sopenharmony_ci    <value name="0x7F7F7F7F">v4smax</value>
bf215546Sopenharmony_ci    <value name="0x3F800000">f1</value> <!-- IEEE 754 single-precision 1.0. -->
bf215546Sopenharmony_ci    <value name="0x3C003C00">v2f1</value> <!-- Two IEEE 754 half-precision 1.0 values. -->
bf215546Sopenharmony_ci    <value name="0xFF800000">infn</value>
bf215546Sopenharmony_ci    <value name="0x7F800000">inf</value>
bf215546Sopenharmony_ci    <value name="0xFC00FC00">v2infn</value>
bf215546Sopenharmony_ci    <value name="0x7C007C00">v2inf</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Mux"> <!-- Four selection conditions; int_zero is the default. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Condition to use for a `MUX` instruction. `neg` checks the sign bit,
bf215546Sopenharmony_ci      `int_zero` compares to `0x00000000`, `fp_zero` compares to $\pm 0.0$ as
bf215546Sopenharmony_ci      an IEEE 754 float, and `bit` checks each bit separately. The `bit` mode
bf215546Sopenharmony_ci      acts like an imaginary `CSEL.v32u1` instruction, and implements
bf215546Sopenharmony_ci      `bitselect()` in OpenCL.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Negative">neg</value>
bf215546Sopenharmony_ci    <value desc="Integer zero" default="true">int_zero</value> <!-- Default condition, although it is listed second. -->
bf215546Sopenharmony_ci    <value desc="Floating point zero">fp_zero</value>
bf215546Sopenharmony_ci    <value desc="Bitwise">bit</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Sample mode"> <!-- Four interpolation locations; no value is marked as the default. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Varying interpolation mode, for choosing the correct sample to
bf215546Sopenharmony_ci      interpolate at, allowing the `sample` and `centroid` qualifiers to be
bf215546Sopenharmony_ci      implemented, as well as the `interpolateAt*` functions.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Center">center</value>
bf215546Sopenharmony_ci    <value desc="Centroid">centroid</value>
bf215546Sopenharmony_ci    <value desc="Sample">sample</value>
bf215546Sopenharmony_ci    <value desc="Explicit">explicit</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Update mode"> <!-- Three modes across four positions; no value is marked as the default. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      The Valhall GPU maintains hidden state when interpolating varyings, to
bf215546Sopenharmony_ci      allow reusing sample location calculations. The update mode of a varying
bf215546Sopenharmony_ci      load controls this hidden state.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Store interpolation position">store</value>
bf215546Sopenharmony_ci    <value desc="Retrieve interpolation position">retrieve</value>
bf215546Sopenharmony_ci    <reserved/> <!-- The position between retrieve and clobber is reserved. -->
bf215546Sopenharmony_ci    <value desc="Clobber saved position">clobber</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Sample and update mode"> <!-- Seven permitted pairings across eight positions: four store forms, two clobber forms and retrieve. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      For fused varying/texture instructions, only the following specific
bf215546Sopenharmony_ci      combinations of sample and update modes are permitted.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Center, store">center_store</value>
bf215546Sopenharmony_ci    <value desc="Centroid, store">centroid_store</value>
bf215546Sopenharmony_ci    <value desc="Sample, store">sample_store</value>
bf215546Sopenharmony_ci    <value desc="Explicit, store">explicit_store</value>
bf215546Sopenharmony_ci    <value desc="Center, clobber">center_clobber</value>
bf215546Sopenharmony_ci    <reserved/> <!-- By analogy with the store entries this is where a centroid/clobber pairing would fall; it is reserved. -->
bf215546Sopenharmony_ci    <value desc="Sample, clobber">sample_clobber</value>
bf215546Sopenharmony_ci    <value desc="Retrieve previous state">retrieve</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Source format"> <!-- Two uninterpreted (flat) formats followed by two interpolated float formats, each in 32-bit and 16-bit widths. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      In-memory format of varyings.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Note: src_flat32 is only valid with 32-bit varying instructions and
bf215546Sopenharmony_ci      src_flat16 is only valid with 16-bit varying instructions.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Uninterpreted 32-bit values">src_flat32</value>
bf215546Sopenharmony_ci    <value desc="Uninterpreted 16-bit values">src_flat16</value>
bf215546Sopenharmony_ci    <value desc="Interpolated 32-bit floats">src_f32</value>
bf215546Sopenharmony_ci    <value desc="Interpolated 16-bit floats">src_f16</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Atomic operation"> <!-- Sparse list of sixteen positions: nine operations and seven reserved positions. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Operation performed in a general computational atomic instruction.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <reserved/> <!-- Two reserved positions precede aadd. -->
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Add">aadd</value>
bf215546Sopenharmony_ci    <reserved/> <!-- Five reserved positions separate aadd from the min/max group. -->
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <reserved/>
bf215546Sopenharmony_ci    <value desc="Signed minimum">asmin</value>
bf215546Sopenharmony_ci    <value desc="Signed maximum">asmax</value>
bf215546Sopenharmony_ci    <value desc="Unsigned minimum">aumin</value>
bf215546Sopenharmony_ci    <value desc="Unsigned maximum">aumax</value>
bf215546Sopenharmony_ci    <value desc="Bitwise and">aand</value>
bf215546Sopenharmony_ci    <value desc="Bitwise or">aor</value>
bf215546Sopenharmony_ci    <value desc="Bitwise exclusive-or">axor</value>
bf215546Sopenharmony_ci    <value desc="Exchange (must return the value)">axchg</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <enum name="Atomic operation with 1"> <!-- Five operations whose second operand is the constant 1, as the desc attributes describe. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Operation performed in a computational atomic-with-1 instruction.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <value desc="Increment">ainc</value>
bf215546Sopenharmony_ci    <value desc="Decrement">adec</value>
bf215546Sopenharmony_ci    <value desc="Unsigned maximum with 1">aumax1</value>
bf215546Sopenharmony_ci    <value desc="Signed maximum with 1">asmax1</value>
bf215546Sopenharmony_ci    <value desc="Set bottom bit">aor1</value>
bf215546Sopenharmony_ci  </enum>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="NOP" title="No operation" dests="0" opcode="0x00" unit="CVT"> <!-- Declares no destinations, sources, immediates or modifiers. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Do nothing. Useful at the start of a block for waiting on slots required
bf215546Sopenharmony_ci      by the first actual instruction of the block, to reconcile dependencies
bf215546Sopenharmony_ci      after a branch. Also useful as the sole instruction of an empty shader.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="BRANCHZ" title="Compare to zero and branch" dests="0" opcode="0x1F" unit="CVT"> <!-- Direct conditional branch: one compared source plus an immediate offset; writes no destination. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Branches to a specified relative offset if its source is nonzero (default)
bf215546Sopenharmony_ci      or if its source is zero (if `.eq` is set). The offset is 27-bits and
bf215546Sopenharmony_ci      sign-extended, giving an effective range of ±26-bits. The offset is
bf215546Sopenharmony_ci      specified in units of instructions, relative to the *next* instruction.
bf215546Sopenharmony_ci      Positive offsets may be interpreted as "number of instructions to skip".
bf215546Sopenharmony_ci      Since Valhall instructions are 8 bytes, this operates as:
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      $$PC := \begin{cases} PC + 8 \cdot (\text{offset} \; + 1) &amp; \text{if} \;
bf215546Sopenharmony_ci      \text{src} \stackrel{?}{=} 0 \\ PC + 8 &amp; \text{otherwise} \end{cases}$$
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Used with comparison instructions to implement control flow. Tie the
bf215546Sopenharmony_ci      source to a nonzero constant to implement a jump. May introduce
bf215546Sopenharmony_ci      divergence, so generally requires `.reconverge` flow control.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src combine="true">Value to compare against zero</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="27" signed="true"/> <!-- The 27-bit signed branch offset described above, starting at bit 8. -->
bf215546Sopenharmony_ci    <conservative/>
bf215546Sopenharmony_ci    <mod name="eq" start="36" size="1"/> <!-- One-bit flag at bit 36; when set, branch if the source is zero. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="DISCARD.f32" title="Discard fragment" dests="0" opcode="0x20" unit="CVT"> <!-- Conditional fragment discard: a comparison modifier plus two compared sources; writes no destination. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition, and if it passes, discards the current
bf215546Sopenharmony_ci      fragment and terminates the thread. Only valid in a **fragment** shader.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">Left value to compare</src> <!-- Both operands enable the absneg and swizzle source options. -->
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">Right value to compare</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="BRANCHZI" title="Compare to zero and branch indirect" opcode="0x2F" unit="CVT"> <!-- NOTE(review): no dests attribute here, unlike BRANCHZ which sets dests="0"; confirm the parser default is appropriate. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Jump to an indirectly specified (absolute or relative) address. Used to
bf215546Sopenharmony_ci      jump to blend shaders at the end of a fragment shader.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src combine="true">Value to compare against zero</src>
bf215546Sopenharmony_ci    <src>Branch target</src> <!-- Target comes from a source operand rather than an immediate. -->
bf215546Sopenharmony_ci    <conservative/>
bf215546Sopenharmony_ci    <mod name="eq" start="36" size="1"/>
bf215546Sopenharmony_ci    <mod name="absolute" start="40" size="1"/> <!-- One-bit flag at bit 40; presumably selects an absolute rather than relative target (TODO confirm). -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="BARRIER" title="Execution and memory barrier" opcode="0x45" unit="NONE"> <!-- Takes no sources; the only operand-like field is the slot modifier. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      General-purpose barrier. Must use slot #7. Must be paired with a
bf215546Sopenharmony_ci      `.wait` flow on the instruction.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <slot/> <!-- Per the description this must be set to slot #7. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CSEL" title="Floating-point conditional select" dests="1" unit="CVT"> <!-- Two opcodes (f32 and v2f16) sharing the operand layout below. -->
bf215546Sopenharmony_ci    <ins name="CSEL.f32" opcode="0x154"/>
bf215546Sopenharmony_ci    <ins name="CSEL.v2f16" opcode="0x155"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition and outputs either the true source or the
bf215546Sopenharmony_ci      false source.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <src float="true">Left value to compare</src> <!-- All four sources are flagged float="true". -->
bf215546Sopenharmony_ci    <src float="true">Right value to compare</src>
bf215546Sopenharmony_ci    <src float="true">Return value if true</src>
bf215546Sopenharmony_ci    <src float="true">Return value if false</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CSEL" title="Integer conditional select" dests="1" unit="CVT"> <!-- Four opcodes: unsigned and signed, each in 32-bit and v2 16-bit forms, sharing the operand layout below. -->
bf215546Sopenharmony_ci    <ins name="CSEL.u32" opcode="0x150"/>
bf215546Sopenharmony_ci    <ins name="CSEL.v2u16" opcode="0x151"/>
bf215546Sopenharmony_ci    <ins name="CSEL.s32" opcode="0x158"/>
bf215546Sopenharmony_ci    <ins name="CSEL.v2s16" opcode="0x159"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition and outputs either the true source or the
bf215546Sopenharmony_ci      false source.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Valhall lacks integer minimum/maximum instructions. `CSEL` instructions
bf215546Sopenharmony_ci      with tied operands form the canonical implementations of these
bf215546Sopenharmony_ci      instructions. Similarly, the integer $\text{sign}$ function is canonically
bf215546Sopenharmony_ci      implemented with a pair of `CSEL` instructions.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <src>Left value to compare</src> <!-- Sources carry no float flag, unlike the floating-point CSEL group. -->
bf215546Sopenharmony_ci    <src>Right value to compare</src>
bf215546Sopenharmony_ci    <src>Return value if true</src>
bf215546Sopenharmony_ci    <src>Return value if false</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_VAR_SPECIAL" title="Load special varying" opcode="0x56" unit="V"> <!-- Has no desc element; the index immediate at the end picks which special value is loaded. -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sample/>
bf215546Sopenharmony_ci    <update/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src/> <!-- Single source with no description in this file. -->
bf215546Sopenharmony_ci    <imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="LD_VAR_BUF_IMM" title="Load immediate varying" unit="V"> <!-- Two opcodes (f32 and f16); the varying is chosen by an immediate index. -->
bf215546Sopenharmony_ci    <desc>Interpolates a given varying from hardware buffer</desc>
bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF_IMM.f32" opcode="0x5C"/>
bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF_IMM.f16" opcode="0x5D"/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <source_format/>
bf215546Sopenharmony_ci    <sample/>
bf215546Sopenharmony_ci    <update/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src/> <!-- Single source with no description in this file. -->
bf215546Sopenharmony_ci    <imm name="index" start="16" size="8"/> <!-- 8-bit immediate starting at bit 16. -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="LD_VAR_BUF" title="Load indirect varying" unit="V"> <!-- Two opcodes (f32 and f16); has a second source where the _IMM form has an index immediate. -->
bf215546Sopenharmony_ci    <desc>Interpolates a given varying from hardware buffer</desc>
bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF.f32" opcode="0x6C"/>
bf215546Sopenharmony_ci    <ins name="LD_VAR_BUF.f16" opcode="0x6D"/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <source_format/>
bf215546Sopenharmony_ci    <sample/>
bf215546Sopenharmony_ci    <update/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src/> <!-- Neither source is described in this file. -->
bf215546Sopenharmony_ci    <src/> <!-- Presumably carries the varying index, given the title (TODO confirm). -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_VAR" title="Load indirect varying" unit="V" opcode="0x64"> <!-- Software-buffer varying load that takes the index and table from a source operand. -->
bf215546Sopenharmony_ci    <desc>Interpolates a given varying from a software buffer</desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sample/>
bf215546Sopenharmony_ci    <update/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src/> <!-- First source has no description in this file. -->
bf215546Sopenharmony_ci    <src>Varying index and table</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_VAR_IMM" title="Load immediate varying" unit="V" opcode="0x54"> <!-- Software-buffer varying load with table and index given as immediates. -->
bf215546Sopenharmony_ci    <desc>Interpolates a given varying from a software buffer</desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sample/>
bf215546Sopenharmony_ci    <update/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src/> <!-- Single source with no description in this file. -->
bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/> <!-- 4-bit table number starting at bit 8. -->
bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/> <!-- 8-bit index starting at bit 12, directly above the table field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_VAR_FLAT" title="Load indirect varying" unit="V" opcode="0x55"> <!-- Flat varying fetch; declares no sample or update modifiers, unlike the interpolating LD_VAR forms. -->
bf215546Sopenharmony_ci    <desc>Fetches a given varying from a software buffer</desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src>Varying index and table</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_VAR_FLAT_IMM" title="Load immediate varying" unit="V" opcode="0x41"> <!-- Flat varying fetch with table and index given as immediates; takes no source operands. -->
bf215546Sopenharmony_ci    <desc>Fetches a given varying from a software buffer</desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/> <!-- 4-bit table number starting at bit 8. -->
bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/> <!-- 8-bit index starting at bit 12, directly above the table field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_ATTR_IMM" title="Load immediate attribute" opcode="0x66" opcode2="0" unit="LS"> <!-- Shares opcode 0x66 with LD_TEX_IMM; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load `vecsize` components from the attribute descriptor at entry `index`
bf215546Sopenharmony_ci      of resource table `table` at index (vertex ID, instance ID), converting
bf215546Sopenharmony_ci      to the specified register format.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>Vertex ID</src>
bf215546Sopenharmony_ci    <src>Instance ID</src>
bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- 4-bit descriptor index starting at bit 20. -->
bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/> <!-- 4-bit table number starting at bit 16, directly below the index field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_ATTR" title="Load indirect attribute" opcode="0x76" opcode2="0" unit="LS"> <!-- Shares opcode 0x76 with LD_TEX; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load `vecsize` components from the attribute descriptor at the specified
bf215546Sopenharmony_ci      location at index (vertex ID, instance ID), converting
bf215546Sopenharmony_ci      to the specified register format.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The index must not diverge within a warp.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>Vertex ID</src>
bf215546Sopenharmony_ci    <src>Instance ID</src>
bf215546Sopenharmony_ci    <src>Index and table</src> <!-- Third source takes the place of the index and table immediates of LD_ATTR_IMM. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_TEX_IMM" title="Load immediate texture" opcode="0x66" opcode2="1" unit="LS"> <!-- Shares opcode 0x66 with LD_ATTR_IMM; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load `vecsize` components from the texture descriptor at entry `index`
bf215546Sopenharmony_ci      of resource table `table`, converting
bf215546Sopenharmony_ci      to the specified register format.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src> <!-- Each coordinate source holds two coordinates, per the 16:16 notation. -->
bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- 4-bit descriptor index starting at bit 20. -->
bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/> <!-- 4-bit table number starting at bit 16, directly below the index field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_TEX" title="Load indirect texture" opcode="0x76" opcode2="1" unit="LS"> <!-- Shares opcode 0x76 with LD_ATTR; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load `vecsize` components from the texture descriptor at the specified
bf215546Sopenharmony_ci      location at index, converting
bf215546Sopenharmony_ci      to the specified register format.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src> <!-- Each coordinate source holds two coordinates, per the 16:16 notation. -->
bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
bf215546Sopenharmony_ci    <src>Index and table</src> <!-- Third source takes the place of the index and table immediates of LD_TEX_IMM. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LEA_ATTR_IMM" title="Load effective address of attribute" opcode="0x67" opcode2="0" unit="LS"> <!-- Shares opcode 0x67 with LEA_TEX_IMM; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load the effective address of an attribute specified with the
bf215546Sopenharmony_ci      given immediate index. Returns three staging registers: the low/high
bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>Vertex index</src>
bf215546Sopenharmony_ci    <src>Instance index</src>
bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/> <!-- 4-bit table number starting at bit 16. -->
bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- 4-bit attribute index starting at bit 20, directly above the table field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LEA_ATTR" title="Load effective address of attribute" opcode="0x77" opcode2="0" unit="LS"> <!-- Shares opcode 0x77 with LEA_TEX; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load the effective address of an attribute specified with the
bf215546Sopenharmony_ci      given index. Returns three staging registers: the low/high
bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <vecsize/> <!-- NOTE(review): this form declares vecsize while LEA_ATTR_IMM does not; confirm that is intended. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>Vertex index</src>
bf215546Sopenharmony_ci    <src>Instance index</src>
bf215546Sopenharmony_ci    <src>Attribute index and table</src> <!-- Third source takes the place of the table and index immediates of LEA_ATTR_IMM. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LEA_TEX_IMM" title="Load effective address of image texel" opcode="0x67" opcode2="1" unit="LS"> <!-- Shares opcode 0x67 with LEA_ATTR_IMM; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load the effective address of a texel from the image specified with the
bf215546Sopenharmony_ci      given immediate index. Returns three staging registers: the low/high
bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor. The format
bf215546Sopenharmony_ci      of the internal conversion descriptor is compatible with Bifrost but
bf215546Sopenharmony_ci      omits the register format, as this is specified with the ST_CVT
bf215546Sopenharmony_ci      instruction on Valhall.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Coordinates are specified as 16-bit integers, packed into 32-bit sources.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src>X/Y coordinates (16:16)</src> <!-- Two 16-bit coordinates packed into one 32-bit source, as the description states. -->
bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
bf215546Sopenharmony_ci    <imm name="table" start="16" size="4"/> <!-- 4-bit table number starting at bit 16. -->
bf215546Sopenharmony_ci    <imm name="index" start="20" size="4"/> <!-- 4-bit image index starting at bit 20, directly above the table field. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LEA_TEX" title="Load effective address of image texel" opcode="0x77" opcode2="1" unit="LS"> <!-- Shares opcode 0x77 with LEA_ATTR; opcode2 tells the two apart. -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load the effective address of a texel from the image specified with the
bf215546Sopenharmony_ci      given index. Returns three staging registers: the low/high
bf215546Sopenharmony_ci      32-bits of the address and the internal conversion descriptor. The format
bf215546Sopenharmony_ci      of the internal conversion descriptor is compatible with Bifrost but
bf215546Sopenharmony_ci      omits the register format, as this is specified with the ST_CVT
bf215546Sopenharmony_ci      instruction on Valhall.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Coordinates are specified as 16-bit integers, packed into 32-bit sources.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <vecsize/> <!-- NOTE(review): this form declares vecsize while LEA_TEX_IMM does not; confirm that is intended. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="descriptor_type" start="128" size="1" implied="true"/> <!-- NOTE(review): start=128 lies beyond an 8-byte instruction word; presumably never encoded because the modifier is implied (TODO confirm). -->
bf215546Sopenharmony_ci    <sr write="true"/> <!-- Result is written to staging registers. -->
bf215546Sopenharmony_ci    <src size="16">X/Y coordinates (16:16)</src> <!-- NOTE(review): only this coordinate source sets size="16"; the Z/W source and LEA_TEX_IMM do not. Confirm whether that is intended. -->
bf215546Sopenharmony_ci    <src>Z/W coordinates (16:16)</src>
bf215546Sopenharmony_ci    <src>Index and table</src> <!-- Third source takes the place of the table and index immediates of LEA_TEX_IMM. -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i8" title="Global memory load" opcode="0x6a" opcode2="0" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_8_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension of the loaded lane - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src> <!-- 32-bit offset, not a 64-bit address; wording matches the other LD_BUFFER widths -->
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i16" title="Global memory load" opcode="0x6a" opcode2="1" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_16_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i24" title="Global memory load" opcode="0x6a" opcode2="2" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_24_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i32" title="Global memory load" opcode="0x6a" opcode2="3" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_32_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i48" title="Global memory load" opcode="0x6a" opcode2="4" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_48_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i64" title="Global memory load" opcode="0x6a" opcode2="5" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_64_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i96" title="Global memory load" opcode="0x6a" opcode2="6" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_96_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_BUFFER.i128" title="Global memory load" opcode="0x6a" opcode2="7" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
bf215546Sopenharmony_ci      all-ones, load from the buffer descriptors in the table indexed by the
bf215546Sopenharmony_ci      bottom byte of the mode descriptor. If they are all zeroes, load the
bf215546Sopenharmony_ci      contents of the buffer in the first table indexed by the bottom byte of
bf215546Sopenharmony_ci      the mode descriptor.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_128_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="32">Byte offset</src>
bf215546Sopenharmony_ci    <src size="32">Mode descriptor</src> <!-- its top bits and bottom byte select what is loaded (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LEA_BUF_IMM" title="Load buffer effective address" opcode="0x5E" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Load effective address of a buffer with an immediate offset added.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- computed address is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <imm name="table" start="8" size="4"/> <!-- 4-bit immediate; presumably the descriptor table number - TODO confirm -->
bf215546Sopenharmony_ci    <imm name="index" start="12" size="8"/> <!-- 8-bit immediate placed directly after the table field (8 + 4 = 12) -->
bf215546Sopenharmony_ci    <src>Linear ID</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i8" title="Global memory load" opcode="0x60" opcode2="0" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_8_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i16" title="Global memory load" opcode="0x60" opcode2="1" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_16_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i24" title="Global memory load" opcode="0x60" opcode2="2" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_24_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i32" title="Global memory load" opcode="0x60" opcode2="3" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_32_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i48" title="Global memory load" opcode="0x60" opcode2="4" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_48_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i64" title="Global memory load" opcode="0x60" opcode2="5" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_64_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i96" title="Global memory load" opcode="0x60" opcode2="6" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_96_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LOAD.i128" title="Global memory load" opcode="0x60" opcode2="7" unit="LS">
bf215546Sopenharmony_ci    <desc>Loads from main memory</desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded data is written to staging registers -->
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <mod name="load_lane_128_bit" start="36" size="3"/> <!-- width-specific lane modifier; start/size are presumably bit offset and width in the encoding - TODO confirm -->
bf215546Sopenharmony_ci    <mod name="unsigned" start="39" size="1"/> <!-- one-bit flag; presumably zero- instead of sign-extension - TODO confirm -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to load from after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="STORE" title="Global memory store" opcode="0x61" unit="LS">
bf215546Sopenharmony_ci    <desc>Stores to main memory</desc>
bf215546Sopenharmony_ci    <sr read="true"/> <!-- data to store is read from staging registers -->
bf215546Sopenharmony_ci    <ins name="STORE.i8" opcode2="0x0"/> <!-- members share opcode 0x61; opcode2 0x0..0x7 picks the width from i8 up to i128 -->
bf215546Sopenharmony_ci    <ins name="STORE.i16" opcode2="0x1"/>
bf215546Sopenharmony_ci    <ins name="STORE.i24" opcode2="0x2"/>
bf215546Sopenharmony_ci    <ins name="STORE.i32" opcode2="0x3"/>
bf215546Sopenharmony_ci    <ins name="STORE.i48" opcode2="0x4"/>
bf215546Sopenharmony_ci    <ins name="STORE.i64" opcode2="0x5"/>
bf215546Sopenharmony_ci    <ins name="STORE.i96" opcode2="0x6"/>
bf215546Sopenharmony_ci    <ins name="STORE.i128" opcode2="0x7"/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <memory_access/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src size="64">Address to store to after adding offset</src> <!-- 64-bit base address -->
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="16" signed="true"/> <!-- signed 16-bit immediate added to the base address -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ST_CVT" title="Store with conversion" opcode="0x71" unit="LS">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Store to memory with data conversion. The address to store to is given in
bf215546Sopenharmony_ci      the first source, which must be a 64-bit register (a pair of 32-bit
bf215546Sopenharmony_ci      registers). The other source is the conversion descriptor used for the store.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      Used with LEA_TEX_IMM to implement image stores.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <mod name="memory_access" start="37" size="3"/> <!-- explicit field here, whereas LOAD/STORE use the memory_access shorthand element; NOTE(review): confirm position 37 and width 3 are intended -->
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <sr read="true"/> <!-- data to store is read from staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <src size="64">64-bit address to store to</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/> <!-- 8-bit immediate; no signed attribute is set on this one -->
bf215546Sopenharmony_ci    <src>Internal conversion descriptor</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="LD_TILE" title="Load from tilebuffer" opcode="0x78" unit="NONE">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Loads a given render target, specified in the pixel indices descriptor, at
bf215546Sopenharmony_ci      a given location and sample, and convert to the format specified in the
bf215546Sopenharmony_ci      internal conversion descriptor. Used to implement EXT_framebuffer_fetch
bf215546Sopenharmony_ci      and internally in blend shaders.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true"/> <!-- loaded, converted value is written to staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src>Pixel indices descriptor</src> <!-- selects the render target (see desc) -->
bf215546Sopenharmony_ci    <src>Coverage mask</src>
bf215546Sopenharmony_ci    <src>Conversion descriptor</src> <!-- gives the format to convert to (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ST_TILE" title="Store to tilebuffer" opcode="0x79" unit="NONE">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Store to given render target, specified in the pixel indices descriptor, at
bf215546Sopenharmony_ci      a given location and sample, and convert to the format specified in the
bf215546Sopenharmony_ci      internal conversion descriptor. Used internally in blend shaders.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr read="true"/> <!-- value to store is read from staging registers -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <src>Pixel indices descriptor</src> <!-- selects the render target (see desc) -->
bf215546Sopenharmony_ci    <src>Coverage mask</src>
bf215546Sopenharmony_ci    <src>Conversion descriptor</src> <!-- gives the format to convert to (see desc) -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="BLEND" title="Blend render target" opcode="0x7F" unit="NONE">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Blends a given render target. This loads the API-specified blend state for
bf215546Sopenharmony_ci      the render target from the first source. Blend descriptors are available
bf215546Sopenharmony_ci      as special immediates. It then reads the colour to be blended from the
bf215546Sopenharmony_ci      first staging register, with the specified vector size and register format
bf215546Sopenharmony_ci      as desired. The resulting coverage mask is stored to the second set of
bf215546Sopenharmony_ci      staging registers.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      In the fixed-function path, `BLEND` sends the colour to the blender to be
bf215546Sopenharmony_ci      written to the tilebuffer. Then, if the instruction's flow control
bf215546Sopenharmony_ci      specifies termination, the fragment program is ended. If it does not
bf215546Sopenharmony_ci      specify termination, `BLEND` acts as a relative branch, branching with the
bf215546Sopenharmony_ci      offset specified as `target`. This allows the subsequent instructions to
bf215546Sopenharmony_ci      be skipped when fixed-function blending is used. Note this implicit branch
bf215546Sopenharmony_ci      can never introduce divergence, so `.reconverge` is not required.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      In the blend shader path, `BLEND` ignores the specified flow control and
bf215546Sopenharmony_ci      does not branch to the specified offset. Instead, execution continues
bf215546Sopenharmony_ci      normally with the next instruction. The compiler should insert code for
bf215546Sopenharmony_ci      calling a blend shader after the `BLEND` instruction unless it is known
bf215546Sopenharmony_ci      that a blend shader will never be required.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The indirection is required to support both fixed-function and blend
bf215546Sopenharmony_ci      shaders efficiently and without shader variants.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr read="true"/> <!-- colour to blend is read from staging registers (see desc); NOTE(review): desc also mentions a written coverage mask but only a read sr is declared -->
bf215546Sopenharmony_ci    <src size="64">Blend descriptor</src> <!-- first source: 64-bit blend state for the render target -->
bf215546Sopenharmony_ci    <src>Sample coverage</src>
bf215546Sopenharmony_ci    <imm name="target" start="8" size="8"/> <!-- relative branch offset taken in the fixed-function path (see desc) -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <vecsize/>
bf215546Sopenharmony_ci    <regfmt/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATEST" title="Alpha test" opcode="0x7D" unit="NONE">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Does alpha-to-coverage testing, updating the sample coverage mask. ATEST
bf215546Sopenharmony_ci      does not do an implicit discard. It should be executed before the first
bf215546Sopenharmony_ci      ZS_EMIT or BLEND instruction.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <sr write="true">Updated coverage mask</sr> <!-- output: coverage mask after the test -->
bf215546Sopenharmony_ci    <src>Input coverage mask</src>
bf215546Sopenharmony_ci    <src swizzle="true">Alpha value (render target 0)</src>
bf215546Sopenharmony_ci    <src/> <!-- NOTE(review): third source carries no description; its role is not visible from this entry -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ZS_EMIT" title="Depth/stencil write" opcode="0x7E" unit="NONE">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Programmatically writes out depth, stencil, or both, depending on which
bf215546Sopenharmony_ci      modifiers are set. Used to implement gl_FragDepth and gl_FragStencil.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="z" start="25" size="1"/> <!-- one-bit flag: write the depth value -->
bf215546Sopenharmony_ci    <mod name="stencil" start="24" size="1"/> <!-- one-bit flag: write the stencil value -->
bf215546Sopenharmony_ci    <sr write="true">Updated coverage mask</sr> <!-- output: coverage mask after the write -->
bf215546Sopenharmony_ci    <src>Depth value</src>
bf215546Sopenharmony_ci    <src>Stencil value</src>
bf215546Sopenharmony_ci    <src>Input coverage mask</src>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="Data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs the given data conversion. Note that floating-point rounding is
bf215546Sopenharmony_ci      handled via the same hardware and therefore shares an encoding. Round mode
bf215546Sopenharmony_ci      is specified where it makes sense.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="V2S16_TO_V2F16" opcode2="0x7"/> <!-- each unsigned variant below uses the signed opcode2 plus 0x10 (0x7/0x17, 0x9/0x19) -->
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="S32_TO_F32" opcode2="0x9"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="V2U16_TO_V2F16" opcode2="0x17"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="U32_TO_F32" opcode2="0x19"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <roundmode/> <!-- these int-to-float conversions take a round mode -->
bf215546Sopenharmony_ci    <src widen="true">Value to convert</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="16->32 integer data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs the given data conversion.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="S16_TO_S32" opcode2="0x4"/> <!-- unsigned variants use the signed opcode2 plus 0x10 (0x4/0x14, 0x5/0x15) -->
bf215546Sopenharmony_ci    <ins name="S16_TO_F32" opcode2="0x5"/>
bf215546Sopenharmony_ci    <ins name="U16_TO_U32" opcode2="0x14"/>
bf215546Sopenharmony_ci    <ins name="U16_TO_F32" opcode2="0x15"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <src swizzle="true" size="16">Value to convert</src> <!-- 16-bit source; no roundmode element is declared for this group -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>Performs the given data conversion.</desc>
bf215546Sopenharmony_ci    <ins name="F32_TO_S32" opcode2="0xC"/> <!-- 32-bit float sources; unsigned variant is the signed opcode2 plus 0x10 -->
bf215546Sopenharmony_ci    <ins name="F32_TO_U32" opcode2="0x1C"/>
bf215546Sopenharmony_ci    <roundmode/>
bf215546Sopenharmony_ci    <src absneg="true">Value to convert</src> <!-- absneg presumably enables abs/negate source modifiers - TODO confirm -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>Performs the given data conversion.</desc>
bf215546Sopenharmony_ci    <ins name="V2F16_TO_V2S16" opcode2="0xE"/> <!-- 16-bit float sources; unsigned variants are the signed opcode2 plus 0x10 (0xE/0x1E, 0xA/0x1A) -->
bf215546Sopenharmony_ci    <ins name="V2F16_TO_V2U16" opcode2="0x1E"/>
bf215546Sopenharmony_ci    <ins name="F16_TO_S32" opcode2="0xA"/>
bf215546Sopenharmony_ci    <ins name="F16_TO_U32" opcode2="0x1A"/>
bf215546Sopenharmony_ci    <roundmode/>
bf215546Sopenharmony_ci    <src swizzle="true" absneg="true" size="16">Value to convert</src> <!-- same title as the 32-bit float group, but the source here is 16-bit with a swizzle -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="F16_TO_F32" title="16-bit float to 32-bit float conversion" dests="1" opcode="0x90" opcode2="0xB" unit="CVT">
bf215546Sopenharmony_ci    <desc>Converts up with the specified round mode.</desc>
bf215546Sopenharmony_ci    <roundmode/>
bf215546Sopenharmony_ci    <src lane="28" size="16" absneg="true">Value to convert</src> <!-- 16-bit source; lane="28" presumably places the half-select field at bit 28 - TODO confirm -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="8-bit to 32-bit data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs the given data conversion.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="S8_TO_S32" opcode2="0x0"/> <!-- unsigned variants use the signed opcode2 plus 0x10 (0x0/0x10, 0x1/0x11) -->
bf215546Sopenharmony_ci    <ins name="S8_TO_F32" opcode2="0x1"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="U8_TO_U32" opcode2="0x10"/>
bf215546Sopenharmony_ci    <ins name="U8_TO_F32" opcode2="0x11"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <src lane="28" size="8">Value to convert</src> <!-- 8-bit source; lane="28" presumably places the byte-select field at bit 28 - TODO confirm -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="CONVERT" title="8-bit to 16-bit data conversions" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs the given data conversion.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="V2S8_TO_V2S16" opcode2="0x2"/> <!-- unsigned variants use the signed opcode2 plus 0x10 (0x2/0x12, 0x3/0x13) -->
bf215546Sopenharmony_ci    <ins name="V2S8_TO_V2F16" opcode2="0x3"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="V2U8_TO_V2U16" opcode2="0x12"/>
bf215546Sopenharmony_ci    <ins name="V2U8_TO_V2F16" opcode2="0x13"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <src halfswizzle="true" size="8">Value to convert</src> <!-- 8-bit source; halfswizzle presumably picks which pair of bytes feeds the two lanes - TODO confirm -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="FROUND" title="Floating-point rounding" dests="1" opcode="0x90" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs the given rounding, using the convert unit.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <ins name="FROUND.f32" opcode2="0xD"/> <!-- uses opcode 0x90, the same primary opcode as the CONVERT groups -->
bf215546Sopenharmony_ci    <ins name="FROUND.v2f16" opcode2="0xF"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <roundmode/> <!-- selects the rounding to perform -->
bf215546Sopenharmony_ci    <src swizzle="true" absneg="true">Value to convert</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="MOV.i32" title="Register move" dests="1" opcode="0x91" opcode2="0x0" unit="CVT">
bf215546Sopenharmony_ci    <desc>Canonical register-to-register move.</desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source: the value being moved -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="CLZ.u32" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x4" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source: the value whose leading zeroes are counted -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="CLZ.v2u16" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x5" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source; per the name suffix, presumably two packed 16-bit lanes - TODO confirm -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="CLZ.v4u8" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x6" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used as a primitive for various bitwise operations.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source; per the name suffix, presumably four packed 8-bit lanes - TODO confirm -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IABS.s32" title="Absolute value" dests="1" opcode="0x91" opcode2="0x8" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      64-bit abs may be constructed in 4 instructions (5 clocks) by checking the
bf215546Sopenharmony_ci      sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with
bf215546Sopenharmony_ci      `IADD.s64` and `LSHIFT_XOR.i32` on each half.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src widen="true"/> <!-- single source; widen presumably lets a narrower lane be extended before use - TODO confirm -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IABS.v2s16" title="Absolute value" dests="1" opcode="0x91" opcode2="0x9" unit="CVT">
bf215546Sopenharmony_ci    <src widen="true"/> <!-- this entry has no desc element, only the title; widen presumably lets a narrower lane be extended - TODO confirm -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IABS.v4s8" title="Absolute value" dests="1" opcode="0x91" opcode2="0xa" unit="CVT">
bf215546Sopenharmony_ci    <src/> <!-- this entry has no desc element, only the title; the source carries no widen attribute, unlike the s32 and v2s16 forms -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="POPCOUNT.i32" title="Population count" dests="1" opcode="0x91" opcode2="0xC" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only available as 32-bit. Smaller bitsizes require explicit conversions.
bf215546Sopenharmony_ci      64-bit popcount may be constructed in 3 clocks by separate 32-bit
bf215546Sopenharmony_ci      popcounts of each half and a 32-bit add, which is guaranteed not to
bf215546Sopenharmony_ci      overflow.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source: the 32-bit value whose set bits are counted -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="BITREV.i32" title="Bitwise reverse" dests="1" opcode="0x91" opcode2="0xD" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only available as 32-bit. Other bitsizes may be derived with swizzles.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/> <!-- single undescribed source: the 32-bit value to reverse -->
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op under opcode 0x91, opcode2 0xE.
bf215546Sopenharmony_ci    NOTE(review): the _OLD suffix, together with the pointer to the shift
bf215546Sopenharmony_ci    opcodes in the description, suggests this encoding is superseded; confirm.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="NOT_OLD.i32" title="Bitwise complement" dests="1" opcode="0x91" opcode2="0xE" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      For fully featured bitwise operation, see the shift opcodes.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    64-bit counterpart of NOT_OLD.i32: same opcode2 (0xE) and unit, but the
bf215546Sopenharmony_ci    primary opcode is 0x191 instead of 0x91.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="NOT_OLD.i64" title="Bitwise complement" dests="1" opcode="0x191" opcode2="0xE" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      For fully featured bitwise operation, see the shift opcodes.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Subgroup-wide op on the CVT unit, opcode 0x95, with no opcode2. Takes one
bf215546Sopenharmony_ci    source plus the subgroup size selector.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="WMASK" title="Warp mask" dests="1" opcode="0x95" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Returns the mask of lanes ever active within the warp (subgroup), such
bf215546Sopenharmony_ci      that the source is nonzero. The number of work-items in a subgroup is
bf215546Sopenharmony_ci      given as the popcount of this value with a nonzero input.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      An `all()` subgroup operation may be constructed as `WMASK` of the input
bf215546Sopenharmony_ci      compared for equality with `WMASK` of a nonzero value.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      An `any()` subgroup operation may be constructed as `WMASK` of the input
bf215546Sopenharmony_ci      compared against zero.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/>
bf215546Sopenharmony_ci    <subgroup/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Four instructions under opcode 0x99, numbered 0..3 by opcode2: FREXPM and
bf215546Sopenharmony_ci    FREXPE, each in f32 and v2f16 (presumably M = mantissa, E = exponent,
bf215546Sopenharmony_ci    following the description). The sqrt and log modifiers are single bits at
bf215546Sopenharmony_ci    positions 24 and 25.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="FREXP" title="Fraction/exponent extract" dests="1" opcode="0x99" unit="CVT">
bf215546Sopenharmony_ci    <ins name="FREXPM.f32" opcode2="0"/>
bf215546Sopenharmony_ci    <ins name="FREXPM.v2f16" opcode2="1"/>
bf215546Sopenharmony_ci    <ins name="FREXPE.f32" opcode2="2"/>
bf215546Sopenharmony_ci    <ins name="FREXPE.v2f16" opcode2="3"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Breaks up the floating-point input into its fractional (mantissa) and
bf215546Sopenharmony_ci      exponent parts. By default, this is compatible with the `frexp()` function
bf215546Sopenharmony_ci      in APIs. With the log/sqrt modifiers, the floating point format is
bf215546Sopenharmony_ci      adjusted to be compatible with Valhall's argument reduction for logarithm
bf215546Sopenharmony_ci      and square root computation respectively.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="sqrt" start="24" size="1"/>
bf215546Sopenharmony_ci    <mod name="log" start="25" size="1"/>
bf215546Sopenharmony_ci    <src float="true" swizzle="true"/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    First of two groups named "SFU" under opcode 0x9C. Its members share one
bf215546Sopenharmony_ci    source flagged float, swizzle and absneg. opcode2 values are written in
bf215546Sopenharmony_ci    decimal and are not contiguous (0-3, 8, 10-12, 14, 15); 4, 5, 6 and 13
bf215546Sopenharmony_ci    belong to the second SFU group.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
bf215546Sopenharmony_ci    <ins name="FRCP.f32" opcode2="0"/>
bf215546Sopenharmony_ci    <ins name="FRCP.f16" opcode2="1"/>
bf215546Sopenharmony_ci    <ins name="FRSQ.f32" opcode2="2"/>
bf215546Sopenharmony_ci    <ins name="FRSQ.f16" opcode2="3"/>
bf215546Sopenharmony_ci    <ins name="FLOGD.f32" opcode2="8"/>
bf215546Sopenharmony_ci    <ins name="FPCLASS.f32" opcode2="10"/>
bf215546Sopenharmony_ci    <ins name="FPCLASS.f16" opcode2="11"/>
bf215546Sopenharmony_ci    <ins name="FLOG_TABLE.f32" opcode2="12"/>
bf215546Sopenharmony_ci    <ins name="FRCP_APPROX.f32" opcode2="14"/>
bf215546Sopenharmony_ci    <ins name="FRSQ_APPROX.f32" opcode2="15"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs a given special function. The floating-point reciprocal (`FRCP`)
bf215546Sopenharmony_ci      and reciprocal square root (`FRSQ`) instructions may be freely used as-is.
bf215546Sopenharmony_ci      The logarithm instruction (`FLOGD.f32`) requires an argument
bf215546Sopenharmony_ci      reduction. See the transcendentals section for more information. Like the
bf215546Sopenharmony_ci      Bifrost op, `FRSQ_APPROX.f32` does an implicit `FREXPM.f32.sqrt` on the
bf215546Sopenharmony_ci      source.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src float="true" swizzle="true" absneg="true"/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Second group named "SFU" under opcode 0x9C. It holds the .u6/.u4 members,
bf215546Sopenharmony_ci    whose single source has no float, swizzle or absneg attributes. It uses
bf215546Sopenharmony_ci    opcode2 4, 5, 6 and 13, which the first SFU group leaves unused.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
bf215546Sopenharmony_ci    <ins name="FSIN_TABLE.u6" opcode2="4"/>
bf215546Sopenharmony_ci    <ins name="FCOS_TABLE.u6" opcode2="5"/>
bf215546Sopenharmony_ci    <ins name="FSINCOS_OFFSET.u6" opcode2="6"/>
bf215546Sopenharmony_ci    <ins name="FEXP_TABLE.u4" opcode2="13"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs a given special function. The trigonometric tables
bf215546Sopenharmony_ci      (`FSIN_TABLE.u6` and `FCOS_TABLE.u6`) are crude, requiring both an
bf215546Sopenharmony_ci      argument reduction and postprocessing.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Two-source float op on the FMA unit. It uses opcode 0xA4 (f32) / 0xA5
bf215546Sopenharmony_ci    (v2f16) with opcode2 0; FMIN and FMAX reuse the same opcodes with a
bf215546Sopenharmony_ci    different opcode2.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="FADD" title="Floating-point add" dests="1" opcode2="0" unit="FMA">
bf215546Sopenharmony_ci    <ins name="FADD.f32" opcode="0xA4"/>
bf215546Sopenharmony_ci    <ins name="FADD.v2f16" opcode="0xA5"/>
bf215546Sopenharmony_ci    <desc>$A + B$</desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Same opcodes (0xA4 / 0xA5) and source layout as FADD, with opcode2 2.
bf215546Sopenharmony_ci    Assigned to the CVT unit, whereas FADD is on FMA.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="FMIN" title="Floating-point minimum" dests="1" opcode2="2" unit="CVT">
bf215546Sopenharmony_ci    <ins name="FMIN.f32" opcode="0xA4"/>
bf215546Sopenharmony_ci    <ins name="FMIN.v2f16" opcode="0xA5"/>
bf215546Sopenharmony_ci    <desc>$\min \{ A, B \}$</desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Mirror of FMIN: same opcodes (0xA4 / 0xA5), unit and source layout, with
bf215546Sopenharmony_ci    opcode2 3.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="FMAX" title="Floating-point maximum" dests="1" opcode2="3" unit="CVT">
bf215546Sopenharmony_ci    <ins name="FMAX.f32" opcode="0xA4"/>
bf215546Sopenharmony_ci    <ins name="FMAX.v2f16" opcode="0xA5"/>
bf215546Sopenharmony_ci    <desc>$\max \{ A, B \}$</desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Single-member group: only opcode 0xA5 is defined for opcode2 4. Sources A
bf215546Sopenharmony_ci    and B are flagged absneg but, unlike FADD/FMIN/FMAX, not swizzle. Also
bf215546Sopenharmony_ci    carries a round mode selector.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="V2F32_TO_V2F16" title="Vectorized floating-point conversion" dests="1" opcode2="4" unit="CVT">
bf215546Sopenharmony_ci    <ins name="V2F32_TO_V2F16" opcode="0xA5"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Given a pair of 32-bit floats, output a pair of 16-bit floats packed into
bf215546Sopenharmony_ci      a 32-bit destination.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <roundmode/>
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    opcode2 6 under opcodes 0xA4 / 0xA5, on the FMA unit. The second source is
bf215546Sopenharmony_ci    declared without a name or modifiers; it is the operand the description
bf215546Sopenharmony_ci    calls B. The trailing comments record encoding features that are not yet
bf215546Sopenharmony_ci    modelled here.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="LDEXP" title="Floating-point rescaling" dests="1" opcode2="6" unit="FMA">
bf215546Sopenharmony_ci    <ins name="LDEXP.f32" opcode="0xA4"/>
bf215546Sopenharmony_ci    <ins name="LDEXP.v2f16" opcode="0xA5"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Computes $A \cdot 2^B$ by adding B to the exponent of A. Used to calculate
bf215546Sopenharmony_ci      various special functions, particularly base-2 exponents. Special case
bf215546Sopenharmony_ci      handling differs from an actual floating-point multiply, so this should
bf215546Sopenharmony_ci      not be used outside fixed instruction sequences.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src/>
bf215546Sopenharmony_ci    <roundmode/> <!-- Also has rtna -->
bf215546Sopenharmony_ci    <!-- Also has infinity handling for arctan -->
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op under opcode 0xA4, opcode2 8. It takes the same input twice in two
bf215546Sopenharmony_ci    representations: first as fixed-point, then as the original float (the
bf215546Sopenharmony_ci    only one of the two flagged absneg).
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="FEXP.f32" title="Floating-point exponent" dests="1" opcode="0xA4" opcode2="8" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Calculates the base-2 exponent of an argument specified as an 8:24
bf215546Sopenharmony_ci      fixed-point. The original argument is passed as well for correct handling
bf215546Sopenharmony_ci      of special cases.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src>Input as 8:24 fixed-point</src>
bf215546Sopenharmony_ci    <src absneg="true">Input as 32-bit float</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    FMA-unit op under opcode 0xA4, opcode2 9. Same two absneg sources and
bf215546Sopenharmony_ci    clamp as FADD.f32, but without swizzle on the sources.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="FADD_LSCALE.f32" title="Floating-point add with logarithm scale" dests="1" opcode="0xA4" opcode2="9" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Performs a floating-point addition specialized for logarithm computation.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op under opcode 0xA4, opcode2 14. One entry covers two forms: the
bf215546Sopenharmony_ci    `second` modifier (one bit at position 24) selects the variant that the
bf215546Sopenharmony_ci    description refers to as FATAN_TABLE.f32.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="FATAN_ASSIST.f32" title="ATAN calculation helper" dests="1" opcode="0xA4" opcode2="14" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Used for `atan2()` implementation. Destination is two 16-bit
bf215546Sopenharmony_ci      values (int and float) for the first form, and a single 32-bit float when
bf215546Sopenharmony_ci      `.second` is set (indicating the FATAN_TABLE.f32 instruction).
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="second" start="24" size="1"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src>B</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Integer add family with opcode2 0; the primary opcode selects type and
bf215546Sopenharmony_ci    width. The 16-bit members are named v2u16 / v2s16, so the description
bf215546Sopenharmony_ci    refers to `IADD.v2u16` for the swizzle lowering. The commented-out entry
bf215546Sopenharmony_ci    records a second opcode (0x1A0) seen for IADD.s32 that is deliberately
bf215546Sopenharmony_ci    not emitted.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="IADD" title="Integer addition" dests="1" opcode2="0" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      $A + B$ with optional saturation.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      As Valhall lacks swizzle instructions, `IADD.v2u16` with zero is the
bf215546Sopenharmony_ci      canonical lowering for swizzles.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <ins name="IADD.u32" opcode="0xA0"/>
bf215546Sopenharmony_ci    <ins name="IADD.v2u16" opcode="0xA1"/>
bf215546Sopenharmony_ci    <ins name="IADD.v4u8" opcode="0xA2"/>
bf215546Sopenharmony_ci    <ins name="IADD.s32" opcode="0xA8"/>
bf215546Sopenharmony_ci    <ins name="IADD.v2s16" opcode="0xA9"/>
bf215546Sopenharmony_ci    <ins name="IADD.v4s8" opcode="0x1A2"/>
bf215546Sopenharmony_ci    <ins name="IADD.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <ins name="IADD.s64" opcode="0x1AB"/>
bf215546Sopenharmony_ci    <!-- <ins name="IADD.s32" opcode="0x1A0"/> -->
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <saturate/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    CVT op at opcode 0xA1 (the same primary opcode as IADD.v2u16), with
bf215546Sopenharmony_ci    opcode2 0x5. Both sources are flagged swizzle.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="MKVEC.v2i16" title="Make 16-bit vector" dests="1" opcode="0xA1" opcode2="0x5" unit="CVT">
bf215546Sopenharmony_ci    <desc>Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)`</desc>
bf215546Sopenharmony_ci    <src swizzle="true">A</src>
bf215546Sopenharmony_ci    <src swizzle="true">B</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Integer subtract family. It uses the same per-type primary opcodes as
bf215546Sopenharmony_ci    IADD, with opcode2 1 instead of 0, and the same widen sources and
bf215546Sopenharmony_ci    saturate modifier.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="ISUB" title="Integer subtract" dests="1" opcode2="1" unit="CVT">
bf215546Sopenharmony_ci    <ins name="ISUB.u32" opcode="0xA0"/>
bf215546Sopenharmony_ci    <ins name="ISUB.v2u16" opcode="0xA1"/>
bf215546Sopenharmony_ci    <ins name="ISUB.v4u8" opcode="0xA2"/>
bf215546Sopenharmony_ci    <ins name="ISUB.s32" opcode="0xA8"/>
bf215546Sopenharmony_ci    <ins name="ISUB.v2s16" opcode="0xA9"/>
bf215546Sopenharmony_ci    <ins name="ISUB.v4s8" opcode="0x1A2"/>
bf215546Sopenharmony_ci    <ins name="ISUB.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <ins name="ISUB.s64" opcode="0x1AB"/>
bf215546Sopenharmony_ci    <desc>$A - B$ with optional saturation</desc>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <saturate/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Single-member group at opcode 0x1A3 (the opcode of the u64 IADD/ISUB
bf215546Sopenharmony_ci    forms), with opcode2 6. Modifiers: neg at bit 38, preserve_null at bit 39.
bf215546Sopenharmony_ci    NOTE(review): preserve_null is not explained in the description; its
bf215546Sopenharmony_ci    semantics need confirming.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="SEG_ADD" title="Segment addition" dests="1" opcode2="6" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Similar to SHADDX, but especially used for loading offsets into
bf215546Sopenharmony_ci      WLS. Usually this is only required for atomic operations, which cannot
bf215546Sopenharmony_ci      directly use wls_pointer as an address.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      .neg indicates SEG_SUB instead.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <ins name="SEG_ADD.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <mod name="neg" start="38" size="1"/>
bf215546Sopenharmony_ci    <mod name="preserve_null" start="39" size="1"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    64-bit add with a pre-shifted, extended second operand. It has opcode2 7
bf215546Sopenharmony_ci    under the 64-bit opcodes 0x1A3 (u64) and 0x1AB (s64). The shift amount is
bf215546Sopenharmony_ci    a 3-bit immediate at bit 20. Only B is flagged widen.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" opcode2="7" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Sign or zero extend B to 64 bits, left-shift by `shift`, and add the
bf215546Sopenharmony_ci      64-bit value A. These instructions accelerate address arithmetic, but may
bf215546Sopenharmony_ci      be used in full generality for 64-bit integer arithmetic.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <ins name="SHADDX.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <ins name="SHADDX.s64" opcode="0x1AB"/>
bf215546Sopenharmony_ci    <imm name="shift" start="20" size="3"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Integer multiply family on the SFU unit, opcode2 0x0A, using the same
bf215546Sopenharmony_ci    per-type primary opcodes as IADD/ISUB. Naming differs from those groups:
bf215546Sopenharmony_ci    the 0xA0-0xA2 members are spelled i32/v2i16/v4i8 rather than u32/v2u16/
bf215546Sopenharmony_ci    v4u8, and the 64-bit member is IMULD.u64.
bf215546Sopenharmony_ci    NOTE(review): the description mentions `IMUL.u32`, which is not a name
bf215546Sopenharmony_ci    defined in this group (presumably IMUL.i32 is meant); confirm.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="IMUL" title="Integer multiply" dests="1" opcode2="0x0A" unit="SFU">
bf215546Sopenharmony_ci    <ins name="IMUL.i32" opcode="0xA0"/>
bf215546Sopenharmony_ci    <ins name="IMUL.v2i16" opcode="0xA1"/>
bf215546Sopenharmony_ci    <ins name="IMUL.v4i8" opcode="0xA2"/>
bf215546Sopenharmony_ci    <ins name="IMUL.s32" opcode="0xA8"/>
bf215546Sopenharmony_ci    <ins name="IMUL.v2s16" opcode="0xA9"/>
bf215546Sopenharmony_ci    <ins name="IMUL.v4s8" opcode="0x1A2"/>
bf215546Sopenharmony_ci    <ins name="IMULD.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <!-- <ins name="IMUL.s32" opcode="0x1A0"/> -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      $A \cdot B$ with optional saturation. Note the multipliers can only handle up to
bf215546Sopenharmony_ci      32-bit by 32-bit multiplies. The 64-bit "multiply" acts like IMUL.u32 but
bf215546Sopenharmony_ci      additionally writes the high half of the product to the high half of the
bf215546Sopenharmony_ci      64-bit destination. Along with IADD.u32 and IADD.u64, this allows the
bf215546Sopenharmony_ci      construction of a 64-bit multiply in 5 instructions (6 clocks).
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <saturate/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Halving add family, opcode2 0x0B, defined for the 32/16/8-bit unsigned and
bf215546Sopenharmony_ci    signed opcodes only (no 64-bit members). The rhadd modifier is one bit at
bf215546Sopenharmony_ci    position 30. No saturate modifier is declared.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="HADD" title="Integer half-add" dests="1" opcode2="0x0B" unit="CVT">
bf215546Sopenharmony_ci    <ins name="HADD.u32" opcode="0xA0"/>
bf215546Sopenharmony_ci    <ins name="HADD.v2u16" opcode="0xA1"/>
bf215546Sopenharmony_ci    <ins name="HADD.v4u8" opcode="0xA2"/>
bf215546Sopenharmony_ci    <ins name="HADD.s32" opcode="0xA8"/>
bf215546Sopenharmony_ci    <ins name="HADD.v2s16" opcode="0xA9"/>
bf215546Sopenharmony_ci    <ins name="HADD.v4s8" opcode="0x1A2"/>
bf215546Sopenharmony_ci    <mod name="rhadd" start="30" size="1"/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      $(A + B) \gg 1$ without intermediate overflow, corresponding to `hadd()` in
bf215546Sopenharmony_ci      OpenCL. With the `.rhadd` modifier set, it instead calculates
bf215546Sopenharmony_ci      $(A + B + 1) \gg 1$ corresponding to `rhadd()` in OpenCL.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Cross-lane permute family on the SFU unit, opcode2 0xF, defined across
bf215546Sopenharmony_ci    the same primary opcodes as IADD. A is the value and B (flagged widen) is
bf215546Sopenharmony_ci    the lane selector, per the description. The group also carries the
bf215546Sopenharmony_ci    subgroup, lane_op and inactive_result selectors.
bf215546Sopenharmony_ci    NOTE(review): the 0xA0 member is spelled CLPER.i32 while its 16- and 8-bit
bf215546Sopenharmony_ci    neighbours use v2u16 / v4u8; confirm the naming is intentional.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="CLPER" title="Cross-lane permute" dests="1" opcode2="0xF" unit="SFU">
bf215546Sopenharmony_ci    <ins name="CLPER.i32" opcode="0xA0"/>
bf215546Sopenharmony_ci    <ins name="CLPER.v2u16" opcode="0xA1"/>
bf215546Sopenharmony_ci    <ins name="CLPER.v4u8" opcode="0xA2"/>
bf215546Sopenharmony_ci    <ins name="CLPER.s32" opcode="0xA8"/>
bf215546Sopenharmony_ci    <ins name="CLPER.v2s16" opcode="0xA9"/>
bf215546Sopenharmony_ci    <ins name="CLPER.v4s8" opcode="0x1A2"/>
bf215546Sopenharmony_ci    <ins name="CLPER.u64" opcode="0x1A3"/>
bf215546Sopenharmony_ci    <ins name="CLPER.s64" opcode="0x1AB"/>
bf215546Sopenharmony_ci    <!-- <ins name="CLPER.s32" opcode="0x1A0"/> -->
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Selects the value of A in the subgroup lane given by B. This implements
bf215546Sopenharmony_ci      subgroup broadcasts. It may be used as a primitive for screen space
bf215546Sopenharmony_ci      derivatives in fragment shaders.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <subgroup/>
bf215546Sopenharmony_ci    <lane_op/>
bf215546Sopenharmony_ci    <inactive_result/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Three-source float op on the FMA unit. The group declares no opcode2; its
bf215546Sopenharmony_ci    members are opcode 0xB2 (f32) and 0xB3 (v2f16). All three sources are
bf215546Sopenharmony_ci    flagged absneg and swizzle.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="FMA" title="Fused floating-point multiply add" dests="1" unit="FMA">
bf215546Sopenharmony_ci    <ins name="FMA.f32" opcode="0xB2"/>
bf215546Sopenharmony_ci    <ins name="FMA.v2f16" opcode="0xB3"/>
bf215546Sopenharmony_ci    <desc>$A \cdot B + C$</desc>
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">C</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Left-shift-then-AND over opcodes 0xB4-0xB6 and 0x1B7. opcode2 is 0x100;
bf215546Sopenharmony_ci    RSHIFT_AND uses the same opcodes with opcode2 0x000. Source order is
bf215546Sopenharmony_ci    value (A), shift amount, then the second bitwise operand (B), which alone
bf215546Sopenharmony_ci    can be inverted via not="true".
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="LSHIFT_AND" title="Left shift and bitwise AND" dests="1" opcode2="0x100" unit="SFU">
bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_AND.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise ANDs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Right-shift-then-AND: same opcodes and source layout as LSHIFT_AND, with
bf215546Sopenharmony_ci    opcode2 0x000 and an extra `signed` modifier (one bit at position 34).
bf215546Sopenharmony_ci    NOTE(review): this right-shift group declares the implied `left` modifier
bf215546Sopenharmony_ci    exactly as the left-shift groups do; confirm that is intended.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="RSHIFT_AND" title="Right shift and bitwise AND" dests="1" opcode2="0x000" unit="SFU">
bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_AND.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise ANDs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
bf215546Sopenharmony_ci      it performs an unsigned right shift.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Left-shift-then-OR: identical layout to LSHIFT_AND, with opcode2 0x101
bf215546Sopenharmony_ci    (RSHIFT_OR is 0x001).
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="LSHIFT_OR" title="Left shift and bitwise OR" dests="1" opcode2="0x101" unit="SFU">
bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_OR.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise ORs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Right-shift-then-OR: same opcodes and source layout as LSHIFT_OR, with
bf215546Sopenharmony_ci    opcode2 0x001 and an extra `signed` modifier (one bit at position 34).
bf215546Sopenharmony_ci    NOTE(review): this right-shift group declares the implied `left` modifier
bf215546Sopenharmony_ci    exactly as the left-shift groups do; confirm that is intended.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="RSHIFT_OR" title="Right shift and bitwise OR" dests="1" opcode2="0x001" unit="SFU">
bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_OR.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise ORs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
bf215546Sopenharmony_ci      it performs an unsigned right shift.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Left-shift-then-XOR: identical layout to LSHIFT_AND, with opcode2 0x102
bf215546Sopenharmony_ci    (RSHIFT_XOR is 0x002).
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="LSHIFT_XOR" title="Left shift and bitwise XOR" dests="1" opcode2="0x102" unit="SFU">
bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="LSHIFT_XOR.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Left shifts its first source by a specified amount and bitwise XORs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Right-shift-then-XOR: same opcodes and source layout as LSHIFT_XOR, with
bf215546Sopenharmony_ci    opcode2 0x002 and an extra `signed` modifier (one bit at position 34).
bf215546Sopenharmony_ci    NOTE(review): this right-shift group declares the implied `left` modifier
bf215546Sopenharmony_ci    exactly as the left-shift groups do; confirm that is intended.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="RSHIFT_XOR" title="Right shift and bitwise XOR" dests="1" opcode2="0x002" unit="SFU">
bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.i32" opcode="0xB4"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.v2i16" opcode="0xB5"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.v4i8" opcode="0xB6"/>
bf215546Sopenharmony_ci    <ins name="RSHIFT_XOR.i64" opcode="0x1B7"/>
bf215546Sopenharmony_ci    <mod name="left" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Right shifts its first source by a specified amount and bitwise XORs it with the
bf215546Sopenharmony_ci      second source, optionally inverting the second source or the result. If
bf215546Sopenharmony_ci      `signed` is set, the hardware performs an arithmetic right shift; otherwise,
bf215546Sopenharmony_ci      it performs an unsigned right shift.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="signed" start="34" size="1"/>
bf215546Sopenharmony_ci    <not_result/>
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src lanes="true" size="8">shift</src>
bf215546Sopenharmony_ci    <src not="true">B</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Three-source select on the SFU unit, opcode 0xB8, no opcode2. The `mux`
bf215546Sopenharmony_ci    modifier is a 2-bit field at position 32 that picks the condition applied
bf215546Sopenharmony_ci    to the mask. The bitwise formula in the description follows OpenCL
bf215546Sopenharmony_ci    bitselect(a, b, c), which takes bits from a where c is 0 and from b
bf215546Sopenharmony_ci    where c is 1.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="MUX.i32" title="Mux" dests="1" opcode="0xB8" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates
bf215546Sopenharmony_ci      `(B &amp; mask) | (A &amp; ~mask)`.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src>B</src>
bf215546Sopenharmony_ci    <src>Mask</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    16-bit vector form of MUX at opcode 0xB9. It is the only MUX entry whose
bf215546Sopenharmony_ci    three sources are flagged swizzle. The bitwise formula in the description
bf215546Sopenharmony_ci    follows OpenCL bitselect(a, b, c), which takes bits from a where c is 0
bf215546Sopenharmony_ci    and from b where c is 1.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="MUX.v2i16" title="Mux" dests="1" opcode="0xB9" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates
bf215546Sopenharmony_ci      `(B &amp; mask) | (A &amp; ~mask)`.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
bf215546Sopenharmony_ci    <src swizzle="true">A</src>
bf215546Sopenharmony_ci    <src swizzle="true">B</src>
bf215546Sopenharmony_ci    <src swizzle="true">Mask</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    8-bit vector form of MUX at opcode 0xBA; its sources carry no modifiers.
bf215546Sopenharmony_ci    The bitwise formula in the description follows OpenCL bitselect(a, b, c),
bf215546Sopenharmony_ci    which takes bits from a where c is 0 and from b where c is 1.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="MUX.v4i8" title="Mux" dests="1" opcode="0xBA" unit="SFU">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Mux between A and B based on the provided mask. The condition specified
bf215546Sopenharmony_ci      as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
bf215546Sopenharmony_ci      else `B` is chosen. The `bit` modifier acts bitwise, equivalent to
bf215546Sopenharmony_ci      `bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates
bf215546Sopenharmony_ci      `(B &amp; mask) | (A &amp; ~mask)`.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <mod name="mux" start="32" size="2"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src>B</src>
bf215546Sopenharmony_ci    <src>Mask</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op at opcode 0xBC, opcode2 0 (CUBE_TSEL is opcode2 1). Source order is
bf215546Sopenharmony_ci    Z, then X, then the face index; only the two coordinates are flagged
bf215546Sopenharmony_ci    absneg.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="CUBE_SSEL" title="Cube S-coordinate select" dests="1" opcode="0xBC" opcode2="0" unit="SFU">
bf215546Sopenharmony_ci    <desc>During a cube map transform, select the S coordinate given a selected face.</desc>
bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src>Cube face index</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op at opcode 0xBC, opcode2 1 (CUBE_SSEL is opcode2 0). Source order is
bf215546Sopenharmony_ci    Y, then Z, then the face index; only the two coordinates are flagged
bf215546Sopenharmony_ci    absneg.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="CUBE_TSEL" title="Cube T-coordinate select" dests="1" opcode="0xBC" opcode2="1" unit="SFU">
bf215546Sopenharmony_ci    <desc>During a cube map transform, select the T coordinate given a selected face.</desc>
bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src>Cube face index</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    CVT op at opcode 0xBD, no opcode2. A and B are per-lane sources
bf215546Sopenharmony_ci    (lane="true"); CD is taken unmodified.
bf215546Sopenharmony_ci    NOTE(review): the attribute here is spelled `lane`, whereas the shift
bf215546Sopenharmony_ci    groups use `lanes` on their shift source; presumably these are distinct
bf215546Sopenharmony_ci    features of the generator, but confirm.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="MKVEC.v2i8" title="Make 8-bit vector" dests="1" opcode="0xBD" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Calculates $A | (B \ll 8) | (CD \ll 16)$ for 8-bit A and B and 16-bit CD.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      To implement `(uchar4) (A, B, C, D)` in full generality, use the sequence
bf215546Sopenharmony_ci      `MKVEC.v2i8 CD, C, D, #0; MKVEC.v2i8 out, A, B, CD`
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      `MKVEC.v2i8` also allows zero extending arbitrary 8-bit lanes. For
bf215546Sopenharmony_ci      example, to extend `r0.b3` to `r1`, use `MKVEC.v2i8 r1, r0.b3, 0x0.b0, 0x0`.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <src lane="true">A</src>
bf215546Sopenharmony_ci    <src lane="true">B</src>
bf215546Sopenharmony_ci    <src>CD</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op at opcode 0xC0, no opcode2. Takes X, Y, Z in that order, each
bf215546Sopenharmony_ci    flagged absneg. CUBEFACE2 (opcode 0xC1) takes the same three sources.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="CUBEFACE1" title="Cube map transform step 1" dests="1" opcode="0xC0" unit="SFU">
bf215546Sopenharmony_ci    <desc>Select the maximum absolute value of its arguments.</desc>
bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    SFU op at opcode 0xC1, no opcode2. Same X, Y, Z sources as CUBEFACE1
bf215546Sopenharmony_ci    (opcode 0xC0), but it yields the face index rather than a magnitude.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <ins name="CUBEFACE2" title="Cube map transform step 2" dests="1" opcode="0xC1" unit="SFU">
bf215546Sopenharmony_ci    <desc>Select the cube face index corresponding to the arguments.</desc>
bf215546Sopenharmony_ci    <src absneg="true">X coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">Y coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci    <src absneg="true">Z coordinate as 32-bit floating point</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <!--
bf215546Sopenharmony_ci    Dot-product group on the FMA unit at opcode 0xC2; opcode2 0 is the signed
bf215546Sopenharmony_ci    (v4s8) member and opcode2 1 the unsigned (v4u8) one. Three unmodified
bf215546Sopenharmony_ci    sources plus a saturate modifier.
bf215546Sopenharmony_ci    NOTE(review): the formula in the description restricts A and B to
bf215546Sopenharmony_ci    0..255, which reads as the unsigned case only; confirm how it should be
bf215546Sopenharmony_ci    stated for IDP.v4s8.
bf215546Sopenharmony_ci  -->
bf215546Sopenharmony_ci  <group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      8-bit integer dot product between 4 channel vectors, intended for machine
bf215546Sopenharmony_ci      learning. Available in both unsigned and signed variants, controlling
bf215546Sopenharmony_ci      sign-extension/zero-extension behaviour to the final 32-bit destination.
bf215546Sopenharmony_ci      Saturation is available. Corresponds to the `cl_arm_integer_dot_product_*`
bf215546Sopenharmony_ci      family of OpenCL extensions. Not for actual use, just for completeness.
bf215546Sopenharmony_ci      Instead, use your platform's neural accelerator.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      For $A, B \in \{ 0, \ldots, 255 \}^4$ and $\text{Accumulator} \in
bf215546Sopenharmony_ci      \mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally
bf215546Sopenharmony_ci      saturates.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <ins name="IDP.v4s8" opcode2="0"/>
bf215546Sopenharmony_ci    <ins name="IDP.v4u8" opcode2="1"/>
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <src>B</src>
bf215546Sopenharmony_ci    <src>Accumulator</src>
bf215546Sopenharmony_ci    <saturate/>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="ICMP" title="Unsigned integer compare" dests="1" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition, performs a logical and/or with the
bf215546Sopenharmony_ci      condition in the result source, and returns the outcome in the given
bf215546Sopenharmony_ci      result type (integer one, integer minus one, or floating-point one). The
bf215546Sopenharmony_ci      third source is useful for chaining together conditions without
bf215546Sopenharmony_ci      intermediate bitwise arithmetic; when this is not desired, tie it to zero
bf215546Sopenharmony_ci      and use the OR combine mode (do not set the `.and` modifier).
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The sequence modifier `.seq` is used to construct 64-bit compares in 2
bf215546Sopenharmony_ci      `ICMP.u32` instructions, in conjunction with the `u1` result type on the
bf215546Sopenharmony_ci      low half, the `m1` result type on the high half, and the result of the low
bf215546Sopenharmony_ci      half comparison passed as the third source. For comparisons other than
bf215546Sopenharmony_ci      64-bit, do not set the `.seq` modifier and do not use the `u1` result
bf215546Sopenharmony_ci      type.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Unsigned variants; each carries its own opcode (0xF0 to 0xF2). -->
bf215546Sopenharmony_ci    <ins name="ICMP.u32" opcode="0xF0"/>
bf215546Sopenharmony_ci    <ins name="ICMP.v2u16" opcode="0xF1"/>
bf215546Sopenharmony_ci    <ins name="ICMP.v4u8" opcode="0xF2"/>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <result_type/>
bf215546Sopenharmony_ci    <!-- Single-bit modifiers described above: `.and` at bit 24, `.seq` at bit 25. -->
bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
bf215546Sopenharmony_ci    <mod name="seq" start="25" size="1"/>
bf215546Sopenharmony_ci    <!-- A and B are the compared operands; C is the chained third source. -->
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <src>C</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="FCMP" title="Floating-point compare" dests="1" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition, performs a logical and/or with the
bf215546Sopenharmony_ci      condition in the result source, and returns the outcome in the given
bf215546Sopenharmony_ci      result type (integer one, integer minus one, or floating-point one). The
bf215546Sopenharmony_ci      third source is useful for chaining together conditions without
bf215546Sopenharmony_ci      intermediate bitwise arithmetic; when this is not desired, tie it to zero
bf215546Sopenharmony_ci      and use the OR combine mode (do not set the `.and` modifier).
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Floating-point variants; each carries its own opcode. -->
bf215546Sopenharmony_ci    <ins name="FCMP.f32" opcode="0xF4"/>
bf215546Sopenharmony_ci    <ins name="FCMP.v2f16" opcode="0xF5"/>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <result_type/>
bf215546Sopenharmony_ci    <!-- Only the `.and` combine modifier is declared; unlike the ICMP groups
bf215546Sopenharmony_ci         there is no `.seq` modifier here. -->
bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
bf215546Sopenharmony_ci    <!-- A and B are the compared operands; C is the chained third source. -->
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true" swizzle="true">B</src>
bf215546Sopenharmony_ci    <src>C</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <group name="ICMP" title="Signed integer compare" dests="1" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Evaluates the given condition, performs a logical and/or with the
bf215546Sopenharmony_ci      condition in the result source, and returns the outcome in the given
bf215546Sopenharmony_ci      result type (integer one, integer minus one, or floating-point one). The
bf215546Sopenharmony_ci      third source is useful for chaining together conditions without
bf215546Sopenharmony_ci      intermediate bitwise arithmetic; when this is not desired, tie it to zero
bf215546Sopenharmony_ci      and use the OR combine mode (do not set the `.and` modifier).
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      The sequence modifier `.seq` is used to construct signed 64-bit compares
bf215546Sopenharmony_ci      in 1 `ICMP.u32` and 1 `ICMP.s32` instruction, in conjunction with the `u1`
bf215546Sopenharmony_ci      result type on the low half, the `m1` result type on the high half, and
bf215546Sopenharmony_ci      the result of the low half comparison passed as the third source. For
bf215546Sopenharmony_ci      comparisons other than 64-bit, do not set the `.seq` modifier and do not
bf215546Sopenharmony_ci      use the `u1` result type.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Signed variants; each carries its own opcode (0xF8 to 0xFA). -->
bf215546Sopenharmony_ci    <ins name="ICMP.s32" opcode="0xF8"/>
bf215546Sopenharmony_ci    <ins name="ICMP.v2s16" opcode="0xF9"/>
bf215546Sopenharmony_ci    <ins name="ICMP.v4s8" opcode="0xFA"/>
bf215546Sopenharmony_ci    <cmp/>
bf215546Sopenharmony_ci    <result_type/>
bf215546Sopenharmony_ci    <!-- Single-bit modifiers described above: `.and` at bit 24, `.seq` at bit 25. -->
bf215546Sopenharmony_ci    <mod name="and" start="24" size="1"/>
bf215546Sopenharmony_ci    <mod name="seq" start="25" size="1"/>
bf215546Sopenharmony_ci    <!-- A and B are the compared operands; C is the chained third source. -->
bf215546Sopenharmony_ci    <src widen="true">A</src>
bf215546Sopenharmony_ci    <src widen="true">B</src>
bf215546Sopenharmony_ci    <src>C</src>
bf215546Sopenharmony_ci  </group>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IADD_IMM.i32" title="Integer addition with immediate" dests="1" opcode="0x110" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Adds an arbitrary 32-bit immediate embedded within the instruction stream.
bf215546Sopenharmony_ci      If no modifiers are required, this is preferred to `IADD.i32` with a
bf215546Sopenharmony_ci      constant accessed as a uniform. However, if the constant is available
bf215546Sopenharmony_ci      inline, `IADD.i32` is preferred.
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci      `IADD_IMM.i32` with the source tied to zero is the canonical immediate move.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- One register source plus the 32-bit immediate field starting at bit 8. -->
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IADD_IMM.v2i16" title="Integer addition with immediate" dests="1" opcode="0x111" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Adds an arbitrary pair of 16-bit immediates embedded within the
bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
bf215546Sopenharmony_ci      `IADD.v2i16` with a constant accessed as a uniform. However, if the
bf215546Sopenharmony_ci      constant is available inline, `IADD.v2i16` is preferred. Adding only a
bf215546Sopenharmony_ci      single 16-bit constant requires replication of the constant.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- The pair of 16-bit immediates is packed into the single 32-bit
bf215546Sopenharmony_ci         "constant" field declared below. -->
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="IADD_IMM.v4i8" title="Integer addition with immediate" dests="1" opcode="0x112" unit="CVT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Adds an arbitrary quad of 8-bit immediates embedded within the
bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
bf215546Sopenharmony_ci      `IADD.v4i8` with a constant accessed as a uniform. However, if the
bf215546Sopenharmony_ci      constant is available inline, `IADD.v4i8` is preferred. Adding only a
bf215546Sopenharmony_ci      single 8-bit constant requires replication of the constant.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- The four 8-bit immediates are packed into the single 32-bit
bf215546Sopenharmony_ci         "constant" field declared below. -->
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FADD_IMM.f32" title="Floating-point addition with immediate" dests="1" opcode="0x114" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Adds an arbitrary 32-bit immediate embedded within the instruction stream.
bf215546Sopenharmony_ci      If no modifiers are required, this is preferred to `FADD.f32` with a
bf215546Sopenharmony_ci      constant accessed as a uniform. However, if the constant is available
bf215546Sopenharmony_ci      inline, `FADD.f32` is preferred.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- One register source plus the 32-bit immediate field starting at bit 8;
bf215546Sopenharmony_ci         no source modifiers are declared. -->
bf215546Sopenharmony_ci    <src>A</src>
bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FADD_IMM.v2f16" title="Floating-point addition with immediate" dests="1" opcode="0x115" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Adds an arbitrary pair of 16-bit immediates embedded within the
bf215546Sopenharmony_ci      instruction stream. If no modifiers are required, this is preferred to
bf215546Sopenharmony_ci      `FADD.v2f16` with a constant accessed as a uniform. However, if the
bf215546Sopenharmony_ci      constant is available inline, `FADD.v2f16` is preferred. Adding only a
bf215546Sopenharmony_ci      single 16-bit constant requires replication of the constant.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- NOTE(review): this source is tagged float="true" while the source of
bf215546Sopenharmony_ci         FADD_IMM.f32 carries no such attribute; confirm the asymmetry is
bf215546Sopenharmony_ci         intended. -->
bf215546Sopenharmony_ci    <src float="true">A</src>
bf215546Sopenharmony_ci    <!-- The pair of 16-bit immediates is packed into this 32-bit field. -->
bf215546Sopenharmony_ci    <imm name="constant" start="8" size="32"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM1_RETURN.i32" title="Atomic operations on memory with 1" opcode="0x69" opcode2="3" unit="LS">
bf215546Sopenharmony_ci    <!-- 32-bit variant: shares opcode 0x69 with ATOM1_RETURN.i64 and is
bf215546Sopenharmony_ci         distinguished by opcode2="3". The operation is chosen through
bf215546Sopenharmony_ci         atom_opc_1. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <atom_opc_1/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Optional for ATOM1.i32, in which sr_count must be 0 -->
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Address operand is 64 bits wide; "offset" is an 8-bit immediate. -->
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM1_RETURN.i64" title="Atomic operations on memory with 1" opcode="0x69" opcode2="5" unit="LS">
bf215546Sopenharmony_ci    <!-- 64-bit variant: shares opcode 0x69 with ATOM1_RETURN.i32 and is
bf215546Sopenharmony_ci         distinguished by opcode2="5". The operation is chosen through
bf215546Sopenharmony_ci         atom_opc_1. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <atom_opc_1/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Optional for ATOM1.i64, in which sr_count must be 0 -->
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Address operand is 64 bits wide; "offset" is an 8-bit immediate. -->
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM.i32" title="Atomic operations on memory" opcode="0x68" opcode2="3" unit="LS">
bf215546Sopenharmony_ci    <!-- 32-bit variant: shares opcode 0x68 with ATOM.i64 and is distinguished
bf215546Sopenharmony_ci         by opcode2="3". No dests attribute and no written sr are declared;
bf215546Sopenharmony_ci         the only sr entry is read-only. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <atom_opc/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr read="true"/>
bf215546Sopenharmony_ci    <!-- Address operand is 64 bits wide; "offset" is an 8-bit immediate. -->
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM.i64" title="Atomic operations on memory" opcode="0x68" opcode2="5" unit="LS">
bf215546Sopenharmony_ci    <!-- 64-bit variant: shares opcode 0x68 with ATOM.i32 and is distinguished
bf215546Sopenharmony_ci         by opcode2="5". No dests attribute and no written sr are declared;
bf215546Sopenharmony_ci         the only sr entry is read-only. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <atom_opc/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr read="true"/>
bf215546Sopenharmony_ci    <!-- Address operand is 64 bits wide; "offset" is an 8-bit immediate. -->
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM_RETURN.i32" title="Atomic operations on memory" opcode="0x120" opcode2="3" unit="LS">
bf215546Sopenharmony_ci    <!-- 32-bit variant: shares opcode 0x120 with ATOM_RETURN.i64 and is
bf215546Sopenharmony_ci         distinguished by opcode2="3". Declares both a read count and a write
bf215546Sopenharmony_ci         count for its sr entries. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only valid with .xchg to implement ACMPXCHG -->
bf215546Sopenharmony_ci    <mod name="compare" start="26" size="1"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <atom_opc/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Written sr first, then the read sr; the two carry different flags
bf215546Sopenharmony_ci         values ("false" and "rw"). -->
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="rw"/>
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="ATOM_RETURN.i64" title="Atomic operations on memory" opcode="0x120" opcode2="5" unit="LS">
bf215546Sopenharmony_ci    <!-- 64-bit variant: shares opcode 0x120 with ATOM_RETURN.i32 and is
bf215546Sopenharmony_ci         distinguished by opcode2="5". -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci    <!-- ATOM_RETURN.i32 documents this modifier as only valid with .xchg to
bf215546Sopenharmony_ci         implement ACMPXCHG; presumably the same restriction applies here.
bf215546Sopenharmony_ci         TODO confirm. -->
bf215546Sopenharmony_ci    <mod name="compare" start="26" size="1"/>
bf215546Sopenharmony_ci    <atom_opc/>
bf215546Sopenharmony_ci    <mod name="memory_width" start="128" size="1" implied="true"/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Written sr first, then the read sr; the two carry different flags
bf215546Sopenharmony_ci         values ("false" and "rw"). -->
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="rw"/>
bf215546Sopenharmony_ci    <src size="64">64-bit address to operate on</src>
bf215546Sopenharmony_ci    <imm name="offset" start="8" size="8"/>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="TEX_FETCH" title="Texel fetch" opcode="0x125" unit="T">
bf215546Sopenharmony_ci    <desc>Unfiltered texturing instruction.</desc>
bf215546Sopenharmony_ci    <!-- Modifier list matches TEX_SINGLE except that shadow and lod_mode are
bf215546Sopenharmony_ci         not declared here. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <register_width/>
bf215546Sopenharmony_ci    <write_mask/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <wide_indices/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <texel_offset/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="TEX_SINGLE" title="Texture load" opcode="0x128" unit="T">
bf215546Sopenharmony_ci    <desc>Ordinary texturing instruction using a sampler.</desc>
bf215546Sopenharmony_ci    <!-- Same leading modifiers as TEX_FETCH, plus shadow and lod_mode. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <register_width/>
bf215546Sopenharmony_ci    <write_mask/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <wide_indices/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <texel_offset/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- One written and one read sr, both with flags="false", followed by the
bf215546Sopenharmony_ci         64-bit image source. -->
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="TEX_GATHER" title="Texel gather" opcode="0x129" unit="T">
bf215546Sopenharmony_ci    <desc>Texture gather instruction.</desc>
bf215546Sopenharmony_ci    <!-- Compared with TEX_SINGLE: adds integer_coordinates and
bf215546Sopenharmony_ci         fetch_component, and declares no lod_mode. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <register_width/>
bf215546Sopenharmony_ci    <write_mask/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <wide_indices/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <texel_offset/>
bf215546Sopenharmony_ci    <integer_coordinates/>
bf215546Sopenharmony_ci    <fetch_component/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Leave secondary_register_width as 0 -->
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- One written and one read sr, both with flags="false", followed by the
bf215546Sopenharmony_ci         64-bit image source. -->
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="TEX_DUAL" title="Dual texture" opcode="0x12F" unit="T">
bf215546Sopenharmony_ci    <desc>Pair of texture instructions.</desc>
bf215546Sopenharmony_ci    <!-- The only TEX_* entry that declares secondary_register_width; the
bf215546Sopenharmony_ci         TEX_FETCH, TEX_SINGLE and TEX_GATHER entries instead note that it is
bf215546Sopenharmony_ci         left as 0. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <register_width/>
bf215546Sopenharmony_ci    <secondary_register_width/>
bf215546Sopenharmony_ci    <write_mask/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <wide_indices/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <texel_offset/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr_count/>
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- One written and one read sr, both with flags="false", followed by the
bf215546Sopenharmony_ci         64-bit image source. -->
bf215546Sopenharmony_ci    <sr write="true" flags="false"/>
bf215546Sopenharmony_ci    <sr read="true" flags="false"/>
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_SINGLE" title="Fused varying-texturing" opcode="0x130" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Unlike the TEX_* entries, this declares vartex_register_width and
bf215546Sopenharmony_ci         sample_and_update, and has no write_mask, wide_indices or
bf215546Sopenharmony_ci         texel_offset. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_GATHER" title="Fused varying-texturing" opcode="0x131" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Gather form: declares integer_coordinates and fetch_component and has
bf215546Sopenharmony_ci         no lod_mode, mirroring the difference between TEX_GATHER and
bf215546Sopenharmony_ci         TEX_SINGLE. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <integer_coordinates/>
bf215546Sopenharmony_ci    <fetch_component/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_GRADIENT" title="Fused varying-texturing" opcode="0x132" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Gradient form: declares lod_bias_disable and lod_clamp_disable where
bf215546Sopenharmony_ci         VAR_TEX_BUF_SINGLE declares lod_mode. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_bias_disable/>
bf215546Sopenharmony_ci    <lod_clamp_disable/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_BUF_DUAL" title="Fused varying-texturing" opcode="0x137" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Declares the same modifier list as VAR_TEX_BUF_SINGLE; the entries
bf215546Sopenharmony_ci         differ in opcode and in the description's reference to TEX_DUAL. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_SINGLE" title="Fused varying-texturing" opcode="0x138" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Non-BUF counterpart of VAR_TEX_BUF_SINGLE: same modifier list, with
bf215546Sopenharmony_ci         the description naming LD_VAR_IMM_F32 instead of LD_VAR_BUF_IMM_F32. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_GATHER" title="Fused varying-texturing" opcode="0x139" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Gather form of VAR_TEX_SINGLE: declares integer_coordinates and
bf215546Sopenharmony_ci         fetch_component and has no lod_mode. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <integer_coordinates/>
bf215546Sopenharmony_ci    <fetch_component/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_GRADIENT" title="Fused varying-texturing" opcode="0x13A" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Gradient form of VAR_TEX_SINGLE: declares lod_bias_disable and
bf215546Sopenharmony_ci         lod_clamp_disable where VAR_TEX_SINGLE declares lod_mode. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_bias_disable/>
bf215546Sopenharmony_ci    <lod_clamp_disable/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="VAR_TEX_DUAL" title="Fused varying-texturing" opcode="0x13F" unit="VT">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      Only works for FP32 varyings. Performance characteristics are similar
bf215546Sopenharmony_ci      to LD_VAR_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Declares the same modifier list as VAR_TEX_SINGLE; the entries differ
bf215546Sopenharmony_ci         in opcode and in the description's reference to TEX_DUAL. -->
bf215546Sopenharmony_ci    <slot/>
bf215546Sopenharmony_ci    <skip/>
bf215546Sopenharmony_ci    <sample_and_update/>
bf215546Sopenharmony_ci    <register_type/>
bf215546Sopenharmony_ci    <vartex_register_width/>
bf215546Sopenharmony_ci    <dimension/>
bf215546Sopenharmony_ci    <array_enable/>
bf215546Sopenharmony_ci    <shadow/>
bf215546Sopenharmony_ci    <lod_mode/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <!-- Only a write count is declared; there is no read sr. -->
bf215546Sopenharmony_ci    <sr_write_count/>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    <sr write="true"/>
bf215546Sopenharmony_ci    <!-- Two sources: the 64-bit image source, then the varying offset. -->
bf215546Sopenharmony_ci    <src size="64">Image to read from</src>
bf215546Sopenharmony_ci    <src>Varying offset</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FMA_RSCALE.f32" title="Fused floating-point multiply add with exponent bias" dests="1" opcode="0x160" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. Used in
bf215546Sopenharmony_ci      special transcendental function sequences. It should not be used for
bf215546Sopenharmony_ci      general code as its special case handling differs from two back-to-back
bf215546Sopenharmony_ci      `FMA.f32` operations. Equivalent to `FMA.f32` back-to-back with
bf215546Sopenharmony_ci      `LDEXP.f32`
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- NOTE(review): the description says special-case handling differs yet
bf215546Sopenharmony_ci         also calls the instruction equivalent to FMA.f32 plus LDEXP.f32;
bf215546Sopenharmony_ci         presumably the equivalence holds only outside special cases.
bf215546Sopenharmony_ci         Confirm and reword. -->
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci    <src absneg="true">C</src>
bf215546Sopenharmony_ci    <src>D</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_N.f32" title="Fused floating-point multiply add with exponent bias and zero override" dests="1" opcode="0x161" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
bf215546Sopenharmony_ci      = 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an
bf215546Sopenharmony_ci      ordinary multiply would return NaN. Used in special transcendental
bf215546Sopenharmony_ci      function sequences. It should not be used for general code as its special
bf215546Sopenharmony_ci      case handling differs from two back-to-back `FMA.f32` operations.
bf215546Sopenharmony_ci      Equivalent to `FMA.f32` back-to-back with `LDEXP.f32`
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Same operand layout as FMA_RSCALE.f32; only the zero-operand rule in
bf215546Sopenharmony_ci         the description and the opcode differ. -->
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci    <src absneg="true">C</src>
bf215546Sopenharmony_ci    <src>D</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_LEFT.f32" title="Fused floating-point multiply add with exponent bias and asymmetric zero handling" dests="1" opcode="0x162" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
bf215546Sopenharmony_ci      = 0$ or $B = 0$, the multiply is treated as $A$ even if an
bf215546Sopenharmony_ci      ordinary multiply would return NaN. Used in special transcendental
bf215546Sopenharmony_ci      function sequences. It should not be used for general code as its special
bf215546Sopenharmony_ci      case handling differs from two back-to-back `FMA.f32` operations.
bf215546Sopenharmony_ci      Equivalent to `FMA.f32` back-to-back with `LDEXP.f32`
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Same operand layout as FMA_RSCALE.f32; the zero-operand rule here
bf215546Sopenharmony_ci         substitutes A for the product, per the description above. -->
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci    <src absneg="true">C</src>
bf215546Sopenharmony_ci    <src>D</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci  <ins name="FMA_RSCALE_SCALE16.f32" title="Fused floating-point multiply add with 16-bit exponent bias" dests="1" opcode="0x163" unit="FMA">
bf215546Sopenharmony_ci    <desc>
bf215546Sopenharmony_ci      First calculates $A \cdot B + C$ and then biases the exponent by D,
bf215546Sopenharmony_ci      interpreted as a 16-bit value. Used in special transcendental function
bf215546Sopenharmony_ci      sequences. It should not be used for general code as its special case
bf215546Sopenharmony_ci      handling differs from two back-to-back `FMA.f32` operations.  Equivalent
bf215546Sopenharmony_ci      to `FMA.f32` back-to-back with `LDEXP.f32`
bf215546Sopenharmony_ci    </desc>
bf215546Sopenharmony_ci    <!-- Same operand layout as FMA_RSCALE.f32; the description states that D
bf215546Sopenharmony_ci         is interpreted as a 16-bit value, although the D source below
bf215546Sopenharmony_ci         declares no explicit size. -->
bf215546Sopenharmony_ci    <clamp/>
bf215546Sopenharmony_ci    <!-- A, B and C are flagged absneg="true"; D, the exponent bias, is not. -->
bf215546Sopenharmony_ci    <src absneg="true">A</src>
bf215546Sopenharmony_ci    <src absneg="true">B</src>
bf215546Sopenharmony_ci    <src absneg="true">C</src>
bf215546Sopenharmony_ci    <src>D</src>
bf215546Sopenharmony_ci  </ins>
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci</valhall>