1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#define COND_LOWER_OP(b, name, ...) \ 28bf215546Sopenharmony_ci (b->shader->options->lower_int64_options & \ 29bf215546Sopenharmony_ci nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \ 30bf215546Sopenharmony_ci lower_##name##64(b, __VA_ARGS__) : nir_##name(b, __VA_ARGS__) 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#define COND_LOWER_CMP(b, name, ...) \ 33bf215546Sopenharmony_ci (b->shader->options->lower_int64_options & \ 34bf215546Sopenharmony_ci nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \ 35bf215546Sopenharmony_ci lower_int64_compare(b, nir_op_##name, __VA_ARGS__) : \ 36bf215546Sopenharmony_ci nir_##name(b, __VA_ARGS__) 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#define COND_LOWER_CAST(b, name, ...) \ 39bf215546Sopenharmony_ci (b->shader->options->lower_int64_options & \ 40bf215546Sopenharmony_ci nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \ 41bf215546Sopenharmony_ci lower_##name(b, __VA_ARGS__) : \ 42bf215546Sopenharmony_ci nir_##name(b, __VA_ARGS__) 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cistatic nir_ssa_def * 45bf215546Sopenharmony_cilower_b2i64(nir_builder *b, nir_ssa_def *x) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0)); 48bf215546Sopenharmony_ci} 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic nir_ssa_def * 51bf215546Sopenharmony_cilower_i2b(nir_builder *b, nir_ssa_def *x) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci return nir_ine(b, nir_ior(b, nir_unpack_64_2x32_split_x(b, x), 54bf215546Sopenharmony_ci nir_unpack_64_2x32_split_y(b, x)), 55bf215546Sopenharmony_ci nir_imm_int(b, 0)); 56bf215546Sopenharmony_ci} 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_cistatic nir_ssa_def * 59bf215546Sopenharmony_cilower_i2i8(nir_builder *b, nir_ssa_def *x) 60bf215546Sopenharmony_ci{ 61bf215546Sopenharmony_ci return nir_i2i8(b, nir_unpack_64_2x32_split_x(b, x)); 62bf215546Sopenharmony_ci} 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistatic nir_ssa_def * 65bf215546Sopenharmony_cilower_i2i16(nir_builder *b, nir_ssa_def *x) 66bf215546Sopenharmony_ci{ 67bf215546Sopenharmony_ci return nir_i2i16(b, nir_unpack_64_2x32_split_x(b, x)); 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_cistatic nir_ssa_def * 72bf215546Sopenharmony_cilower_i2i32(nir_builder *b, nir_ssa_def *x) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci return nir_unpack_64_2x32_split_x(b, x); 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic nir_ssa_def * 78bf215546Sopenharmony_cilower_i2i64(nir_builder *b, nir_ssa_def *x) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_i2i32(b, x); 81bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, x32, nir_ishr_imm(b, x32, 31)); 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_cistatic nir_ssa_def * 85bf215546Sopenharmony_cilower_u2u8(nir_builder *b, nir_ssa_def *x) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci return nir_u2u8(b, nir_unpack_64_2x32_split_x(b, x)); 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic nir_ssa_def * 91bf215546Sopenharmony_cilower_u2u16(nir_builder *b, nir_ssa_def *x) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci return nir_u2u16(b, nir_unpack_64_2x32_split_x(b, x)); 94bf215546Sopenharmony_ci} 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistatic nir_ssa_def * 97bf215546Sopenharmony_cilower_u2u32(nir_builder *b, nir_ssa_def *x) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci return nir_unpack_64_2x32_split_x(b, x); 100bf215546Sopenharmony_ci} 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_cistatic nir_ssa_def * 103bf215546Sopenharmony_cilower_u2u64(nir_builder *b, nir_ssa_def *x) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_u2u32(b, x); 106bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, x32, nir_imm_int(b, 0)); 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic nir_ssa_def * 110bf215546Sopenharmony_cilower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 113bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 114bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 115bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo), 118bf215546Sopenharmony_ci nir_bcsel(b, cond, x_hi, y_hi)); 119bf215546Sopenharmony_ci} 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistatic nir_ssa_def * 122bf215546Sopenharmony_cilower_inot64(nir_builder *b, nir_ssa_def *x) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 125bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi)); 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic nir_ssa_def * 131bf215546Sopenharmony_cilower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 132bf215546Sopenharmony_ci{ 133bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 134bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 135bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 136bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo), 139bf215546Sopenharmony_ci nir_iand(b, x_hi, y_hi)); 140bf215546Sopenharmony_ci} 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_cistatic nir_ssa_def * 143bf215546Sopenharmony_cilower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 146bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 147bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 148bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo), 151bf215546Sopenharmony_ci nir_ior(b, x_hi, y_hi)); 152bf215546Sopenharmony_ci} 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_cistatic nir_ssa_def * 155bf215546Sopenharmony_cilower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 156bf215546Sopenharmony_ci{ 157bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 158bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 159bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 160bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo), 163bf215546Sopenharmony_ci nir_ixor(b, x_hi, y_hi)); 164bf215546Sopenharmony_ci} 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_cistatic nir_ssa_def * 167bf215546Sopenharmony_cilower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 168bf215546Sopenharmony_ci{ 169bf215546Sopenharmony_ci /* Implemented as 170bf215546Sopenharmony_ci * 171bf215546Sopenharmony_ci * uint64_t lshift(uint64_t x, int c) 172bf215546Sopenharmony_ci * { 173bf215546Sopenharmony_ci * if (c == 0) return x; 174bf215546Sopenharmony_ci * 175bf215546Sopenharmony_ci * uint32_t lo = LO(x), hi = HI(x); 176bf215546Sopenharmony_ci * 177bf215546Sopenharmony_ci * if (c < 32) { 178bf215546Sopenharmony_ci * uint32_t lo_shifted = lo << c; 179bf215546Sopenharmony_ci * uint32_t hi_shifted = hi << c; 180bf215546Sopenharmony_ci * uint32_t lo_shifted_hi = lo >> abs(32 - c); 181bf215546Sopenharmony_ci * return pack_64(lo_shifted, hi_shifted | lo_shifted_hi); 182bf215546Sopenharmony_ci * } else { 183bf215546Sopenharmony_ci * uint32_t lo_shifted_hi = lo << abs(32 - c); 184bf215546Sopenharmony_ci * return pack_64(0, lo_shifted_hi); 185bf215546Sopenharmony_ci * } 186bf215546Sopenharmony_ci * } 187bf215546Sopenharmony_ci */ 188bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 189bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32))); 192bf215546Sopenharmony_ci nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y); 193bf215546Sopenharmony_ci nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y); 194bf215546Sopenharmony_ci nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci nir_ssa_def *res_if_lt_32 = 197bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, lo_shifted, 198bf215546Sopenharmony_ci nir_ior(b, hi_shifted, lo_shifted_hi)); 199bf215546Sopenharmony_ci nir_ssa_def *res_if_ge_32 = 200bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, nir_imm_int(b, 0), 201bf215546Sopenharmony_ci nir_ishl(b, x_lo, reverse_count)); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq_imm(b, y, 0), x, 204bf215546Sopenharmony_ci nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)), 205bf215546Sopenharmony_ci res_if_ge_32, res_if_lt_32)); 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic nir_ssa_def * 209bf215546Sopenharmony_cilower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci /* Implemented as 212bf215546Sopenharmony_ci * 213bf215546Sopenharmony_ci * uint64_t arshift(uint64_t x, int c) 214bf215546Sopenharmony_ci * { 215bf215546Sopenharmony_ci * if (c == 0) return x; 216bf215546Sopenharmony_ci * 217bf215546Sopenharmony_ci * uint32_t lo = LO(x); 218bf215546Sopenharmony_ci * int32_t hi = HI(x); 219bf215546Sopenharmony_ci * 220bf215546Sopenharmony_ci * if (c < 32) { 221bf215546Sopenharmony_ci * uint32_t lo_shifted = lo >> c; 222bf215546Sopenharmony_ci * uint32_t hi_shifted = hi >> c; 223bf215546Sopenharmony_ci * uint32_t hi_shifted_lo = hi << abs(32 - c); 224bf215546Sopenharmony_ci * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted); 225bf215546Sopenharmony_ci * } else { 226bf215546Sopenharmony_ci * uint32_t hi_shifted = hi >> 31; 227bf215546Sopenharmony_ci * uint32_t hi_shifted_lo = hi >> abs(32 - c); 228bf215546Sopenharmony_ci * return pack_64(hi_shifted, hi_shifted_lo); 229bf215546Sopenharmony_ci * } 230bf215546Sopenharmony_ci * } 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 233bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32))); 236bf215546Sopenharmony_ci nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y); 237bf215546Sopenharmony_ci nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y); 238bf215546Sopenharmony_ci nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci nir_ssa_def *res_if_lt_32 = 241bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo), 242bf215546Sopenharmony_ci hi_shifted); 243bf215546Sopenharmony_ci nir_ssa_def *res_if_ge_32 = 244bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count), 245bf215546Sopenharmony_ci nir_ishr(b, x_hi, nir_imm_int(b, 31))); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq_imm(b, y, 0), x, 248bf215546Sopenharmony_ci nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)), 249bf215546Sopenharmony_ci res_if_ge_32, res_if_lt_32)); 250bf215546Sopenharmony_ci} 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_cistatic nir_ssa_def * 253bf215546Sopenharmony_cilower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 254bf215546Sopenharmony_ci{ 255bf215546Sopenharmony_ci /* Implemented as 256bf215546Sopenharmony_ci * 257bf215546Sopenharmony_ci * uint64_t rshift(uint64_t x, int c) 258bf215546Sopenharmony_ci * { 259bf215546Sopenharmony_ci * if (c == 0) return x; 260bf215546Sopenharmony_ci * 261bf215546Sopenharmony_ci * uint32_t lo = LO(x), hi = HI(x); 262bf215546Sopenharmony_ci * 263bf215546Sopenharmony_ci * if (c < 32) { 264bf215546Sopenharmony_ci * uint32_t lo_shifted = lo >> c; 265bf215546Sopenharmony_ci * uint32_t hi_shifted = hi >> c; 266bf215546Sopenharmony_ci * uint32_t hi_shifted_lo = hi << abs(32 - c); 267bf215546Sopenharmony_ci * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted); 268bf215546Sopenharmony_ci * } else { 269bf215546Sopenharmony_ci * uint32_t hi_shifted_lo = hi >> abs(32 - c); 270bf215546Sopenharmony_ci * return pack_64(0, hi_shifted_lo); 271bf215546Sopenharmony_ci * } 272bf215546Sopenharmony_ci * } 273bf215546Sopenharmony_ci */ 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 276bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32))); 279bf215546Sopenharmony_ci nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y); 280bf215546Sopenharmony_ci nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y); 281bf215546Sopenharmony_ci nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci nir_ssa_def *res_if_lt_32 = 284bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo), 285bf215546Sopenharmony_ci hi_shifted); 286bf215546Sopenharmony_ci nir_ssa_def *res_if_ge_32 = 287bf215546Sopenharmony_ci nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count), 288bf215546Sopenharmony_ci nir_imm_int(b, 0)); 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq_imm(b, y, 0), x, 291bf215546Sopenharmony_ci nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)), 292bf215546Sopenharmony_ci res_if_ge_32, res_if_lt_32)); 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_cistatic nir_ssa_def * 296bf215546Sopenharmony_cilower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 297bf215546Sopenharmony_ci{ 298bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 299bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 300bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 301bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo); 304bf215546Sopenharmony_ci nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, x_lo)); 305bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi)); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, res_lo, res_hi); 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_cistatic nir_ssa_def * 311bf215546Sopenharmony_cilower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 312bf215546Sopenharmony_ci{ 313bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 314bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 315bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 316bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo); 319bf215546Sopenharmony_ci nir_ssa_def *borrow = nir_ineg(b, nir_b2i32(b, nir_ult(b, x_lo, y_lo))); 320bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, res_lo, res_hi); 323bf215546Sopenharmony_ci} 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_cistatic nir_ssa_def * 326bf215546Sopenharmony_cilower_ineg64(nir_builder *b, nir_ssa_def *x) 327bf215546Sopenharmony_ci{ 328bf215546Sopenharmony_ci /* Since isub is the same number of instructions (with better dependencies) 329bf215546Sopenharmony_ci * as iadd, subtraction is actually more efficient for ineg than the usual 330bf215546Sopenharmony_ci * 2's complement "flip the bits and add one". 331bf215546Sopenharmony_ci */ 332bf215546Sopenharmony_ci return lower_isub64(b, nir_imm_int64(b, 0), x); 333bf215546Sopenharmony_ci} 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_cistatic nir_ssa_def * 336bf215546Sopenharmony_cilower_iabs64(nir_builder *b, nir_ssa_def *x) 337bf215546Sopenharmony_ci{ 338bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 339bf215546Sopenharmony_ci nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0)); 340bf215546Sopenharmony_ci return nir_bcsel(b, x_is_neg, nir_ineg(b, x), x); 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistatic nir_ssa_def * 344bf215546Sopenharmony_cilower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y) 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 347bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 348bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 349bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci switch (op) { 352bf215546Sopenharmony_ci case nir_op_ieq: 353bf215546Sopenharmony_ci return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo)); 354bf215546Sopenharmony_ci case nir_op_ine: 355bf215546Sopenharmony_ci return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo)); 356bf215546Sopenharmony_ci case nir_op_ult: 357bf215546Sopenharmony_ci return nir_ior(b, nir_ult(b, x_hi, y_hi), 358bf215546Sopenharmony_ci nir_iand(b, nir_ieq(b, x_hi, y_hi), 359bf215546Sopenharmony_ci nir_ult(b, x_lo, y_lo))); 360bf215546Sopenharmony_ci case nir_op_ilt: 361bf215546Sopenharmony_ci return nir_ior(b, nir_ilt(b, x_hi, y_hi), 362bf215546Sopenharmony_ci nir_iand(b, nir_ieq(b, x_hi, y_hi), 363bf215546Sopenharmony_ci nir_ult(b, x_lo, y_lo))); 364bf215546Sopenharmony_ci break; 365bf215546Sopenharmony_ci case nir_op_uge: 366bf215546Sopenharmony_ci /* Lower as !(x < y) in the hopes of better CSE */ 367bf215546Sopenharmony_ci return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y)); 368bf215546Sopenharmony_ci case nir_op_ige: 369bf215546Sopenharmony_ci /* Lower as !(x < y) in the hopes of better CSE */ 370bf215546Sopenharmony_ci return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y)); 371bf215546Sopenharmony_ci default: 372bf215546Sopenharmony_ci unreachable("Invalid comparison"); 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci} 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_cistatic nir_ssa_def * 377bf215546Sopenharmony_cilower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x); 380bf215546Sopenharmony_ci} 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_cistatic nir_ssa_def * 383bf215546Sopenharmony_cilower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 384bf215546Sopenharmony_ci{ 385bf215546Sopenharmony_ci return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x); 386bf215546Sopenharmony_ci} 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_cistatic nir_ssa_def * 389bf215546Sopenharmony_cilower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 390bf215546Sopenharmony_ci{ 391bf215546Sopenharmony_ci return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y); 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_cistatic nir_ssa_def * 395bf215546Sopenharmony_cilower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y); 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic nir_ssa_def * 401bf215546Sopenharmony_cilower_mul_2x32_64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, 402bf215546Sopenharmony_ci bool sign_extend) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci nir_ssa_def *res_hi = sign_extend ? nir_imul_high(b, x, y) 405bf215546Sopenharmony_ci : nir_umul_high(b, x, y); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_imul(b, x, y), res_hi); 408bf215546Sopenharmony_ci} 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_cistatic nir_ssa_def * 411bf215546Sopenharmony_cilower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 412bf215546Sopenharmony_ci{ 413bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 414bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 415bf215546Sopenharmony_ci nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); 416bf215546Sopenharmony_ci nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci nir_ssa_def *mul_lo = nir_umul_2x32_64(b, x_lo, y_lo); 419bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_iadd(b, nir_unpack_64_2x32_split_y(b, mul_lo), 420bf215546Sopenharmony_ci nir_iadd(b, nir_imul(b, x_lo, y_hi), 421bf215546Sopenharmony_ci nir_imul(b, x_hi, y_lo))); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, mul_lo), 424bf215546Sopenharmony_ci res_hi); 425bf215546Sopenharmony_ci} 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cistatic nir_ssa_def * 428bf215546Sopenharmony_cilower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, 429bf215546Sopenharmony_ci bool sign_extend) 430bf215546Sopenharmony_ci{ 431bf215546Sopenharmony_ci nir_ssa_def *x32[4], *y32[4]; 432bf215546Sopenharmony_ci x32[0] = nir_unpack_64_2x32_split_x(b, x); 433bf215546Sopenharmony_ci x32[1] = nir_unpack_64_2x32_split_y(b, x); 434bf215546Sopenharmony_ci if (sign_extend) { 435bf215546Sopenharmony_ci x32[2] = x32[3] = nir_ishr_imm(b, x32[1], 31); 436bf215546Sopenharmony_ci } else { 437bf215546Sopenharmony_ci x32[2] = x32[3] = nir_imm_int(b, 0); 438bf215546Sopenharmony_ci } 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci y32[0] = nir_unpack_64_2x32_split_x(b, y); 441bf215546Sopenharmony_ci y32[1] = nir_unpack_64_2x32_split_y(b, y); 442bf215546Sopenharmony_ci if (sign_extend) { 443bf215546Sopenharmony_ci y32[2] = y32[3] = nir_ishr_imm(b, y32[1], 31); 444bf215546Sopenharmony_ci } else { 445bf215546Sopenharmony_ci y32[2] = y32[3] = nir_imm_int(b, 0); 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci nir_ssa_def *res[8] = { NULL, }; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* Yes, the following generates a pile of code. However, we throw res[0] 451bf215546Sopenharmony_ci * and res[1] away in the end and, if we're in the umul case, four of our 452bf215546Sopenharmony_ci * eight dword operands will be constant zero and opt_algebraic will clean 453bf215546Sopenharmony_ci * this up nicely. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 456bf215546Sopenharmony_ci nir_ssa_def *carry = NULL; 457bf215546Sopenharmony_ci for (unsigned j = 0; j < 4; j++) { 458bf215546Sopenharmony_ci /* The maximum values of x32[i] and y32[j] are UINT32_MAX so the 459bf215546Sopenharmony_ci * maximum value of tmp is UINT32_MAX * UINT32_MAX. The maximum 460bf215546Sopenharmony_ci * value that will fit in tmp is 461bf215546Sopenharmony_ci * 462bf215546Sopenharmony_ci * UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX 463bf215546Sopenharmony_ci * = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX 464bf215546Sopenharmony_ci * = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX 465bf215546Sopenharmony_ci * 466bf215546Sopenharmony_ci * so we're guaranteed that we can add in two more 32-bit values 467bf215546Sopenharmony_ci * without overflowing tmp. 468bf215546Sopenharmony_ci */ 469bf215546Sopenharmony_ci nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]); 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci if (res[i + j]) 472bf215546Sopenharmony_ci tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j])); 473bf215546Sopenharmony_ci if (carry) 474bf215546Sopenharmony_ci tmp = nir_iadd(b, tmp, carry); 475bf215546Sopenharmony_ci res[i + j] = nir_u2u32(b, tmp); 476bf215546Sopenharmony_ci carry = nir_ushr_imm(b, tmp, 32); 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci res[i + 4] = nir_u2u32(b, carry); 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, res[2], res[3]); 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_cistatic nir_ssa_def * 485bf215546Sopenharmony_cilower_isign64(nir_builder *b, nir_ssa_def *x) 486bf215546Sopenharmony_ci{ 487bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 488bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi)); 491bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_ishr_imm(b, x_hi, 31); 492bf215546Sopenharmony_ci nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i32(b, is_non_zero)); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, res_lo, res_hi); 495bf215546Sopenharmony_ci} 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_cistatic void 498bf215546Sopenharmony_cilower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d, 499bf215546Sopenharmony_ci nir_ssa_def **q, nir_ssa_def **r) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci /* TODO: We should specially handle the case where the denominator is a 502bf215546Sopenharmony_ci * constant. In that case, we should be able to reduce it to a multiply by 503bf215546Sopenharmony_ci * a constant, some shifts, and an add. 504bf215546Sopenharmony_ci */ 505bf215546Sopenharmony_ci nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n); 506bf215546Sopenharmony_ci nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n); 507bf215546Sopenharmony_ci nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d); 508bf215546Sopenharmony_ci nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci nir_ssa_def *q_lo = nir_imm_zero(b, n->num_components, 32); 511bf215546Sopenharmony_ci nir_ssa_def *q_hi = nir_imm_zero(b, n->num_components, 32); 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci nir_ssa_def *n_hi_before_if = n_hi; 514bf215546Sopenharmony_ci nir_ssa_def *q_hi_before_if = q_hi; 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci /* If the upper 32 bits of denom are non-zero, it is impossible for shifts 517bf215546Sopenharmony_ci * greater than 32 bits to occur. If the upper 32 bits of the numerator 518bf215546Sopenharmony_ci * are zero, it is impossible for (denom << [63, 32]) <= numer unless 519bf215546Sopenharmony_ci * denom == 0. 520bf215546Sopenharmony_ci */ 521bf215546Sopenharmony_ci nir_ssa_def *need_high_div = 522bf215546Sopenharmony_ci nir_iand(b, nir_ieq_imm(b, d_hi, 0), nir_uge(b, n_hi, d_lo)); 523bf215546Sopenharmony_ci nir_push_if(b, nir_bany(b, need_high_div)); 524bf215546Sopenharmony_ci { 525bf215546Sopenharmony_ci /* If we only have one component, then the bany above goes away and 526bf215546Sopenharmony_ci * this is always true within the if statement. 527bf215546Sopenharmony_ci */ 528bf215546Sopenharmony_ci if (n->num_components == 1) 529bf215546Sopenharmony_ci need_high_div = nir_imm_true(b); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci for (int i = 31; i >= 0; i--) { 534bf215546Sopenharmony_ci /* if ((d.x << i) <= n.y) { 535bf215546Sopenharmony_ci * n.y -= d.x << i; 536bf215546Sopenharmony_ci * quot.y |= 1U << i; 537bf215546Sopenharmony_ci * } 538bf215546Sopenharmony_ci */ 539bf215546Sopenharmony_ci nir_ssa_def *d_shift = nir_ishl(b, d_lo, nir_imm_int(b, i)); 540bf215546Sopenharmony_ci nir_ssa_def *new_n_hi = nir_isub(b, n_hi, d_shift); 541bf215546Sopenharmony_ci nir_ssa_def *new_q_hi = nir_ior(b, q_hi, nir_imm_int(b, 1u << i)); 542bf215546Sopenharmony_ci nir_ssa_def *cond = nir_iand(b, need_high_div, 543bf215546Sopenharmony_ci nir_uge(b, n_hi, d_shift)); 544bf215546Sopenharmony_ci if (i != 0) { 545bf215546Sopenharmony_ci /* log2_d_lo is always <= 31, so we don't need to bother with it 546bf215546Sopenharmony_ci * in the last iteration. 547bf215546Sopenharmony_ci */ 548bf215546Sopenharmony_ci cond = nir_iand(b, cond, 549bf215546Sopenharmony_ci nir_ige(b, nir_imm_int(b, 31 - i), log2_d_lo)); 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci n_hi = nir_bcsel(b, cond, new_n_hi, n_hi); 552bf215546Sopenharmony_ci q_hi = nir_bcsel(b, cond, new_q_hi, q_hi); 553bf215546Sopenharmony_ci } 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci nir_pop_if(b, NULL); 556bf215546Sopenharmony_ci n_hi = nir_if_phi(b, n_hi, n_hi_before_if); 557bf215546Sopenharmony_ci q_hi = nir_if_phi(b, q_hi, q_hi_before_if); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci nir_ssa_def *log2_denom = nir_ufind_msb(b, d_hi); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci n = nir_pack_64_2x32_split(b, n_lo, n_hi); 562bf215546Sopenharmony_ci d = nir_pack_64_2x32_split(b, d_lo, d_hi); 563bf215546Sopenharmony_ci for (int i = 31; i >= 0; i--) { 564bf215546Sopenharmony_ci /* if ((d64 << i) <= n64) { 565bf215546Sopenharmony_ci * n64 -= d64 << i; 566bf215546Sopenharmony_ci * quot.x |= 1U << i; 567bf215546Sopenharmony_ci * } 568bf215546Sopenharmony_ci */ 569bf215546Sopenharmony_ci nir_ssa_def *d_shift = nir_ishl(b, d, nir_imm_int(b, i)); 570bf215546Sopenharmony_ci nir_ssa_def *new_n = nir_isub(b, n, d_shift); 571bf215546Sopenharmony_ci nir_ssa_def *new_q_lo = nir_ior(b, q_lo, nir_imm_int(b, 1u << i)); 572bf215546Sopenharmony_ci nir_ssa_def *cond = nir_uge(b, n, d_shift); 573bf215546Sopenharmony_ci if (i != 0) { 574bf215546Sopenharmony_ci /* log2_denom is always <= 31, so we don't need to bother with it 575bf215546Sopenharmony_ci * in the last iteration. 576bf215546Sopenharmony_ci */ 577bf215546Sopenharmony_ci cond = nir_iand(b, cond, 578bf215546Sopenharmony_ci nir_ige(b, nir_imm_int(b, 31 - i), log2_denom)); 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci n = nir_bcsel(b, cond, new_n, n); 581bf215546Sopenharmony_ci q_lo = nir_bcsel(b, cond, new_q_lo, q_lo); 582bf215546Sopenharmony_ci } 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci *q = nir_pack_64_2x32_split(b, q_lo, q_hi); 585bf215546Sopenharmony_ci *r = n; 586bf215546Sopenharmony_ci} 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_cistatic nir_ssa_def * 589bf215546Sopenharmony_cilower_udiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) 590bf215546Sopenharmony_ci{ 591bf215546Sopenharmony_ci nir_ssa_def *q, *r; 592bf215546Sopenharmony_ci lower_udiv64_mod64(b, n, d, &q, &r); 593bf215546Sopenharmony_ci return q; 594bf215546Sopenharmony_ci} 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_cistatic nir_ssa_def * 597bf215546Sopenharmony_cilower_idiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) 598bf215546Sopenharmony_ci{ 599bf215546Sopenharmony_ci nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n); 600bf215546Sopenharmony_ci nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d); 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci nir_ssa_def *negate = nir_ine(b, nir_ilt(b, n_hi, nir_imm_int(b, 0)), 603bf215546Sopenharmony_ci nir_ilt(b, d_hi, nir_imm_int(b, 0))); 604bf215546Sopenharmony_ci nir_ssa_def *q, *r; 605bf215546Sopenharmony_ci lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r); 606bf215546Sopenharmony_ci return nir_bcsel(b, negate, nir_ineg(b, q), q); 607bf215546Sopenharmony_ci} 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_cistatic nir_ssa_def * 610bf215546Sopenharmony_cilower_umod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) 611bf215546Sopenharmony_ci{ 612bf215546Sopenharmony_ci nir_ssa_def *q, *r; 613bf215546Sopenharmony_ci lower_udiv64_mod64(b, n, d, &q, &r); 614bf215546Sopenharmony_ci return r; 615bf215546Sopenharmony_ci} 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_cistatic nir_ssa_def * 618bf215546Sopenharmony_cilower_imod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) 619bf215546Sopenharmony_ci{ 620bf215546Sopenharmony_ci nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n); 621bf215546Sopenharmony_ci nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d); 622bf215546Sopenharmony_ci nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0)); 623bf215546Sopenharmony_ci nir_ssa_def *d_is_neg = nir_ilt(b, d_hi, nir_imm_int(b, 0)); 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci nir_ssa_def *q, *r; 626bf215546Sopenharmony_ci lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci nir_ssa_def *rem = nir_bcsel(b, n_is_neg, nir_ineg(b, r), r); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq_imm(b, r, 0), nir_imm_int64(b, 0), 631bf215546Sopenharmony_ci nir_bcsel(b, nir_ieq(b, n_is_neg, d_is_neg), rem, 632bf215546Sopenharmony_ci nir_iadd(b, rem, d))); 633bf215546Sopenharmony_ci} 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_cistatic nir_ssa_def * 636bf215546Sopenharmony_cilower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) 637bf215546Sopenharmony_ci{ 638bf215546Sopenharmony_ci nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n); 639bf215546Sopenharmony_ci nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0)); 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci nir_ssa_def *q, *r; 642bf215546Sopenharmony_ci lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r); 643bf215546Sopenharmony_ci return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r); 644bf215546Sopenharmony_ci} 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_cistatic nir_ssa_def * 647bf215546Sopenharmony_cilower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c) 648bf215546Sopenharmony_ci{ 649bf215546Sopenharmony_ci assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 || 650bf215546Sopenharmony_ci op == nir_op_extract_u16 || op == nir_op_extract_i16); 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci const int chunk = nir_src_as_uint(nir_src_for_ssa(c)); 653bf215546Sopenharmony_ci const int chunk_bits = 654bf215546Sopenharmony_ci (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16; 655bf215546Sopenharmony_ci const int num_chunks_in_32 = 32 / chunk_bits; 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci nir_ssa_def *extract32; 658bf215546Sopenharmony_ci if (chunk < num_chunks_in_32) { 659bf215546Sopenharmony_ci extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x), 660bf215546Sopenharmony_ci nir_imm_int(b, chunk), 661bf215546Sopenharmony_ci NULL, NULL); 662bf215546Sopenharmony_ci } else { 663bf215546Sopenharmony_ci extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x), 664bf215546Sopenharmony_ci nir_imm_int(b, chunk - num_chunks_in_32), 665bf215546Sopenharmony_ci NULL, NULL); 666bf215546Sopenharmony_ci } 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci if (op == nir_op_extract_i8 || op == nir_op_extract_i16) 669bf215546Sopenharmony_ci return lower_i2i64(b, extract32); 670bf215546Sopenharmony_ci else 671bf215546Sopenharmony_ci return lower_u2u64(b, extract32); 672bf215546Sopenharmony_ci} 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_cistatic nir_ssa_def * 675bf215546Sopenharmony_cilower_ufind_msb64(nir_builder *b, nir_ssa_def *x) 676bf215546Sopenharmony_ci{ 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 679bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 680bf215546Sopenharmony_ci nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo); 681bf215546Sopenharmony_ci nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi); 682bf215546Sopenharmony_ci nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0)); 683bf215546Sopenharmony_ci nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count); 684bf215546Sopenharmony_ci return nir_bcsel(b, valid_hi_bits, hi_res, lo_count); 685bf215546Sopenharmony_ci} 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_cistatic nir_ssa_def * 688bf215546Sopenharmony_cilower_2f(nir_builder *b, nir_ssa_def *x, unsigned dest_bit_size, 689bf215546Sopenharmony_ci bool src_is_signed) 690bf215546Sopenharmony_ci{ 691bf215546Sopenharmony_ci nir_ssa_def *x_sign = NULL; 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (src_is_signed) { 694bf215546Sopenharmony_ci x_sign = nir_bcsel(b, COND_LOWER_CMP(b, ilt, x, nir_imm_int64(b, 0)), 695bf215546Sopenharmony_ci nir_imm_floatN_t(b, -1, dest_bit_size), 696bf215546Sopenharmony_ci nir_imm_floatN_t(b, 1, dest_bit_size)); 697bf215546Sopenharmony_ci x = COND_LOWER_OP(b, iabs, x); 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci nir_ssa_def *exp = COND_LOWER_OP(b, ufind_msb, x); 701bf215546Sopenharmony_ci unsigned significand_bits; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci switch (dest_bit_size) { 704bf215546Sopenharmony_ci case 32: 705bf215546Sopenharmony_ci significand_bits = 23; 706bf215546Sopenharmony_ci break; 707bf215546Sopenharmony_ci case 16: 708bf215546Sopenharmony_ci significand_bits = 10; 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci default: 711bf215546Sopenharmony_ci unreachable("Invalid dest_bit_size"); 712bf215546Sopenharmony_ci } 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci nir_ssa_def *discard = 715bf215546Sopenharmony_ci nir_imax(b, nir_isub(b, exp, nir_imm_int(b, significand_bits)), 716bf215546Sopenharmony_ci nir_imm_int(b, 0)); 717bf215546Sopenharmony_ci nir_ssa_def *significand = 718bf215546Sopenharmony_ci COND_LOWER_CAST(b, u2u32, COND_LOWER_OP(b, ushr, x, discard)); 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci /* Round-to-nearest-even implementation: 721bf215546Sopenharmony_ci * - if the non-representable part of the significand is higher than half 722bf215546Sopenharmony_ci * the minimum representable significand, we round-up 723bf215546Sopenharmony_ci * - if the non-representable part of the significand is equal to half the 724bf215546Sopenharmony_ci * minimum representable significand and the representable part of the 725bf215546Sopenharmony_ci * significand is odd, we round-up 726bf215546Sopenharmony_ci * - in any other case, we round-down 727bf215546Sopenharmony_ci */ 728bf215546Sopenharmony_ci nir_ssa_def *lsb_mask = COND_LOWER_OP(b, ishl, nir_imm_int64(b, 1), discard); 729bf215546Sopenharmony_ci nir_ssa_def *rem_mask = COND_LOWER_OP(b, isub, lsb_mask, nir_imm_int64(b, 1)); 730bf215546Sopenharmony_ci nir_ssa_def *half = COND_LOWER_OP(b, ishr, lsb_mask, nir_imm_int(b, 1)); 731bf215546Sopenharmony_ci nir_ssa_def *rem = COND_LOWER_OP(b, iand, x, rem_mask); 732bf215546Sopenharmony_ci nir_ssa_def *halfway = nir_iand(b, COND_LOWER_CMP(b, ieq, rem, half), 733bf215546Sopenharmony_ci nir_ine(b, discard, nir_imm_int(b, 0))); 734bf215546Sopenharmony_ci nir_ssa_def *is_odd = nir_i2b(b, nir_iand(b, significand, nir_imm_int(b, 1))); 735bf215546Sopenharmony_ci nir_ssa_def *round_up = nir_ior(b, COND_LOWER_CMP(b, ilt, half, rem), 736bf215546Sopenharmony_ci nir_iand(b, halfway, is_odd)); 737bf215546Sopenharmony_ci significand = nir_iadd(b, significand, nir_b2i32(b, round_up)); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci nir_ssa_def *res; 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci if (dest_bit_size == 32) 742bf215546Sopenharmony_ci res = nir_fmul(b, nir_u2f32(b, significand), 743bf215546Sopenharmony_ci nir_fexp2(b, nir_u2f32(b, discard))); 744bf215546Sopenharmony_ci else 745bf215546Sopenharmony_ci res = nir_fmul(b, nir_u2f16(b, significand), 746bf215546Sopenharmony_ci nir_fexp2(b, nir_u2f16(b, discard))); 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci if (src_is_signed) 749bf215546Sopenharmony_ci res = nir_fmul(b, res, x_sign); 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci return res; 752bf215546Sopenharmony_ci} 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_cistatic nir_ssa_def * 755bf215546Sopenharmony_cilower_f2(nir_builder *b, nir_ssa_def *x, bool dst_is_signed) 756bf215546Sopenharmony_ci{ 757bf215546Sopenharmony_ci assert(x->bit_size == 16 || x->bit_size == 32); 758bf215546Sopenharmony_ci nir_ssa_def *x_sign = NULL; 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci if (dst_is_signed) 761bf215546Sopenharmony_ci x_sign = nir_fsign(b, x); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci x = nir_ftrunc(b, x); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci if (dst_is_signed) 766bf215546Sopenharmony_ci x = nir_fabs(b, x); 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci nir_ssa_def *res; 769bf215546Sopenharmony_ci if (x->bit_size < 32) { 770bf215546Sopenharmony_ci res = nir_pack_64_2x32_split(b, nir_f2u32(b, x), nir_imm_int(b, 0)); 771bf215546Sopenharmony_ci } else { 772bf215546Sopenharmony_ci nir_ssa_def *div = nir_imm_floatN_t(b, 1ULL << 32, x->bit_size); 773bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_f2u32(b, nir_fdiv(b, x, div)); 774bf215546Sopenharmony_ci nir_ssa_def *res_lo = nir_f2u32(b, nir_frem(b, x, div)); 775bf215546Sopenharmony_ci res = nir_pack_64_2x32_split(b, res_lo, res_hi); 776bf215546Sopenharmony_ci } 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci if (dst_is_signed) 779bf215546Sopenharmony_ci res = nir_bcsel(b, nir_flt(b, x_sign, nir_imm_floatN_t(b, 0, x->bit_size)), 780bf215546Sopenharmony_ci nir_ineg(b, res), res); 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci return res; 783bf215546Sopenharmony_ci} 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_cistatic nir_ssa_def * 786bf215546Sopenharmony_cilower_bit_count64(nir_builder *b, nir_ssa_def *x) 787bf215546Sopenharmony_ci{ 788bf215546Sopenharmony_ci nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); 789bf215546Sopenharmony_ci nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); 790bf215546Sopenharmony_ci nir_ssa_def *lo_count = nir_bit_count(b, x_lo); 791bf215546Sopenharmony_ci nir_ssa_def *hi_count = nir_bit_count(b, x_hi); 792bf215546Sopenharmony_ci return nir_iadd(b, lo_count, hi_count); 793bf215546Sopenharmony_ci} 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_cinir_lower_int64_options 796bf215546Sopenharmony_cinir_lower_int64_op_to_options_mask(nir_op opcode) 797bf215546Sopenharmony_ci{ 798bf215546Sopenharmony_ci switch (opcode) { 799bf215546Sopenharmony_ci case nir_op_imul: 800bf215546Sopenharmony_ci case nir_op_amul: 801bf215546Sopenharmony_ci return nir_lower_imul64; 802bf215546Sopenharmony_ci case nir_op_imul_2x32_64: 803bf215546Sopenharmony_ci case nir_op_umul_2x32_64: 804bf215546Sopenharmony_ci return nir_lower_imul_2x32_64; 805bf215546Sopenharmony_ci case nir_op_imul_high: 806bf215546Sopenharmony_ci case nir_op_umul_high: 807bf215546Sopenharmony_ci return nir_lower_imul_high64; 808bf215546Sopenharmony_ci case nir_op_isign: 809bf215546Sopenharmony_ci return nir_lower_isign64; 810bf215546Sopenharmony_ci case nir_op_udiv: 811bf215546Sopenharmony_ci case nir_op_idiv: 812bf215546Sopenharmony_ci case nir_op_umod: 813bf215546Sopenharmony_ci case nir_op_imod: 814bf215546Sopenharmony_ci case nir_op_irem: 815bf215546Sopenharmony_ci return nir_lower_divmod64; 816bf215546Sopenharmony_ci case nir_op_b2i64: 817bf215546Sopenharmony_ci case nir_op_i2b1: 818bf215546Sopenharmony_ci case nir_op_i2i8: 819bf215546Sopenharmony_ci case nir_op_i2i16: 820bf215546Sopenharmony_ci case nir_op_i2i32: 821bf215546Sopenharmony_ci case nir_op_i2i64: 822bf215546Sopenharmony_ci case nir_op_u2u8: 823bf215546Sopenharmony_ci case nir_op_u2u16: 824bf215546Sopenharmony_ci case nir_op_u2u32: 825bf215546Sopenharmony_ci case nir_op_u2u64: 826bf215546Sopenharmony_ci case nir_op_i2f32: 827bf215546Sopenharmony_ci case nir_op_u2f32: 828bf215546Sopenharmony_ci case nir_op_i2f16: 829bf215546Sopenharmony_ci case nir_op_u2f16: 830bf215546Sopenharmony_ci case nir_op_f2i64: 831bf215546Sopenharmony_ci case nir_op_f2u64: 832bf215546Sopenharmony_ci case nir_op_bcsel: 833bf215546Sopenharmony_ci return nir_lower_mov64; 834bf215546Sopenharmony_ci case nir_op_ieq: 835bf215546Sopenharmony_ci case nir_op_ine: 836bf215546Sopenharmony_ci case nir_op_ult: 837bf215546Sopenharmony_ci case nir_op_ilt: 838bf215546Sopenharmony_ci case nir_op_uge: 839bf215546Sopenharmony_ci case nir_op_ige: 840bf215546Sopenharmony_ci return nir_lower_icmp64; 841bf215546Sopenharmony_ci case nir_op_iadd: 842bf215546Sopenharmony_ci case nir_op_isub: 843bf215546Sopenharmony_ci return nir_lower_iadd64; 844bf215546Sopenharmony_ci case nir_op_imin: 845bf215546Sopenharmony_ci case nir_op_imax: 846bf215546Sopenharmony_ci case nir_op_umin: 847bf215546Sopenharmony_ci case nir_op_umax: 848bf215546Sopenharmony_ci return nir_lower_minmax64; 849bf215546Sopenharmony_ci case nir_op_iabs: 850bf215546Sopenharmony_ci return nir_lower_iabs64; 851bf215546Sopenharmony_ci case nir_op_ineg: 852bf215546Sopenharmony_ci return nir_lower_ineg64; 853bf215546Sopenharmony_ci case nir_op_iand: 854bf215546Sopenharmony_ci case nir_op_ior: 855bf215546Sopenharmony_ci case nir_op_ixor: 856bf215546Sopenharmony_ci case nir_op_inot: 857bf215546Sopenharmony_ci return nir_lower_logic64; 858bf215546Sopenharmony_ci case nir_op_ishl: 859bf215546Sopenharmony_ci case nir_op_ishr: 860bf215546Sopenharmony_ci case nir_op_ushr: 861bf215546Sopenharmony_ci return nir_lower_shift64; 862bf215546Sopenharmony_ci case nir_op_extract_u8: 863bf215546Sopenharmony_ci case nir_op_extract_i8: 864bf215546Sopenharmony_ci case nir_op_extract_u16: 865bf215546Sopenharmony_ci case nir_op_extract_i16: 866bf215546Sopenharmony_ci return nir_lower_extract64; 867bf215546Sopenharmony_ci case nir_op_ufind_msb: 868bf215546Sopenharmony_ci return nir_lower_ufind_msb64; 869bf215546Sopenharmony_ci case nir_op_bit_count: 870bf215546Sopenharmony_ci return nir_lower_bit_count64; 871bf215546Sopenharmony_ci default: 872bf215546Sopenharmony_ci return 0; 873bf215546Sopenharmony_ci } 874bf215546Sopenharmony_ci} 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_cistatic nir_ssa_def * 877bf215546Sopenharmony_cilower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci nir_ssa_def *src[4]; 880bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) 881bf215546Sopenharmony_ci src[i] = nir_ssa_for_alu_src(b, alu, i); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci switch (alu->op) { 884bf215546Sopenharmony_ci case nir_op_imul: 885bf215546Sopenharmony_ci case nir_op_amul: 886bf215546Sopenharmony_ci return lower_imul64(b, src[0], src[1]); 887bf215546Sopenharmony_ci case nir_op_imul_2x32_64: 888bf215546Sopenharmony_ci return lower_mul_2x32_64(b, src[0], src[1], true); 889bf215546Sopenharmony_ci case nir_op_umul_2x32_64: 890bf215546Sopenharmony_ci return lower_mul_2x32_64(b, src[0], src[1], false); 891bf215546Sopenharmony_ci case nir_op_imul_high: 892bf215546Sopenharmony_ci return lower_mul_high64(b, src[0], src[1], true); 893bf215546Sopenharmony_ci case nir_op_umul_high: 894bf215546Sopenharmony_ci return lower_mul_high64(b, src[0], src[1], false); 895bf215546Sopenharmony_ci case nir_op_isign: 896bf215546Sopenharmony_ci return lower_isign64(b, src[0]); 897bf215546Sopenharmony_ci case nir_op_udiv: 898bf215546Sopenharmony_ci return lower_udiv64(b, src[0], src[1]); 899bf215546Sopenharmony_ci case nir_op_idiv: 900bf215546Sopenharmony_ci return lower_idiv64(b, src[0], src[1]); 901bf215546Sopenharmony_ci case nir_op_umod: 902bf215546Sopenharmony_ci return lower_umod64(b, src[0], src[1]); 903bf215546Sopenharmony_ci case nir_op_imod: 904bf215546Sopenharmony_ci return lower_imod64(b, src[0], src[1]); 905bf215546Sopenharmony_ci case nir_op_irem: 906bf215546Sopenharmony_ci return lower_irem64(b, src[0], src[1]); 907bf215546Sopenharmony_ci case nir_op_b2i64: 908bf215546Sopenharmony_ci return lower_b2i64(b, src[0]); 909bf215546Sopenharmony_ci case nir_op_i2b1: 910bf215546Sopenharmony_ci return lower_i2b(b, src[0]); 911bf215546Sopenharmony_ci case nir_op_i2i8: 912bf215546Sopenharmony_ci return lower_i2i8(b, src[0]); 913bf215546Sopenharmony_ci case nir_op_i2i16: 914bf215546Sopenharmony_ci return lower_i2i16(b, src[0]); 915bf215546Sopenharmony_ci case nir_op_i2i32: 916bf215546Sopenharmony_ci return lower_i2i32(b, src[0]); 917bf215546Sopenharmony_ci case nir_op_i2i64: 918bf215546Sopenharmony_ci return lower_i2i64(b, src[0]); 919bf215546Sopenharmony_ci case nir_op_u2u8: 920bf215546Sopenharmony_ci return lower_u2u8(b, src[0]); 921bf215546Sopenharmony_ci case nir_op_u2u16: 922bf215546Sopenharmony_ci return lower_u2u16(b, src[0]); 923bf215546Sopenharmony_ci case nir_op_u2u32: 924bf215546Sopenharmony_ci return lower_u2u32(b, src[0]); 925bf215546Sopenharmony_ci case nir_op_u2u64: 926bf215546Sopenharmony_ci return lower_u2u64(b, src[0]); 927bf215546Sopenharmony_ci case nir_op_bcsel: 928bf215546Sopenharmony_ci return lower_bcsel64(b, src[0], src[1], src[2]); 929bf215546Sopenharmony_ci case nir_op_ieq: 930bf215546Sopenharmony_ci case nir_op_ine: 931bf215546Sopenharmony_ci case nir_op_ult: 932bf215546Sopenharmony_ci case nir_op_ilt: 933bf215546Sopenharmony_ci case nir_op_uge: 934bf215546Sopenharmony_ci case nir_op_ige: 935bf215546Sopenharmony_ci return lower_int64_compare(b, alu->op, src[0], src[1]); 936bf215546Sopenharmony_ci case nir_op_iadd: 937bf215546Sopenharmony_ci return lower_iadd64(b, src[0], src[1]); 938bf215546Sopenharmony_ci case nir_op_isub: 939bf215546Sopenharmony_ci return lower_isub64(b, src[0], src[1]); 940bf215546Sopenharmony_ci case nir_op_imin: 941bf215546Sopenharmony_ci return lower_imin64(b, src[0], src[1]); 942bf215546Sopenharmony_ci case nir_op_imax: 943bf215546Sopenharmony_ci return lower_imax64(b, src[0], src[1]); 944bf215546Sopenharmony_ci case nir_op_umin: 945bf215546Sopenharmony_ci return lower_umin64(b, src[0], src[1]); 946bf215546Sopenharmony_ci case nir_op_umax: 947bf215546Sopenharmony_ci return lower_umax64(b, src[0], src[1]); 948bf215546Sopenharmony_ci case nir_op_iabs: 949bf215546Sopenharmony_ci return lower_iabs64(b, src[0]); 950bf215546Sopenharmony_ci case nir_op_ineg: 951bf215546Sopenharmony_ci return lower_ineg64(b, src[0]); 952bf215546Sopenharmony_ci case nir_op_iand: 953bf215546Sopenharmony_ci return lower_iand64(b, src[0], src[1]); 954bf215546Sopenharmony_ci case nir_op_ior: 955bf215546Sopenharmony_ci return lower_ior64(b, src[0], src[1]); 956bf215546Sopenharmony_ci case nir_op_ixor: 957bf215546Sopenharmony_ci return lower_ixor64(b, src[0], src[1]); 958bf215546Sopenharmony_ci case nir_op_inot: 959bf215546Sopenharmony_ci return lower_inot64(b, src[0]); 960bf215546Sopenharmony_ci case nir_op_ishl: 961bf215546Sopenharmony_ci return lower_ishl64(b, src[0], src[1]); 962bf215546Sopenharmony_ci case nir_op_ishr: 963bf215546Sopenharmony_ci return lower_ishr64(b, src[0], src[1]); 964bf215546Sopenharmony_ci case nir_op_ushr: 965bf215546Sopenharmony_ci return lower_ushr64(b, src[0], src[1]); 966bf215546Sopenharmony_ci case nir_op_extract_u8: 967bf215546Sopenharmony_ci case nir_op_extract_i8: 968bf215546Sopenharmony_ci case nir_op_extract_u16: 969bf215546Sopenharmony_ci case nir_op_extract_i16: 970bf215546Sopenharmony_ci return lower_extract(b, alu->op, src[0], src[1]); 971bf215546Sopenharmony_ci case nir_op_ufind_msb: 972bf215546Sopenharmony_ci return lower_ufind_msb64(b, src[0]); 973bf215546Sopenharmony_ci case nir_op_bit_count: 974bf215546Sopenharmony_ci return lower_bit_count64(b, src[0]); 975bf215546Sopenharmony_ci case nir_op_i2f64: 976bf215546Sopenharmony_ci case nir_op_i2f32: 977bf215546Sopenharmony_ci case nir_op_i2f16: 978bf215546Sopenharmony_ci return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), true); 979bf215546Sopenharmony_ci case nir_op_u2f64: 980bf215546Sopenharmony_ci case nir_op_u2f32: 981bf215546Sopenharmony_ci case nir_op_u2f16: 982bf215546Sopenharmony_ci return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), false); 983bf215546Sopenharmony_ci case nir_op_f2i64: 984bf215546Sopenharmony_ci case nir_op_f2u64: 985bf215546Sopenharmony_ci /* We don't support f64toi64 (yet?). */ 986bf215546Sopenharmony_ci if (src[0]->bit_size > 32) 987bf215546Sopenharmony_ci return false; 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci return lower_f2(b, src[0], alu->op == nir_op_f2i64); 990bf215546Sopenharmony_ci default: 991bf215546Sopenharmony_ci unreachable("Invalid ALU opcode to lower"); 992bf215546Sopenharmony_ci } 993bf215546Sopenharmony_ci} 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_cistatic bool 996bf215546Sopenharmony_cishould_lower_int64_alu_instr(const nir_alu_instr *alu, 997bf215546Sopenharmony_ci const nir_shader_compiler_options *options) 998bf215546Sopenharmony_ci{ 999bf215546Sopenharmony_ci switch (alu->op) { 1000bf215546Sopenharmony_ci case nir_op_i2b1: 1001bf215546Sopenharmony_ci case nir_op_i2i8: 1002bf215546Sopenharmony_ci case nir_op_i2i16: 1003bf215546Sopenharmony_ci case nir_op_i2i32: 1004bf215546Sopenharmony_ci case nir_op_u2u8: 1005bf215546Sopenharmony_ci case nir_op_u2u16: 1006bf215546Sopenharmony_ci case nir_op_u2u32: 1007bf215546Sopenharmony_ci assert(alu->src[0].src.is_ssa); 1008bf215546Sopenharmony_ci if (alu->src[0].src.ssa->bit_size != 64) 1009bf215546Sopenharmony_ci return false; 1010bf215546Sopenharmony_ci break; 1011bf215546Sopenharmony_ci case nir_op_bcsel: 1012bf215546Sopenharmony_ci assert(alu->src[1].src.is_ssa); 1013bf215546Sopenharmony_ci assert(alu->src[2].src.is_ssa); 1014bf215546Sopenharmony_ci assert(alu->src[1].src.ssa->bit_size == 1015bf215546Sopenharmony_ci alu->src[2].src.ssa->bit_size); 1016bf215546Sopenharmony_ci if (alu->src[1].src.ssa->bit_size != 64) 1017bf215546Sopenharmony_ci return false; 1018bf215546Sopenharmony_ci break; 1019bf215546Sopenharmony_ci case nir_op_ieq: 1020bf215546Sopenharmony_ci case nir_op_ine: 1021bf215546Sopenharmony_ci case nir_op_ult: 1022bf215546Sopenharmony_ci case nir_op_ilt: 1023bf215546Sopenharmony_ci case nir_op_uge: 1024bf215546Sopenharmony_ci case nir_op_ige: 1025bf215546Sopenharmony_ci assert(alu->src[0].src.is_ssa); 1026bf215546Sopenharmony_ci assert(alu->src[1].src.is_ssa); 1027bf215546Sopenharmony_ci assert(alu->src[0].src.ssa->bit_size == 1028bf215546Sopenharmony_ci alu->src[1].src.ssa->bit_size); 1029bf215546Sopenharmony_ci if (alu->src[0].src.ssa->bit_size != 64) 1030bf215546Sopenharmony_ci return false; 1031bf215546Sopenharmony_ci break; 1032bf215546Sopenharmony_ci case nir_op_ufind_msb: 1033bf215546Sopenharmony_ci case nir_op_bit_count: 1034bf215546Sopenharmony_ci assert(alu->src[0].src.is_ssa); 1035bf215546Sopenharmony_ci if (alu->src[0].src.ssa->bit_size != 64) 1036bf215546Sopenharmony_ci return false; 1037bf215546Sopenharmony_ci break; 1038bf215546Sopenharmony_ci case nir_op_amul: 1039bf215546Sopenharmony_ci assert(alu->dest.dest.is_ssa); 1040bf215546Sopenharmony_ci if (options->has_imul24) 1041bf215546Sopenharmony_ci return false; 1042bf215546Sopenharmony_ci if (alu->dest.dest.ssa.bit_size != 64) 1043bf215546Sopenharmony_ci return false; 1044bf215546Sopenharmony_ci break; 1045bf215546Sopenharmony_ci case nir_op_i2f64: 1046bf215546Sopenharmony_ci case nir_op_u2f64: 1047bf215546Sopenharmony_ci case nir_op_i2f32: 1048bf215546Sopenharmony_ci case nir_op_u2f32: 1049bf215546Sopenharmony_ci case nir_op_i2f16: 1050bf215546Sopenharmony_ci case nir_op_u2f16: 1051bf215546Sopenharmony_ci assert(alu->src[0].src.is_ssa); 1052bf215546Sopenharmony_ci if (alu->src[0].src.ssa->bit_size != 64) 1053bf215546Sopenharmony_ci return false; 1054bf215546Sopenharmony_ci break; 1055bf215546Sopenharmony_ci case nir_op_f2u64: 1056bf215546Sopenharmony_ci case nir_op_f2i64: 1057bf215546Sopenharmony_ci FALLTHROUGH; 1058bf215546Sopenharmony_ci default: 1059bf215546Sopenharmony_ci assert(alu->dest.dest.is_ssa); 1060bf215546Sopenharmony_ci if (alu->dest.dest.ssa.bit_size != 64) 1061bf215546Sopenharmony_ci return false; 1062bf215546Sopenharmony_ci break; 1063bf215546Sopenharmony_ci } 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci unsigned mask = nir_lower_int64_op_to_options_mask(alu->op); 1066bf215546Sopenharmony_ci return (options->lower_int64_options & mask) != 0; 1067bf215546Sopenharmony_ci} 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_cistatic nir_ssa_def * 1070bf215546Sopenharmony_cisplit_64bit_subgroup_op(nir_builder *b, const nir_intrinsic_instr *intrin) 1071bf215546Sopenharmony_ci{ 1072bf215546Sopenharmony_ci const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci /* This works on subgroup ops with a single 64-bit source which can be 1075bf215546Sopenharmony_ci * trivially lowered by doing the exact same op on both halves. 1076bf215546Sopenharmony_ci */ 1077bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa && intrin->src[0].ssa->bit_size == 64); 1078bf215546Sopenharmony_ci nir_ssa_def *split_src0[2] = { 1079bf215546Sopenharmony_ci nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa), 1080bf215546Sopenharmony_ci nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa), 1081bf215546Sopenharmony_ci }; 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci assert(info->has_dest && intrin->dest.is_ssa && 1084bf215546Sopenharmony_ci intrin->dest.ssa.bit_size == 64); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci nir_ssa_def *res[2]; 1087bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1088bf215546Sopenharmony_ci nir_intrinsic_instr *split = 1089bf215546Sopenharmony_ci nir_intrinsic_instr_create(b->shader, intrin->intrinsic); 1090bf215546Sopenharmony_ci split->num_components = intrin->num_components; 1091bf215546Sopenharmony_ci split->src[0] = nir_src_for_ssa(split_src0[i]); 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_ci /* Other sources must be less than 64 bits and get copied directly */ 1094bf215546Sopenharmony_ci for (unsigned j = 1; j < info->num_srcs; j++) { 1095bf215546Sopenharmony_ci assert(intrin->src[j].is_ssa && intrin->src[j].ssa->bit_size < 64); 1096bf215546Sopenharmony_ci split->src[j] = nir_src_for_ssa(intrin->src[j].ssa); 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci /* Copy const indices, if any */ 1100bf215546Sopenharmony_ci memcpy(split->const_index, intrin->const_index, 1101bf215546Sopenharmony_ci sizeof(intrin->const_index)); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci nir_ssa_dest_init(&split->instr, &split->dest, 1104bf215546Sopenharmony_ci intrin->dest.ssa.num_components, 32, NULL); 1105bf215546Sopenharmony_ci nir_builder_instr_insert(b, &split->instr); 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci res[i] = &split->dest.ssa; 1108bf215546Sopenharmony_ci } 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, res[0], res[1]); 1111bf215546Sopenharmony_ci} 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_cistatic nir_ssa_def * 1114bf215546Sopenharmony_cibuild_vote_ieq(nir_builder *b, nir_ssa_def *x) 1115bf215546Sopenharmony_ci{ 1116bf215546Sopenharmony_ci nir_intrinsic_instr *vote = 1117bf215546Sopenharmony_ci nir_intrinsic_instr_create(b->shader, nir_intrinsic_vote_ieq); 1118bf215546Sopenharmony_ci vote->src[0] = nir_src_for_ssa(x); 1119bf215546Sopenharmony_ci vote->num_components = x->num_components; 1120bf215546Sopenharmony_ci nir_ssa_dest_init(&vote->instr, &vote->dest, 1, 1, NULL); 1121bf215546Sopenharmony_ci nir_builder_instr_insert(b, &vote->instr); 1122bf215546Sopenharmony_ci return &vote->dest.ssa; 1123bf215546Sopenharmony_ci} 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_cistatic nir_ssa_def * 1126bf215546Sopenharmony_cilower_vote_ieq(nir_builder *b, nir_ssa_def *x) 1127bf215546Sopenharmony_ci{ 1128bf215546Sopenharmony_ci return nir_iand(b, build_vote_ieq(b, nir_unpack_64_2x32_split_x(b, x)), 1129bf215546Sopenharmony_ci build_vote_ieq(b, nir_unpack_64_2x32_split_y(b, x))); 1130bf215546Sopenharmony_ci} 1131bf215546Sopenharmony_ci 1132bf215546Sopenharmony_cistatic nir_ssa_def * 1133bf215546Sopenharmony_cibuild_scan_intrinsic(nir_builder *b, nir_intrinsic_op scan_op, 1134bf215546Sopenharmony_ci nir_op reduction_op, unsigned cluster_size, 1135bf215546Sopenharmony_ci nir_ssa_def *val) 1136bf215546Sopenharmony_ci{ 1137bf215546Sopenharmony_ci nir_intrinsic_instr *scan = 1138bf215546Sopenharmony_ci nir_intrinsic_instr_create(b->shader, scan_op); 1139bf215546Sopenharmony_ci scan->num_components = val->num_components; 1140bf215546Sopenharmony_ci scan->src[0] = nir_src_for_ssa(val); 1141bf215546Sopenharmony_ci nir_intrinsic_set_reduction_op(scan, reduction_op); 1142bf215546Sopenharmony_ci if (scan_op == nir_intrinsic_reduce) 1143bf215546Sopenharmony_ci nir_intrinsic_set_cluster_size(scan, cluster_size); 1144bf215546Sopenharmony_ci nir_ssa_dest_init(&scan->instr, &scan->dest, 1145bf215546Sopenharmony_ci val->num_components, val->bit_size, NULL); 1146bf215546Sopenharmony_ci nir_builder_instr_insert(b, &scan->instr); 1147bf215546Sopenharmony_ci return &scan->dest.ssa; 1148bf215546Sopenharmony_ci} 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_cistatic nir_ssa_def * 1151bf215546Sopenharmony_cilower_scan_iadd64(nir_builder *b, const nir_intrinsic_instr *intrin) 1152bf215546Sopenharmony_ci{ 1153bf215546Sopenharmony_ci unsigned cluster_size = 1154bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_reduce ? 1155bf215546Sopenharmony_ci nir_intrinsic_cluster_size(intrin) : 0; 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci /* Split it into three chunks of no more than 24 bits each. With 8 bits 1158bf215546Sopenharmony_ci * of headroom, we're guaranteed that there will never be overflow in the 1159bf215546Sopenharmony_ci * individual subgroup operations. (Assuming, of course, a subgroup size 1160bf215546Sopenharmony_ci * no larger than 256 which seems reasonable.) We can then scan on each of 1161bf215546Sopenharmony_ci * the chunks and add them back together at the end. 1162bf215546Sopenharmony_ci */ 1163bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa); 1164bf215546Sopenharmony_ci nir_ssa_def *x = intrin->src[0].ssa; 1165bf215546Sopenharmony_ci nir_ssa_def *x_low = 1166bf215546Sopenharmony_ci nir_u2u32(b, nir_iand_imm(b, x, 0xffffff)); 1167bf215546Sopenharmony_ci nir_ssa_def *x_mid = 1168bf215546Sopenharmony_ci nir_u2u32(b, nir_iand_imm(b, nir_ushr(b, x, nir_imm_int(b, 24)), 1169bf215546Sopenharmony_ci 0xffffff)); 1170bf215546Sopenharmony_ci nir_ssa_def *x_hi = 1171bf215546Sopenharmony_ci nir_u2u32(b, nir_ushr(b, x, nir_imm_int(b, 48))); 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci nir_ssa_def *scan_low = 1174bf215546Sopenharmony_ci build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd, 1175bf215546Sopenharmony_ci cluster_size, x_low); 1176bf215546Sopenharmony_ci nir_ssa_def *scan_mid = 1177bf215546Sopenharmony_ci build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd, 1178bf215546Sopenharmony_ci cluster_size, x_mid); 1179bf215546Sopenharmony_ci nir_ssa_def *scan_hi = 1180bf215546Sopenharmony_ci build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd, 1181bf215546Sopenharmony_ci cluster_size, x_hi); 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci scan_low = nir_u2u64(b, scan_low); 1184bf215546Sopenharmony_ci scan_mid = nir_ishl(b, nir_u2u64(b, scan_mid), nir_imm_int(b, 24)); 1185bf215546Sopenharmony_ci scan_hi = nir_ishl(b, nir_u2u64(b, scan_hi), nir_imm_int(b, 48)); 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci return nir_iadd(b, scan_hi, nir_iadd(b, scan_mid, scan_low)); 1188bf215546Sopenharmony_ci} 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_cistatic bool 1191bf215546Sopenharmony_cishould_lower_int64_intrinsic(const nir_intrinsic_instr *intrin, 1192bf215546Sopenharmony_ci const nir_shader_compiler_options *options) 1193bf215546Sopenharmony_ci{ 1194bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1195bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 1196bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 1197bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 1198bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 1199bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 1200bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 1201bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 1202bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 1203bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 1204bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 1205bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 1206bf215546Sopenharmony_ci return intrin->dest.ssa.bit_size == 64 && 1207bf215546Sopenharmony_ci (options->lower_int64_options & nir_lower_subgroup_shuffle64); 1208bf215546Sopenharmony_ci 1209bf215546Sopenharmony_ci case nir_intrinsic_vote_ieq: 1210bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa); 1211bf215546Sopenharmony_ci return intrin->src[0].ssa->bit_size == 64 && 1212bf215546Sopenharmony_ci (options->lower_int64_options & nir_lower_vote_ieq64); 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci case nir_intrinsic_reduce: 1215bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: 1216bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: 1217bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 1218bf215546Sopenharmony_ci if (intrin->dest.ssa.bit_size != 64) 1219bf215546Sopenharmony_ci return false; 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci switch (nir_intrinsic_reduction_op(intrin)) { 1222bf215546Sopenharmony_ci case nir_op_iadd: 1223bf215546Sopenharmony_ci return options->lower_int64_options & nir_lower_scan_reduce_iadd64; 1224bf215546Sopenharmony_ci case nir_op_iand: 1225bf215546Sopenharmony_ci case nir_op_ior: 1226bf215546Sopenharmony_ci case nir_op_ixor: 1227bf215546Sopenharmony_ci return options->lower_int64_options & nir_lower_scan_reduce_bitwise64; 1228bf215546Sopenharmony_ci default: 1229bf215546Sopenharmony_ci return false; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci break; 1232bf215546Sopenharmony_ci 1233bf215546Sopenharmony_ci default: 1234bf215546Sopenharmony_ci return false; 1235bf215546Sopenharmony_ci } 1236bf215546Sopenharmony_ci} 1237bf215546Sopenharmony_ci 1238bf215546Sopenharmony_cistatic nir_ssa_def * 1239bf215546Sopenharmony_cilower_int64_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) 1240bf215546Sopenharmony_ci{ 1241bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1242bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 1243bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 1244bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 1245bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 1246bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 1247bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 1248bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 1249bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 1250bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 1251bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 1252bf215546Sopenharmony_ci return split_64bit_subgroup_op(b, intrin); 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci case nir_intrinsic_vote_ieq: 1255bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa); 1256bf215546Sopenharmony_ci return lower_vote_ieq(b, intrin->src[0].ssa); 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci case nir_intrinsic_reduce: 1259bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: 1260bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: 1261bf215546Sopenharmony_ci switch (nir_intrinsic_reduction_op(intrin)) { 1262bf215546Sopenharmony_ci case nir_op_iadd: 1263bf215546Sopenharmony_ci return lower_scan_iadd64(b, intrin); 1264bf215546Sopenharmony_ci case nir_op_iand: 1265bf215546Sopenharmony_ci case nir_op_ior: 1266bf215546Sopenharmony_ci case nir_op_ixor: 1267bf215546Sopenharmony_ci return split_64bit_subgroup_op(b, intrin); 1268bf215546Sopenharmony_ci default: 1269bf215546Sopenharmony_ci unreachable("Unsupported subgroup scan/reduce op"); 1270bf215546Sopenharmony_ci } 1271bf215546Sopenharmony_ci break; 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_ci default: 1274bf215546Sopenharmony_ci unreachable("Unsupported intrinsic"); 1275bf215546Sopenharmony_ci } 1276bf215546Sopenharmony_ci} 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_cistatic bool 1279bf215546Sopenharmony_cishould_lower_int64_instr(const nir_instr *instr, const void *_options) 1280bf215546Sopenharmony_ci{ 1281bf215546Sopenharmony_ci switch (instr->type) { 1282bf215546Sopenharmony_ci case nir_instr_type_alu: 1283bf215546Sopenharmony_ci return should_lower_int64_alu_instr(nir_instr_as_alu(instr), _options); 1284bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1285bf215546Sopenharmony_ci return should_lower_int64_intrinsic(nir_instr_as_intrinsic(instr), 1286bf215546Sopenharmony_ci _options); 1287bf215546Sopenharmony_ci default: 1288bf215546Sopenharmony_ci return false; 1289bf215546Sopenharmony_ci } 1290bf215546Sopenharmony_ci} 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_cistatic nir_ssa_def * 1293bf215546Sopenharmony_cilower_int64_instr(nir_builder *b, nir_instr *instr, void *_options) 1294bf215546Sopenharmony_ci{ 1295bf215546Sopenharmony_ci switch (instr->type) { 1296bf215546Sopenharmony_ci case nir_instr_type_alu: 1297bf215546Sopenharmony_ci return lower_int64_alu_instr(b, nir_instr_as_alu(instr)); 1298bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1299bf215546Sopenharmony_ci return lower_int64_intrinsic(b, nir_instr_as_intrinsic(instr)); 1300bf215546Sopenharmony_ci default: 1301bf215546Sopenharmony_ci return NULL; 1302bf215546Sopenharmony_ci } 1303bf215546Sopenharmony_ci} 1304bf215546Sopenharmony_ci 1305bf215546Sopenharmony_cibool 1306bf215546Sopenharmony_cinir_lower_int64(nir_shader *shader) 1307bf215546Sopenharmony_ci{ 1308bf215546Sopenharmony_ci return nir_shader_lower_instructions(shader, should_lower_int64_instr, 1309bf215546Sopenharmony_ci lower_int64_instr, 1310bf215546Sopenharmony_ci (void *)shader->options); 1311bf215546Sopenharmony_ci} 1312