compiler/nir/nir_opt_algebraic.py

bf215546Sopenharmony_ci# -*- coding: utf-8 -*-
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# Copyright (C) 2014 Intel Corporation
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# Permission is hereby granted, free of charge, to any person obtaining a
bf215546Sopenharmony_ci# copy of this software and associated documentation files (the "Software"),
bf215546Sopenharmony_ci# to deal in the Software without restriction, including without limitation
bf215546Sopenharmony_ci# the rights to use, copy, modify, merge, publish, distribute, sublicense,
bf215546Sopenharmony_ci# and/or sell copies of the Software, and to permit persons to whom the
bf215546Sopenharmony_ci# Software is furnished to do so, subject to the following conditions:
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# The above copyright notice and this permission notice (including the next
bf215546Sopenharmony_ci# paragraph) shall be included in all copies or substantial portions of the
bf215546Sopenharmony_ci# Software.
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bf215546Sopenharmony_ci# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bf215546Sopenharmony_ci# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
bf215546Sopenharmony_ci# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bf215546Sopenharmony_ci# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
bf215546Sopenharmony_ci# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
bf215546Sopenharmony_ci# IN THE SOFTWARE.
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# Authors:
bf215546Sopenharmony_ci#    Jason Ekstrand (jason@jlekstrand.net)
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifrom collections import OrderedDict
bf215546Sopenharmony_ciimport nir_algebraic
bf215546Sopenharmony_cifrom nir_opcodes import type_sizes
bf215546Sopenharmony_ciimport itertools
bf215546Sopenharmony_ciimport struct
bf215546Sopenharmony_cifrom math import pi
bf215546Sopenharmony_ciimport math
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Convenience variables
bf215546Sopenharmony_cia = 'a'
bf215546Sopenharmony_cib = 'b'
bf215546Sopenharmony_cic = 'c'
bf215546Sopenharmony_cid = 'd'
bf215546Sopenharmony_cie = 'e'
bf215546Sopenharmony_ci
bf215546Sopenharmony_cisigned_zero_inf_nan_preserve_16 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 16)'
bf215546Sopenharmony_cisigned_zero_inf_nan_preserve_32 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 32)'
bf215546Sopenharmony_ci
bf215546Sopenharmony_ciignore_exact = nir_algebraic.ignore_exact
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Written in the form (<search>, <replace>) where <search> is an expression
bf215546Sopenharmony_ci# and <replace> is either an expression or a value.  An expression is
bf215546Sopenharmony_ci# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
bf215546Sopenharmony_ci# where each source is either an expression or a value.  A value can be
bf215546Sopenharmony_ci# either a numeric constant or a string representing a variable name.
bf215546Sopenharmony_ci#
# If the opcode in a search expression is prefixed by a '~' character, this
# indicates that the operation is inexact.  Such operations will only get
# applied to SSA values that do not have the exact bit set.  This should be
# used by any optimizations that are not bit-for-bit exact.  It should not,
# however, be used for backend-requested lowering operations as those need to
# happen regardless of precision.
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# Variable names are specified as "[#]name[@type][(cond)][.swiz]" where:
bf215546Sopenharmony_ci# "#" indicates that the given variable will only match constants,
bf215546Sopenharmony_ci# type indicates that the given variable will only match values from ALU
bf215546Sopenharmony_ci#    instructions with the given output type,
bf215546Sopenharmony_ci# (cond) specifies an additional condition function (see nir_search_helpers.h),
bf215546Sopenharmony_ci# swiz is a swizzle applied to the variable (only in the <replace> expression)
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# For constants, you have to be careful to make sure that it is the right
bf215546Sopenharmony_ci# type because python is unaware of the source and destination types of the
bf215546Sopenharmony_ci# opcodes.
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# All expression types can have a bit-size specified.  For opcodes, this
bf215546Sopenharmony_ci# looks like "op@32", for variables it is "a@32" or "a@uint32" to specify a
bf215546Sopenharmony_ci# type and size.  In the search half of the expression this indicates that it
bf215546Sopenharmony_ci# should only match that particular bit-size.  In the replace half of the
bf215546Sopenharmony_ci# expression this indicates that the constructed value should have that
bf215546Sopenharmony_ci# bit-size.
bf215546Sopenharmony_ci#
# If the opcode in a replacement expression is prefixed by a '!' character,
# this indicates that the new expression will be marked exact.
bf215546Sopenharmony_ci#
bf215546Sopenharmony_ci# A special condition "many-comm-expr" can be used with expressions to note
bf215546Sopenharmony_ci# that the expression and its subexpressions have more commutative expressions
bf215546Sopenharmony_ci# than nir_replace_instr can handle.  If this special condition is needed with
bf215546Sopenharmony_ci# another condition, the two can be separated by a comma (e.g.,
bf215546Sopenharmony_ci# "(many-comm-expr,is_used_once)").
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# based on https://web.archive.org/web/20180105155939/http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
def lowered_sincos(c):
    """Build the replacement expression for a polynomial sin/cos approximation.

    The result is a nested tuple in the (<op>, <src0>, ...) expression form
    used by the rules in this file.  It operates on the search variable
    ``a`` (the module-level convenience variable), not on a Python number.

    Args:
        c: Value added to ``a * (0.5 / pi)`` before the fractional part is
            taken, i.e. a phase offset expressed in turns.  Note that this
            parameter shadows the module-level convenience variable ``c``
            inside the function.

    Returns:
        A tuple expression tree rooted at an 'ffma' opcode.
    """
    # Range reduction: 2 * fract(a / (2 * pi) + c) - 1.
    x = ('fsub', ('fmul', 2.0, ('ffract', ('fadd', ('fmul', 0.5 / pi, a), c))), 1.0)
    # First parabolic approximation: 4 * (x - x * |x|).
    x = ('fmul', ('fsub', x, ('fmul', x, ('fabs', x))), 4.0)
    # Refinement step: x + 0.225 * (x * |x| - x).
    return ('ffma', ('ffma', x, ('fabs', x), ('fneg', x)), 0.225, x)
bf215546Sopenharmony_ci
def intBitsToFloat(i):
    """Reinterpret the bits of a 32-bit unsigned integer as a float.

    The integer is packed as a big-endian unsigned 32-bit value and the same
    four bytes are unpacked as a big-endian single-precision float.

    Args:
        i: Integer in the range 0 .. 0xffffffff holding the raw bit pattern.

    Returns:
        The Python float whose single-precision encoding is ``i``.

    Raises:
        struct.error: If ``i`` does not fit in an unsigned 32-bit integer.
    """
    return struct.unpack('!f', struct.pack('!I', i))[0]
bf215546Sopenharmony_ci
# Main table of algebraic rewrite rules.  Each entry is a tuple of the form
# (search, replace) or (search, replace, condition), where search and replace
# use the expression syntax described in the comment block above.
# NOTE(review): the optional third element looks like a C boolean expression
# over the compiler options that gates the rule -- confirm against
# nir_algebraic.  The entries are kept in their original order.
optimizations = [

   (('imul', a, '#b(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
   (('imul', 'a@8', 0x80), ('ishl', a, 7), '!options->lower_bitops'),
   (('imul', 'a@16', 0x8000), ('ishl', a, 15), '!options->lower_bitops'),
   (('imul', 'a@32', 0x80000000), ('ishl', a, 31), '!options->lower_bitops'),
   (('imul', 'a@64', 0x8000000000000000), ('ishl', a, 63), '!options->lower_bitops'),
   (('imul', a, '#b(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
   (('ishl', a, '#b'), ('imul', a, ('ishl', 1, b)), 'options->lower_bitops'),

   (('imul@64', a, '#b(is_bitcount2)'), ('iadd', ('ishl', a, ('ufind_msb', b)), ('ishl', a, ('find_lsb', b))),
    '!options->lower_bitops && (options->lower_int64_options & (nir_lower_imul64 | nir_lower_shift64)) == nir_lower_imul64'),

   (('unpack_64_2x32_split_x', ('imul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
   (('unpack_64_2x32_split_x', ('umul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
   (('imul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('imul_high', a, b)), 'options->lower_mul_2x32_64'),
   (('umul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('umul_high', a, b)), 'options->lower_mul_2x32_64'),
   (('udiv', a, 1), a),
   (('idiv', a, 1), a),
   (('umod', a, 1), 0),
   (('imod', a, 1), 0),
   (('imod', a, -1), 0),
   (('irem', a, 1), 0),
   (('irem', a, -1), 0),
   (('udiv', a, '#b(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b)), '!options->lower_bitops'),
   (('idiv', a, '#b(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), '!options->lower_bitops'),
   (('idiv', a, '#b(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), '!options->lower_bitops'),
   (('umod', a, '#b(is_pos_power_of_two)'), ('iand', a, ('isub', b, 1)), '!options->lower_bitops'),
   (('imod', a, '#b(is_pos_power_of_two)'), ('iand', a, ('isub', b, 1)), '!options->lower_bitops'),
   (('imod', a, '#b(is_neg_power_of_two)'), ('bcsel', ('ieq', ('ior', a, b), b), 0, ('ior', a, b)), '!options->lower_bitops'),
   # 'irem(a, b)' -> 'a - ((a < 0 ? (a + b - 1) : a) & -b)'
   (('irem', a, '#b(is_pos_power_of_two)'),
    ('isub', a, ('iand', ('bcsel', ('ilt', a, 0), ('iadd', a, ('isub', b, 1)), a), ('ineg', b))),
    '!options->lower_bitops'),
   (('irem', a, '#b(is_neg_power_of_two)'), ('irem', a, ('iabs', b)), '!options->lower_bitops'),

   (('~fneg', ('fneg', a)), a),
   (('ineg', ('ineg', a)), a),
   (('fabs', ('fneg', a)), ('fabs', a)),
   (('fabs', ('u2f', a)), ('u2f', a)),
   (('iabs', ('iabs', a)), ('iabs', a)),
   (('iabs', ('ineg', a)), ('iabs', a)),
   (('f2b', ('fneg', a)), ('f2b', a)),
   (('i2b', ('ineg', a)), ('i2b', a)),
   (('~fadd', a, 0.0), a),
   # a+0.0 is 'a' unless 'a' is denormal or -0.0. If it's only used by a
   # floating point instruction, they should flush any input denormals and we
   # can replace -0.0 with 0.0 if the float execution mode allows it.
   (('fadd(is_only_used_as_float)', 'a@16', 0.0), a, '!'+signed_zero_inf_nan_preserve_16),
   (('fadd(is_only_used_as_float)', 'a@32', 0.0), a, '!'+signed_zero_inf_nan_preserve_32),
   (('iadd', a, 0), a),
   (('iadd_sat', a, 0), a),
   (('isub_sat', a, 0), a),
   (('uadd_sat', a, 0), a),
   (('usub_sat', a, 0), a),
   (('usadd_4x8_vc4', a, 0), a),
   (('usadd_4x8_vc4', a, ~0), ~0),
   (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
   (('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))),
   (('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
   (('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
   (('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
   (('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))),
   (('~ffmaz', a, b, ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
   (('~ffmaz', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
   (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
   (('~ffmaz', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz', a, c, d))),
   (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
   (('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
   (('ior', ('iand', a, b), ('iand', a, c)), ('iand', a, ('ior', b, c))),
   (('~fadd', ('fneg', a), a), 0.0),
   (('iadd', ('ineg', a), a), 0),
   (('iadd', ('ineg', a), ('iadd', a, b)), b),
   (('iadd', a, ('iadd', ('ineg', a), b)), b),
   (('~fadd', ('fneg', a), ('fadd', a, b)), b),
   (('~fadd', a, ('fadd', ('fneg', a), b)), b),
   (('fadd', ('fsat', a), ('fsat', ('fneg', a))), ('fsat', ('fabs', a))),
   (('~fmul', a, 0.0), 0.0),
   # The only effect a*0.0 should have is when 'a' is infinity, -0.0 or NaN
   (('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16),
   (('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32),
   (('fmulz', a, 0.0), 0.0),
   (('fmulz', a, 'b(is_finite_not_zero)'), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32),
   (('fmulz', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)),
   (('fmulz', a, a), ('fmul', a, a)),
   (('ffmaz', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c), '!'+signed_zero_inf_nan_preserve_32),
   (('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)),
   (('ffmaz', a, a, b), ('ffma', a, a, b)),
   (('imul', a, 0), 0),
   (('umul_unorm_4x8_vc4', a, 0), 0),
   (('umul_unorm_4x8_vc4', a, ~0), a),
   (('~fmul', a, 1.0), a),
   (('~fmulz', a, 1.0), a),
   # The only effect a*1.0 can have is flushing denormals. If it's only used by
   # a floating point instruction, they should flush any input denormals and
   # this multiplication isn't needed.
   (('fmul(is_only_used_as_float)', a, 1.0), a),
   (('imul', a, 1), a),
   (('fmul', a, -1.0), ('fneg', a)),
   (('imul', a, -1), ('ineg', a)),
   # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a
   # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a
   # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0
   # If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
   (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
   (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
   (('~ffma', 0.0, a, b), b),
   (('ffma@16(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_16),
   (('ffma@32(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_32),
   (('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
   (('~ffma', a, b, 0.0), ('fmul', a, b)),
   (('ffma@16', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_16),
   (('ffma@32', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32),
   (('ffmaz', a, b, 0.0), ('fmulz', a, b), '!'+signed_zero_inf_nan_preserve_32),
   (('ffma', 1.0, a, b), ('fadd', a, b)),
   (('ffmaz', 1.0, a, b), ('fadd', a, b), '!'+signed_zero_inf_nan_preserve_32),
   (('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
   (('ffmaz', -1.0, a, b), ('fadd', ('fneg', a), b), '!'+signed_zero_inf_nan_preserve_32),
   (('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
   (('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
   (('~flrp', a, b, 0.0), a),
   (('~flrp', a, b, 1.0), b),
   (('~flrp', a, a, b), a),
   (('~flrp', 0.0, a, b), ('fmul', a, b)),

   # flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
   (('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),

   (('sdot_4x8_iadd', a, 0, b), b),
   (('udot_4x8_uadd', a, 0, b), b),
   (('sdot_4x8_iadd_sat', a, 0, b), b),
   (('udot_4x8_uadd_sat', a, 0, b), b),
   (('sdot_2x16_iadd', a, 0, b), b),
   (('udot_2x16_uadd', a, 0, b), b),
   (('sdot_2x16_iadd_sat', a, 0, b), b),
   (('udot_2x16_uadd_sat', a, 0, b), b),

   # sudot_4x8_iadd is not commutative at all, so the patterns must be
   # duplicated with zeros on each of the first positions.
   (('sudot_4x8_iadd', a, 0, b), b),
   (('sudot_4x8_iadd', 0, a, b), b),
   (('sudot_4x8_iadd_sat', a, 0, b), b),
   (('sudot_4x8_iadd_sat', 0, a, b), b),

   (('iadd', ('sdot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sdot_4x8_iadd', a, b, ('iadd', c, d))),
   (('iadd', ('udot_4x8_uadd(is_used_once)', a, b, '#c'), '#d'), ('udot_4x8_uadd', a, b, ('iadd', c, d))),
   (('iadd', ('sudot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sudot_4x8_iadd', a, b, ('iadd', c, d))),
   (('iadd', ('sdot_2x16_iadd(is_used_once)', a, b, '#c'), '#d'), ('sdot_2x16_iadd', a, b, ('iadd', c, d))),
   (('iadd', ('udot_2x16_uadd(is_used_once)', a, b, '#c'), '#d'), ('udot_2x16_uadd', a, b, ('iadd', c, d))),

   # Try to let constant folding eliminate the dot-product part.  These are
   # safe because the dot product cannot overflow 32 bits.
   (('iadd', ('sdot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sdot_4x8_iadd', a, b, c)),
   (('iadd', ('udot_4x8_uadd', 'a(is_not_const)', b, 0), c), ('udot_4x8_uadd', a, b, c)),
   (('iadd', ('sudot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sudot_4x8_iadd', a, b, c)),
   (('iadd', ('sudot_4x8_iadd', a, 'b(is_not_const)', 0), c), ('sudot_4x8_iadd', a, b, c)),
   (('iadd', ('sdot_2x16_iadd', 'a(is_not_const)', b, 0), c), ('sdot_2x16_iadd', a, b, c)),
   (('iadd', ('udot_2x16_uadd', 'a(is_not_const)', b, 0), c), ('udot_2x16_uadd', a, b, c)),
   (('sdot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sdot_4x8_iadd', a, b, 0), c)),
   (('udot_4x8_uadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('udot_4x8_uadd', a, b, 0), c)),
   (('sudot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sudot_4x8_iadd', a, b, 0), c)),
   (('sdot_2x16_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sdot_2x16_iadd', a, b, 0), c)),
   (('udot_2x16_uadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('udot_2x16_uadd', a, b, 0), c)),
   (('sdot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sdot_4x8_iadd', a, b, 0), c), '!options->lower_iadd_sat'),
   (('udot_4x8_uadd_sat', '#a', '#b', 'c(is_not_const)'), ('uadd_sat', ('udot_4x8_uadd', a, b, 0), c), '!options->lower_uadd_sat'),
   (('sudot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sudot_4x8_iadd', a, b, 0), c), '!options->lower_iadd_sat'),
   (('sdot_2x16_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sdot_2x16_iadd', a, b, 0), c), '!options->lower_iadd_sat'),
   (('udot_2x16_uadd_sat', '#a', '#b', 'c(is_not_const)'), ('uadd_sat', ('udot_2x16_uadd', a, b, 0), c), '!options->lower_uadd_sat'),

   # Optimize open-coded fmulz.
   # (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b) -> fmulz(a, b)
   (('fmul@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b)),
    ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32),
   (('fmul@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)')),
    ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32),

   # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c)
   (('ffma@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b), c),
    ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32),
   (('ffma@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
    ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32),
]
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Shorthand for the expansion of just the dot product part of the [iu]dp4a
bf215546Sopenharmony_ci# instructions.
# Signed 4x8: sum over the four bytes of extract_i8(a, n) * extract_i8(b, n).
sdot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_i8', b, 0)),
                                 ('imul', ('extract_i8', a, 1), ('extract_i8', b, 1))),
                        ('iadd', ('imul', ('extract_i8', a, 2), ('extract_i8', b, 2)),
                                 ('imul', ('extract_i8', a, 3), ('extract_i8', b, 3))))
# Unsigned 4x8: the same sum using extract_u8 on both operands.
udot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_u8', a, 0), ('extract_u8', b, 0)),
                                 ('imul', ('extract_u8', a, 1), ('extract_u8', b, 1))),
                        ('iadd', ('imul', ('extract_u8', a, 2), ('extract_u8', b, 2)),
                                 ('imul', ('extract_u8', a, 3), ('extract_u8', b, 3))))
# Mixed 4x8: 'a' bytes are extracted signed (extract_i8) and 'b' bytes
# unsigned (extract_u8), so the two operands are not interchangeable.
sudot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_u8', b, 0)),
                                  ('imul', ('extract_i8', a, 1), ('extract_u8', b, 1))),
                         ('iadd', ('imul', ('extract_i8', a, 2), ('extract_u8', b, 2)),
                                  ('imul', ('extract_i8', a, 3), ('extract_u8', b, 3))))
# Signed 2x16: sum over the two 16-bit halves using extract_i16.
sdot_2x16_a_b = ('iadd', ('imul', ('extract_i16', a, 0), ('extract_i16', b, 0)),
                         ('imul', ('extract_i16', a, 1), ('extract_i16', b, 1)))
# Unsigned 2x16: the same sum using extract_u16.
udot_2x16_a_b = ('iadd', ('imul', ('extract_u16', a, 0), ('extract_u16', b, 0)),
                         ('imul', ('extract_u16', a, 1), ('extract_u16', b, 1)))
bf215546Sopenharmony_ci
# Expand the dot-product-accumulate opcodes into the open-coded
# extract/multiply/add shorthands when the matching options->has_* flag is
# not set.
optimizations.extend([
   (('sdot_4x8_iadd', a, b, c), ('iadd', sdot_4x8_a_b, c), '!options->has_sdot_4x8'),
   (('udot_4x8_uadd', a, b, c), ('iadd', udot_4x8_a_b, c), '!options->has_udot_4x8'),
   (('sudot_4x8_iadd', a, b, c), ('iadd', sudot_4x8_a_b, c), '!options->has_sudot_4x8'),
   (('sdot_2x16_iadd', a, b, c), ('iadd', sdot_2x16_a_b, c), '!options->has_dot_2x16'),
   (('udot_2x16_uadd', a, b, c), ('iadd', udot_2x16_a_b, c), '!options->has_dot_2x16'),

   # For the unsigned dot-product, the largest possible value is
   # 4*(255*255) = 0x3f804, so we don't have to worry about that intermediate
   # result overflowing.  0x100000000 - 0x3f804 = 0xfffc07fc.  If c is a
   # constant that is less than 0xfffc07fc, then the result cannot overflow
   # ever.
   (('udot_4x8_uadd_sat', a, b, '#c(is_ult_0xfffc07fc)'), ('udot_4x8_uadd', a, b, c)),
   (('udot_4x8_uadd_sat', a, b, c), ('uadd_sat', udot_4x8_a_b, c), '!options->has_udot_4x8'),

   # For the signed dot-product, the largest positive value is 4*(-128*-128) =
   # 0x10000, and the largest negative value is 4*(-128*127) = -0xfe00.  We
   # don't have to worry about that intermediate result overflowing or
   # underflowing.
   (('sdot_4x8_iadd_sat', a, b, c), ('iadd_sat', sdot_4x8_a_b, c), '!options->has_sdot_4x8'),

   (('sudot_4x8_iadd_sat', a, b, c), ('iadd_sat', sudot_4x8_a_b, c), '!options->has_sudot_4x8'),

   (('udot_2x16_uadd_sat', a, b, c), ('uadd_sat', udot_2x16_a_b, c), '!options->has_dot_2x16'),
   (('sdot_2x16_iadd_sat', a, b, c), ('iadd_sat', sdot_2x16_a_b, c), '!options->has_dot_2x16'),
])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Float sizes
for s in [16, 32, 64]:
    # Opcode names pinned to the current bit size, and the per-size flrp
    # option strings, are formatted once here rather than inside every rule.
    flrp_op = '~flrp@{}'.format(s)
    fadd_op = '~fadd@{}'.format(s)
    ffma_op = '~ffma@{}'.format(s)
    lower_flrp = 'options->lower_flrp{}'.format(s)
    keep_flrp = '!options->lower_flrp{}'.format(s)

    optimizations.extend([
       ((flrp_op, a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), lower_flrp),

       ((flrp_op, a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), lower_flrp),
       ((flrp_op, ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a), lower_flrp),
       ((flrp_op, a, ('fmul(is_used_once)', a, b), c), ('fmul', ('flrp', 1.0, b, c), a), lower_flrp),

       ((fadd_op, ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), keep_flrp),
       # These are the same as the previous three rules, but it depends on
       # 1-fsat(x) <=> fsat(1-x).  See below.
       ((fadd_op, ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c)))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), keep_flrp),
       ((fadd_op, a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), keep_flrp),

       ((fadd_op,    ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f',  c))), ('bcsel', c, b, a), lower_flrp),
       ((fadd_op, a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), lower_flrp),

       ((ffma_op, a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1'))), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, b, a)),
       ((ffma_op, b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, b, a)),

       # These two aren't flrp lowerings, but do appear in some shaders.
       ((ffma_op, ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, b, a)),
       ((ffma_op, ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, d, ('fmul', a, b))),

       # 1 - ((1 - a) * (1 - b))
       # 1 - (1 - a - b + a*b)
       # 1 - 1 + a + b - a*b
       # a + b - a*b
       # a + b*(1 - a)
       # b*(1 - a) + 1*a
       # flrp(b, 1, a)
       ((fadd_op, 1.0, ('fneg', ('fmul', ('fadd', 1.0, ('fneg', a)), ('fadd', 1.0, ('fneg', b))))), ('flrp', b, 1.0, a), keep_flrp),
    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cioptimizations.extend([
bf215546Sopenharmony_ci   (('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
bf215546Sopenharmony_ci   (('ftrunc', a), ('bcsel', ('flt', a, 0.0), ('fneg', ('ffloor', ('fabs', a))), ('ffloor', ('fabs', a))), 'options->lower_ftrunc'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ffloor@16', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
bf215546Sopenharmony_ci   (('ffloor@32', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
bf215546Sopenharmony_ci   (('ffloor@64', a), ('fsub', a, ('ffract', a)), '(options->lower_ffloor || (options->lower_doubles_options & nir_lower_dfloor)) && !(options->lower_doubles_options & nir_lower_dfract)'),
bf215546Sopenharmony_ci   (('fadd@16', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
bf215546Sopenharmony_ci   (('fadd@32', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
bf215546Sopenharmony_ci   (('fadd@64', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor && !(options->lower_doubles_options & nir_lower_dfloor)'),
bf215546Sopenharmony_ci   (('ffract@16', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
bf215546Sopenharmony_ci   (('ffract@32', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
bf215546Sopenharmony_ci   (('ffract@64', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract || (options->lower_doubles_options & nir_lower_dfract)'),
bf215546Sopenharmony_ci   (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
bf215546Sopenharmony_ci   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
bf215546Sopenharmony_ci   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
bf215546Sopenharmony_ci   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
bf215546Sopenharmony_ci   (('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
bf215546Sopenharmony_ci   # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
bf215546Sopenharmony_ci   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
bf215546Sopenharmony_ci   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
bf215546Sopenharmony_ci   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
bf215546Sopenharmony_ci   (('~ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
bf215546Sopenharmony_ci    ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fdph', a, b), ('fdot4', ('vec4', 'a.x', 'a.y', 'a.z', 1.0), b), 'options->lower_fdph'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fdot4', ('vec4', a, b,   c,   1.0), d), ('fdph',  ('vec3', a, b, c), d), '!options->lower_fdph'),
bf215546Sopenharmony_ci   (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)),
bf215546Sopenharmony_ci   (('fdot4', ('vec4', a, b,   0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)),
bf215546Sopenharmony_ci   (('fdot4', ('vec4', a, b,   c,   0.0), d), ('fdot3', ('vec3', a, b, c), d)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)),
bf215546Sopenharmony_ci   (('fdot3', ('vec3', a, b,   0.0), c), ('fdot2', ('vec2', a, b), c)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
bf215546Sopenharmony_ci   (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Lower fdot to fsum when it is available
bf215546Sopenharmony_ci   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
bf215546Sopenharmony_ci   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
bf215546Sopenharmony_ci   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
bf215546Sopenharmony_ci   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
bf215546Sopenharmony_ci   # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
bf215546Sopenharmony_ci   # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
bf215546Sopenharmony_ci   (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # (a * #b + #c) << #d
bf215546Sopenharmony_ci   # ((a * #b) << #d) + (#c << #d)
bf215546Sopenharmony_ci   # (a * (#b << #d)) + (#c << #d)
bf215546Sopenharmony_ci   (('ishl', ('iadd', ('imul', a, '#b'), '#c'), '#d'),
bf215546Sopenharmony_ci    ('iadd', ('imul', a, ('ishl', b, d)), ('ishl', c, d))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # (a * #b) << #c
bf215546Sopenharmony_ci   # a * (#b << #c)
bf215546Sopenharmony_ci   (('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
bf215546Sopenharmony_ci])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Care must be taken here.  Shifts in NIR uses only the lower log2(bitsize)
bf215546Sopenharmony_ci# bits of the second source.  These replacements must correctly handle the
bf215546Sopenharmony_ci# case where (b % bitsize) + (c % bitsize) >= bitsize.
bf215546Sopenharmony_cifor s in [8, 16, 32, 64]:
bf215546Sopenharmony_ci   mask = s - 1
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   ishl = "ishl@{}".format(s)
bf215546Sopenharmony_ci   ishr = "ishr@{}".format(s)
bf215546Sopenharmony_ci   ushr = "ushr@{}".format(s)
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   in_bounds = ('ult', ('iadd', ('iand', b, mask), ('iand', c, mask)), s)
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   optimizations.extend([
bf215546Sopenharmony_ci       ((ishl, (ishl, a, '#b'), '#c'), ('bcsel', in_bounds, (ishl, a, ('iadd', b, c)), 0)),
bf215546Sopenharmony_ci       ((ushr, (ushr, a, '#b'), '#c'), ('bcsel', in_bounds, (ushr, a, ('iadd', b, c)), 0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci       # To get get -1 for large shifts of negative values, ishr must instead
bf215546Sopenharmony_ci       # clamp the shift count to the maximum value.
bf215546Sopenharmony_ci       ((ishr, (ishr, a, '#b'), '#c'),
bf215546Sopenharmony_ci        (ishr, a, ('imin', ('iadd', ('iand', b, mask), ('iand', c, mask)), s - 1))),
bf215546Sopenharmony_ci   ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Optimize a pattern of address calculation created by DXVK where the offset is
bf215546Sopenharmony_ci# divided by 4 and then multipled by 4. This can be turned into an iand and the
bf215546Sopenharmony_ci# additions before can be reassociated to CSE the iand instruction.
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifor size, mask in ((8, 0xff), (16, 0xffff), (32, 0xffffffff), (64, 0xffffffffffffffff)):
bf215546Sopenharmony_ci    a_sz = 'a@{}'.format(size)
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    optimizations.extend([
bf215546Sopenharmony_ci       # 'a >> #b << #b' -> 'a & ~((1 << #b) - 1)'
bf215546Sopenharmony_ci       (('ishl', ('ushr', a_sz, '#b'), b), ('iand', a, ('ishl', mask, b))),
bf215546Sopenharmony_ci       (('ishl', ('ishr', a_sz, '#b'), b), ('iand', a, ('ishl', mask, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci       # This does not trivially work with ishr.
bf215546Sopenharmony_ci       (('ushr', ('ishl', a_sz, '#b'), b), ('iand', a, ('ushr', mask, b))),
bf215546Sopenharmony_ci    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# An 'iand' with -4 only clears the two low bits.  The search-helper names
bf215546Sopenharmony_ci# suggest these rules fire when the shift count is at least 2 or the
bf215546Sopenharmony_ci# multiplier is a multiple of 4, in which case the mask is redundant.
bf215546Sopenharmony_ci# NOTE(review): confirm against the helper definitions.
bf215546Sopenharmony_cioptimizations += [
bf215546Sopenharmony_ci    (('iand', ('ishl', 'a@32', '#b(is_first_5_bits_uge_2)'), -4),
bf215546Sopenharmony_ci     ('ishl', a, b)),
bf215546Sopenharmony_ci    (('iand', ('imul', a, '#b(is_unsigned_multiple_of_4)'), -4),
bf215546Sopenharmony_ci     ('imul', a, b)),
bf215546Sopenharmony_ci]
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifor log2 in range(1, 7): # powers of two from 2 to 64
bf215546Sopenharmony_ci   v = 1 << log2
bf215546Sopenharmony_ci   mask = 0xffffffff & ~(v - 1)
bf215546Sopenharmony_ci   b_is_multiple = '#b(is_unsigned_multiple_of_{})'.format(v)
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   optimizations.extend([
bf215546Sopenharmony_ci       # Reassociate for improved CSE
bf215546Sopenharmony_ci       (('iand@32', ('iadd@32', a, b_is_multiple), mask), ('iadd', ('iand', a, mask), b)),
bf215546Sopenharmony_ci   ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# To save space in the state tables, reduce to the set that is known to help.
bf215546Sopenharmony_ci# Previously, this was range(1, 32).  In addition, a couple rules inside the
bf215546Sopenharmony_ci# loop are commented out.  Revisit someday, probably after mesa/#2635 has some
bf215546Sopenharmony_ci# resolution.
bf215546Sopenharmony_cifor i in [1, 2, 16, 24]:
bf215546Sopenharmony_ci    lo_mask = 0xffffffff >> i
bf215546Sopenharmony_ci    hi_mask = (0xffffffff << i) & 0xffffffff
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    optimizations.extend([
bf215546Sopenharmony_ci        # This pattern seems to only help in the soft-fp64 code.
bf215546Sopenharmony_ci        (('ishl@32', ('iand', 'a@32', lo_mask), i), ('ishl', a, i)),
bf215546Sopenharmony_ci#        (('ushr@32', ('iand', 'a@32', hi_mask), i), ('ushr', a, i)),
bf215546Sopenharmony_ci#        (('ishr@32', ('iand', 'a@32', hi_mask), i), ('ishr', a, i)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci        (('iand', ('ishl', 'a@32', i), hi_mask), ('ishl', a, i)),
bf215546Sopenharmony_ci        (('iand', ('ushr', 'a@32', i), lo_mask), ('ushr', a, i)),
bf215546Sopenharmony_ci#        (('iand', ('ishr', 'a@32', i), lo_mask), ('ushr', a, i)), # Yes, ushr is correct
bf215546Sopenharmony_ci    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cioptimizations.extend([  # Reassociation, comparison, bcsel and min/max simplification rules.
bf215546Sopenharmony_ci   # This is common for address calculations.  Reassociating may enable the
bf215546Sopenharmony_ci   # 'a<<c' to be CSE'd.  It also helps architectures that have an ISHLADD
bf215546Sopenharmony_ci   # instruction or a constant offset field in load / store instructions.
bf215546Sopenharmony_ci   (('ishl', ('iadd', a, '#b'), '#c'), ('iadd', ('ishl', a, c), ('ishl', b, c))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # (a + #b) * #c => (a * #c) + (#b * #c)
bf215546Sopenharmony_ci   (('imul', ('iadd(is_used_once)', a, '#b'), '#c'), ('iadd', ('imul', a, c), ('imul', b, c))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # ((a + #b) + c) * #d => ((a + c) * #d) + (#b * #d)
bf215546Sopenharmony_ci   (('imul', ('iadd(is_used_once)', ('iadd(is_used_once)', a, '#b'), c), '#d'),
bf215546Sopenharmony_ci    ('iadd', ('imul', ('iadd', a, c), d), ('imul', b, d))),
bf215546Sopenharmony_ci   (('ishl', ('iadd(is_used_once)', ('iadd(is_used_once)', a, '#b'), c), '#d'),
bf215546Sopenharmony_ci    ('iadd', ('ishl', ('iadd', a, c), d), ('ishl', b, d))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Comparison simplifications
bf215546Sopenharmony_ci   (('inot', ('flt(is_used_once)', 'a(is_a_number)', 'b(is_a_number)')), ('fge', a, b)),
bf215546Sopenharmony_ci   (('inot', ('fge(is_used_once)', 'a(is_a_number)', 'b(is_a_number)')), ('flt', a, b)),
bf215546Sopenharmony_ci   (('inot', ('feq(is_used_once)', a, b)), ('fneu', a, b)),
bf215546Sopenharmony_ci   (('inot', ('fneu(is_used_once)', a, b)), ('feq', a, b)),
bf215546Sopenharmony_ci   (('inot', ('ilt(is_used_once)', a, b)), ('ige', a, b)),
bf215546Sopenharmony_ci   (('inot', ('ult(is_used_once)', a, b)), ('uge', a, b)),
bf215546Sopenharmony_ci   (('inot', ('ige(is_used_once)', a, b)), ('ilt', a, b)),
bf215546Sopenharmony_ci   (('inot', ('uge(is_used_once)', a, b)), ('ult', a, b)),
bf215546Sopenharmony_ci   (('inot', ('ieq(is_used_once)', a, b)), ('ine', a, b)),
bf215546Sopenharmony_ci   (('inot', ('ine(is_used_once)', a, b)), ('ieq', a, b)),
bf215546Sopenharmony_ci   # Mutually exclusive comparisons: both can never be true at once.
bf215546Sopenharmony_ci   (('iand', ('feq', a, b), ('fneu', a, b)), False),
bf215546Sopenharmony_ci   (('iand', ('flt', a, b), ('flt', b, a)), False),
bf215546Sopenharmony_ci   (('iand', ('ieq', a, b), ('ine', a, b)), False),
bf215546Sopenharmony_ci   (('iand', ('ilt', a, b), ('ilt', b, a)), False),
bf215546Sopenharmony_ci   (('iand', ('ult', a, b), ('ult', b, a)), False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # This helps some shaders because, after some optimizations, they end up
bf215546Sopenharmony_ci   # with patterns like (-a < -b) || (b < a).  In an ideal world, this sort of
bf215546Sopenharmony_ci   # matching would be handled by CSE.
bf215546Sopenharmony_ci   (('flt', ('fneg', a), ('fneg', b)), ('flt', b, a)),
bf215546Sopenharmony_ci   (('fge', ('fneg', a), ('fneg', b)), ('fge', b, a)),
bf215546Sopenharmony_ci   (('feq', ('fneg', a), ('fneg', b)), ('feq', b, a)),
bf215546Sopenharmony_ci   (('fneu', ('fneg', a), ('fneg', b)), ('fneu', b, a)),
bf215546Sopenharmony_ci   (('flt', ('fneg', a), -1.0), ('flt', 1.0, a)),
bf215546Sopenharmony_ci   (('flt', -1.0, ('fneg', a)), ('flt', a, 1.0)),
bf215546Sopenharmony_ci   (('fge', ('fneg', a), -1.0), ('fge', 1.0, a)),
bf215546Sopenharmony_ci   (('fge', -1.0, ('fneg', a)), ('fge', a, 1.0)),
bf215546Sopenharmony_ci   (('fneu', ('fneg', a), -1.0), ('fneu', 1.0, a)),
bf215546Sopenharmony_ci   (('feq', -1.0, ('fneg', a)), ('feq', a, 1.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # b < fsat(NaN) -> b < 0 -> false, and b < NaN -> false.
bf215546Sopenharmony_ci   (('flt', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('flt', b, a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # fsat(NaN) >= b -> 0 >= b -> false, and NaN >= b -> false.
bf215546Sopenharmony_ci   (('fge', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fge', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # b == fsat(NaN) -> b == 0 -> false, and b == NaN -> false.
bf215546Sopenharmony_ci   (('feq', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('feq', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # b != fsat(NaN) -> b != 0 -> true, and b != NaN -> true.
bf215546Sopenharmony_ci   (('fneu', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fneu', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # fsat(NaN) >= 1 -> 0 >= 1 -> false, and NaN >= 1 -> false.
bf215546Sopenharmony_ci   (('fge', ('fsat(is_used_once)', a), 1.0), ('fge', a, 1.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # 0 < fsat(NaN) -> 0 < 0 -> false, and 0 < NaN -> false.
bf215546Sopenharmony_ci   (('flt', 0.0, ('fsat(is_used_once)', a)), ('flt', 0.0, a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # 0.0 >= b2f(a)
bf215546Sopenharmony_ci   # b2f(a) <= 0.0
bf215546Sopenharmony_ci   # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
bf215546Sopenharmony_ci   # inot(a)
bf215546Sopenharmony_ci   (('fge', 0.0, ('b2f', 'a@1')), ('inot', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fge', ('fneg', ('b2f', 'a@1')), 0.0), ('inot', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fneu', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('ior', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('bcsel', a, 1.0, ('b2f', 'b@1'))   , 0.0), ('ior', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))),      ('ior', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('iand', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('bcsel', a, ('b2f', 'b@1'), 0.0)   , 0.0), ('iand', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ixor', a, b)),
bf215546Sopenharmony_ci   (('fneu',          ('b2f', 'a@1') ,          ('b2f', 'b@1') ),      ('ixor', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))),      ('ixor', a, b)),
bf215546Sopenharmony_ci   (('feq', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('ior', a, b))),
bf215546Sopenharmony_ci   (('feq', ('bcsel', a, 1.0, ('b2f', 'b@1'))   , 0.0), ('inot', ('ior', a, b))),
bf215546Sopenharmony_ci   (('feq', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))),      ('inot', ('ior', a, b))),
bf215546Sopenharmony_ci   (('feq', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('iand', a, b))),
bf215546Sopenharmony_ci   (('feq', ('bcsel', a, ('b2f', 'b@1'), 0.0)   , 0.0), ('inot', ('iand', a, b))),
bf215546Sopenharmony_ci   (('feq', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ieq', a, b)),
bf215546Sopenharmony_ci   (('feq',          ('b2f', 'a@1') ,          ('b2f', 'b@1') ),      ('ieq', a, b)),
bf215546Sopenharmony_ci   (('feq', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))),      ('ieq', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # -(b2f(a) + b2f(b)) < 0
bf215546Sopenharmony_ci   # 0 < b2f(a) + b2f(b)
bf215546Sopenharmony_ci   # 0 != b2f(a) + b2f(b)       b2f must be 0 or 1, so the sum is non-negative
bf215546Sopenharmony_ci   # a || b
bf215546Sopenharmony_ci   (('flt', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('ior', a, b)),
bf215546Sopenharmony_ci   (('flt', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('ior', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # -(b2f(a) + b2f(b)) >= 0
bf215546Sopenharmony_ci   # 0 >= b2f(a) + b2f(b)
bf215546Sopenharmony_ci   # 0 == b2f(a) + b2f(b)       b2f must be 0 or 1, so the sum is non-negative
bf215546Sopenharmony_ci   # !(a || b)
bf215546Sopenharmony_ci   (('fge', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('inot', ('ior', a, b))),
bf215546Sopenharmony_ci   (('fge', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('inot', ('ior', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('flt', a, ('fneg', a)), ('flt', a, 0.0)),
bf215546Sopenharmony_ci   (('fge', a, ('fneg', a)), ('fge', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Some optimizations (below) convert things like (a < b || c < b) into
bf215546Sopenharmony_ci   # (min(a, c) < b).  However, this interferes with the previous optimizations
bf215546Sopenharmony_ci   # that try to remove comparisons with negated sums of b2f.  This just
bf215546Sopenharmony_ci   # breaks that apart.
bf215546Sopenharmony_ci   (('flt', ('fmin', c, ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')))), 0.0),
bf215546Sopenharmony_ci    ('ior', ('flt', c, 0.0), ('ior', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('~flt', ('fadd', a, b), a), ('flt', b, 0.0)),
bf215546Sopenharmony_ci   (('~fge', ('fadd', a, b), a), ('fge', b, 0.0)),
bf215546Sopenharmony_ci   (('~feq', ('fadd', a, b), a), ('feq', b, 0.0)),
bf215546Sopenharmony_ci   (('~fneu', ('fadd', a, b), a), ('fneu', b, 0.0)),
bf215546Sopenharmony_ci   (('~flt',                        ('fadd(is_used_once)', a, '#b'),  '#c'), ('flt', a, ('fadd', c, ('fneg', b)))),
bf215546Sopenharmony_ci   (('~flt', ('fneg(is_used_once)', ('fadd(is_used_once)', a, '#b')), '#c'), ('flt', ('fneg', ('fadd', c, b)), a)),
bf215546Sopenharmony_ci   (('~fge',                        ('fadd(is_used_once)', a, '#b'),  '#c'), ('fge', a, ('fadd', c, ('fneg', b)))),
bf215546Sopenharmony_ci   (('~fge', ('fneg(is_used_once)', ('fadd(is_used_once)', a, '#b')), '#c'), ('fge', ('fneg', ('fadd', c, b)), a)),
bf215546Sopenharmony_ci   (('~feq',                        ('fadd(is_used_once)', a, '#b'),  '#c'), ('feq', a, ('fadd', c, ('fneg', b)))),
bf215546Sopenharmony_ci   (('~feq', ('fneg(is_used_once)', ('fadd(is_used_once)', a, '#b')), '#c'), ('feq', ('fneg', ('fadd', c, b)), a)),
bf215546Sopenharmony_ci   (('~fneu',                        ('fadd(is_used_once)', a, '#b'),  '#c'), ('fneu', a, ('fadd', c, ('fneg', b)))),
bf215546Sopenharmony_ci   (('~fneu', ('fneg(is_used_once)', ('fadd(is_used_once)', a, '#b')), '#c'), ('fneu', ('fneg', ('fadd', c, b)), a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Cannot remove the addition from ilt or ige due to overflow.
bf215546Sopenharmony_ci   (('ieq', ('iadd', a, b), a), ('ieq', b, 0)),
bf215546Sopenharmony_ci   (('ine', ('iadd', a, b), a), ('ine', b, 0)),
bf215546Sopenharmony_ci   # A converted boolean compared against zero is just the boolean (or its negation).
bf215546Sopenharmony_ci   (('feq', ('b2f', 'a@1'), 0.0), ('inot', a)),
bf215546Sopenharmony_ci   (('fneu', ('b2f', 'a@1'), 0.0), a),
bf215546Sopenharmony_ci   (('ieq', ('b2i', 'a@1'), 0),   ('inot', a)),
bf215546Sopenharmony_ci   (('ine', ('b2i', 'a@1'), 0),   a),
bf215546Sopenharmony_ci   # Comparisons of an int-to-float conversion against zero are done on the integer instead.
bf215546Sopenharmony_ci   (('fneu', ('u2f', a), 0.0), ('ine', a, 0)),
bf215546Sopenharmony_ci   (('feq', ('u2f', a), 0.0), ('ieq', a, 0)),
bf215546Sopenharmony_ci   (('fge', ('u2f', a), 0.0), True),
bf215546Sopenharmony_ci   (('fge', 0.0, ('u2f', a)), ('uge', 0, a)),    # ieq instead?
bf215546Sopenharmony_ci   (('flt', ('u2f', a), 0.0), False),
bf215546Sopenharmony_ci   (('flt', 0.0, ('u2f', a)), ('ult', 0, a)),    # ine instead?
bf215546Sopenharmony_ci   (('fneu', ('i2f', a), 0.0), ('ine', a, 0)),
bf215546Sopenharmony_ci   (('feq', ('i2f', a), 0.0), ('ieq', a, 0)),
bf215546Sopenharmony_ci   (('fge', ('i2f', a), 0.0), ('ige', a, 0)),
bf215546Sopenharmony_ci   (('fge', 0.0, ('i2f', a)), ('ige', 0, a)),
bf215546Sopenharmony_ci   (('flt', ('i2f', a), 0.0), ('ilt', a, 0)),
bf215546Sopenharmony_ci   (('flt', 0.0, ('i2f', a)), ('ilt', 0, a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # 0.0 < fabs(a)
bf215546Sopenharmony_ci   # fabs(a) > 0.0
bf215546Sopenharmony_ci   # fabs(a) != 0.0 because fabs(a) must be >= 0
bf215546Sopenharmony_ci   # a != 0.0
bf215546Sopenharmony_ci   (('~flt', 0.0, ('fabs', a)), ('fneu', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # -fabs(a) < 0.0
bf215546Sopenharmony_ci   # fabs(a) > 0.0
bf215546Sopenharmony_ci   (('~flt', ('fneg', ('fabs', a)), 0.0), ('fneu', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # 0.0 >= fabs(a)
bf215546Sopenharmony_ci   # 0.0 == fabs(a)   because fabs(a) must be >= 0
bf215546Sopenharmony_ci   # 0.0 == a
bf215546Sopenharmony_ci   (('fge', 0.0, ('fabs', a)), ('feq', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # -fabs(a) >= 0.0
bf215546Sopenharmony_ci   # 0.0 >= fabs(a)
bf215546Sopenharmony_ci   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # (a >= 0.0) && (a <= 1.0) -> fsat(a) == a
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # This should be NaN safe.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # NaN >= 0 && 1 >= NaN -> false && false -> false
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # vs.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # NaN == fsat(NaN) -> NaN == 0 -> false
bf215546Sopenharmony_ci   (('iand', ('fge', a, 0.0), ('fge', 1.0, a)), ('feq', a, ('fsat', a)), '!options->lower_fsat'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Note: fmin(-a, -b) == -fmax(a, b)
bf215546Sopenharmony_ci   (('fmax',                        ('b2f(is_used_once)', 'a@1'),           ('b2f', 'b@1')),           ('b2f', ('ior', a, b))),
bf215546Sopenharmony_ci   (('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('iand', a, b)))),
bf215546Sopenharmony_ci   (('fmin',                        ('b2f(is_used_once)', 'a@1'),           ('b2f', 'b@1')),           ('b2f', ('iand', a, b))),
bf215546Sopenharmony_ci   (('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('ior', a, b)))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # fmin(b2f(a), b)
bf215546Sopenharmony_ci   # bcsel(a, fmin(b2f(a), b), fmin(b2f(a), b))
bf215546Sopenharmony_ci   # bcsel(a, fmin(b2f(True), b), fmin(b2f(False), b))
bf215546Sopenharmony_ci   # bcsel(a, fmin(1.0, b), fmin(0.0, b))
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # Since b is a constant, constant folding will eliminate the fmin and the
bf215546Sopenharmony_ci   # fmax.  If b is > 1.0, the bcsel will be replaced with a b2f.
bf215546Sopenharmony_ci   (('fmin', ('b2f', 'a@1'), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 0.0))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('flt', ('fadd(is_used_once)', a, ('fneg', b)), 0.0), ('flt', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),  # NOTE(review): identical to the '-fabs(a) >= 0.0' rule earlier in this list.
bf215546Sopenharmony_ci   (('~bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
bf215546Sopenharmony_ci   (('~bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
bf215546Sopenharmony_ci   (('~bcsel', ('fge', a, b), b, a), ('fmin', a, b)),
bf215546Sopenharmony_ci   (('~bcsel', ('fge', b, a), b, a), ('fmax', a, b)),
bf215546Sopenharmony_ci   (('bcsel', ('i2b', a), b, c), ('bcsel', ('ine', a, 0), b, c)),
bf215546Sopenharmony_ci   (('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
bf215546Sopenharmony_ci   (('bcsel', a, b, ('bcsel', a, c, d)), ('bcsel', a, b, d)),
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
bf215546Sopenharmony_ci   (('bcsel', a, True, b), ('ior', a, b)),
bf215546Sopenharmony_ci   (('bcsel', a, a, b), ('ior', a, b)),
bf215546Sopenharmony_ci   (('bcsel', a, b, False), ('iand', a, b)),
bf215546Sopenharmony_ci   (('bcsel', a, b, a), ('iand', a, b)),
bf215546Sopenharmony_ci   (('~fmin', a, a), a),
bf215546Sopenharmony_ci   (('~fmax', a, a), a),
bf215546Sopenharmony_ci   (('imin', a, a), a),
bf215546Sopenharmony_ci   (('imax', a, a), a),
bf215546Sopenharmony_ci   (('umin', a, a), a),
bf215546Sopenharmony_ci   (('umin', a, 0), 0),
bf215546Sopenharmony_ci   (('umin', a, -1), a),
bf215546Sopenharmony_ci   (('umax', a, a), a),
bf215546Sopenharmony_ci   (('umax', a, 0), a),
bf215546Sopenharmony_ci   (('umax', a, -1), -1),
bf215546Sopenharmony_ci   (('fmax', ('fmax', a, b), b), ('fmax', a, b)),
bf215546Sopenharmony_ci   (('umax', ('umax', a, b), b), ('umax', a, b)),
bf215546Sopenharmony_ci   (('imax', ('imax', a, b), b), ('imax', a, b)),
bf215546Sopenharmony_ci   (('fmin', ('fmin', a, b), b), ('fmin', a, b)),
bf215546Sopenharmony_ci   (('umin', ('umin', a, b), b), ('umin', a, b)),
bf215546Sopenharmony_ci   (('imin', ('imin', a, b), b), ('imin', a, b)),
bf215546Sopenharmony_ci   (('fmax', ('fmax', ('fmax', a, b), c), a), ('fmax', ('fmax', a, b), c)),
bf215546Sopenharmony_ci   (('umax', ('umax', ('umax', a, b), c), a), ('umax', ('umax', a, b), c)),
bf215546Sopenharmony_ci   (('imax', ('imax', ('imax', a, b), c), a), ('imax', ('imax', a, b), c)),
bf215546Sopenharmony_ci   (('fmin', ('fmin', ('fmin', a, b), c), a), ('fmin', ('fmin', a, b), c)),
bf215546Sopenharmony_ci   (('umin', ('umin', ('umin', a, b), c), a), ('umin', ('umin', a, b), c)),
bf215546Sopenharmony_ci   (('imin', ('imin', ('imin', a, b), c), a), ('imin', ('imin', a, b), c)),
bf215546Sopenharmony_ci])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifor N in [8, 16, 32, 64]:
bf215546Sopenharmony_ci    b2iN = 'b2i{0}'.format(N)
bf215546Sopenharmony_ci    optimizations.extend([
bf215546Sopenharmony_ci        (('ieq', (b2iN, 'a@1'), (b2iN, 'b@1')), ('ieq', a, b)),
bf215546Sopenharmony_ci        (('ine', (b2iN, 'a@1'), (b2iN, 'b@1')), ('ine', a, b)),
bf215546Sopenharmony_ci    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifor N in [16, 32, 64]:
bf215546Sopenharmony_ci    b2fN = 'b2f{0}'.format(N)
bf215546Sopenharmony_ci    optimizations.extend([
bf215546Sopenharmony_ci        (('feq', (b2fN, 'a@1'), (b2fN, 'b@1')), ('ieq', a, b)),
bf215546Sopenharmony_ci        (('fneu', (b2fN, 'a@1'), (b2fN, 'b@1')), ('ine', a, b)),
bf215546Sopenharmony_ci    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci# Integer sizes
bf215546Sopenharmony_cifor s in [8, 16, 32, 64]:
bf215546Sopenharmony_ci    optimizations.extend([
bf215546Sopenharmony_ci       (('iand@{}'.format(s), a, ('inot', ('ishr', a, s - 1))), ('imax', a, 0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci       # Simplify logic to detect sign of an integer.
bf215546Sopenharmony_ci       (('ieq', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 0),            ('ige', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 1 << (s - 1)), ('ige', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 0),            ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ieq', ('iand', 'a@{}'.format(s), 1 << (s - 1)), 1 << (s - 1)), ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('ushr', 'a@{}'.format(s), s - 1), 0), ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ieq', ('ushr', 'a@{}'.format(s), s - 1), 0), ('ige', a, 0)),
bf215546Sopenharmony_ci       (('ieq', ('ushr', 'a@{}'.format(s), s - 1), 1), ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('ushr', 'a@{}'.format(s), s - 1), 1), ('ige', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('ishr', 'a@{}'.format(s), s - 1), 0), ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ieq', ('ishr', 'a@{}'.format(s), s - 1), 0), ('ige', a, 0)),
bf215546Sopenharmony_ci       (('ieq', ('ishr', 'a@{}'.format(s), s - 1), -1), ('ilt', a, 0)),
bf215546Sopenharmony_ci       (('ine', ('ishr', 'a@{}'.format(s), s - 1), -1), ('ige', a, 0)),
bf215546Sopenharmony_ci    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cioptimizations.extend([
bf215546Sopenharmony_ci   (('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))),
bf215546Sopenharmony_ci   (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
bf215546Sopenharmony_ci   (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
bf215546Sopenharmony_ci   (('imin', a, ('ineg', ('iabs', a))), ('ineg', ('iabs', a))),
bf215546Sopenharmony_ci   (('~fmin', a, ('fabs', a)), a),
bf215546Sopenharmony_ci   (('imin', a, ('iabs', a)), a),
bf215546Sopenharmony_ci   (('~fmax', a, ('fneg', ('fabs', a))), a),
bf215546Sopenharmony_ci   (('imax', a, ('ineg', ('iabs', a))), a),
bf215546Sopenharmony_ci   (('fmax', a, ('fabs', a)), ('fabs', a)),
bf215546Sopenharmony_ci   (('imax', a, ('iabs', a)), ('iabs', a)),
bf215546Sopenharmony_ci   (('fmax', a, ('fneg', a)), ('fabs', a)),
bf215546Sopenharmony_ci   (('imax', a, ('ineg', a)), ('iabs', a), '!options->lower_iabs'),
bf215546Sopenharmony_ci   (('~fmax', ('fabs', a), 0.0), ('fabs', a)),
bf215546Sopenharmony_ci   (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
bf215546Sopenharmony_ci   # fmax(fmin(a, 1.0), 0.0) is inexact because it returns 1.0 on NaN, while
bf215546Sopenharmony_ci   # fsat(a) returns 0.0.
bf215546Sopenharmony_ci   (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
bf215546Sopenharmony_ci   # fmin(fmax(a, -1.0), 0.0) is inexact because it returns -1.0 on NaN, while
bf215546Sopenharmony_ci   # fneg(fsat(fneg(a))) returns -0.0 on NaN.
bf215546Sopenharmony_ci   (('~fmin', ('fmax', a, -1.0),  0.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_fsat'),
bf215546Sopenharmony_ci   # fmax(fmin(a, 0.0), -1.0) is inexact because it returns 0.0 on NaN, while
bf215546Sopenharmony_ci   # fneg(fsat(fneg(a))) returns -0.0 on NaN. This only matters if
bf215546Sopenharmony_ci   # SignedZeroInfNanPreserve is set, but we don't currently have any way of
bf215546Sopenharmony_ci   # representing this in the optimizations other than the usual ~.
bf215546Sopenharmony_ci   (('~fmax', ('fmin', a,  0.0), -1.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_fsat'),
bf215546Sopenharmony_ci   # fsat(fsign(NaN)) = fsat(0) = 0, and b2f(0 < NaN) = b2f(False) = 0. Mark
bf215546Sopenharmony_ci   # the new comparison precise to prevent it being changed to 'a != 0'.
bf215546Sopenharmony_ci   (('fsat', ('fsign', a)), ('b2f', ('!flt', 0.0, a))),
bf215546Sopenharmony_ci   (('fsat', ('b2f', a)), ('b2f', a)),
bf215546Sopenharmony_ci   (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
bf215546Sopenharmony_ci   (('fsat', ('fsat', a)), ('fsat', a)),
bf215546Sopenharmony_ci   (('fsat', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('fsat', ('fadd', ('fneg', a), ('fneg', b))), '!options->lower_fsat'),
bf215546Sopenharmony_ci   (('fsat', ('fneg(is_used_once)', ('fmul(is_used_once)', a, b))), ('fsat', ('fmul', ('fneg', a), b)), '!options->lower_fsat'),
bf215546Sopenharmony_ci   (('fsat', ('fneg(is_used_once)', ('fmulz(is_used_once)', a, b))), ('fsat', ('fmulz', ('fneg', a), b)), '!options->lower_fsat && !'+signed_zero_inf_nan_preserve_32),
bf215546Sopenharmony_ci   (('fsat', ('fabs(is_used_once)', ('fmul(is_used_once)', a, b))), ('fsat', ('fmul', ('fabs', a), ('fabs', b))), '!options->lower_fsat'),
bf215546Sopenharmony_ci   (('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
bf215546Sopenharmony_ci   (('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
bf215546Sopenharmony_ci   (('umin', ('umax', ('umin', ('umax', a, b), c), b), c), ('umin', ('umax', a, b), c)),
bf215546Sopenharmony_ci   # Both the left and right patterns are "b" when isnan(a), so this is exact.
bf215546Sopenharmony_ci   (('fmax', ('fsat', a), '#b(is_zero_to_one)'), ('fsat', ('fmax', a, b))),
bf215546Sopenharmony_ci   # The left pattern is 0.0 when isnan(a) (because fmin(fsat(NaN), b) ->
bf215546Sopenharmony_ci   # fmin(0.0, b)) while the right one is "b", so this optimization is inexact.
bf215546Sopenharmony_ci   (('~fmin', ('fsat', a), '#b(is_zero_to_one)'), ('fsat', ('fmin', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # max(-min(b, a), b) -> max(abs(b), -a)
bf215546Sopenharmony_ci   # min(-max(b, a), b) -> min(-abs(b), -a)
bf215546Sopenharmony_ci   (('fmax', ('fneg', ('fmin', b, a)), b), ('fmax', ('fabs', b), ('fneg', a))),
bf215546Sopenharmony_ci   (('fmin', ('fneg', ('fmax', b, a)), b), ('fmin', ('fneg', ('fabs', b)), ('fneg', a))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # If a in [0,b] then b-a is also in [0,b].  Since b in [0,1], max(b-a, 0) =
bf215546Sopenharmony_ci   # fsat(b-a).
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # If a > b, then b-a < 0 and max(b-a, 0) = fsat(b-a) = 0
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # This should be NaN safe since max(NaN, 0) = fsat(NaN) = 0.
bf215546Sopenharmony_ci   (('fmax', ('fadd(is_used_once)', ('fneg', 'a(is_not_negative)'), '#b(is_zero_to_one)'), 0.0),
bf215546Sopenharmony_ci    ('fsat', ('fadd', ('fneg',  a), b)), '!options->lower_fsat'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # The ior versions are exact because fmin and fmax will always pick a
bf215546Sopenharmony_ci   # non-NaN value, if one exists.  Therefore (a < NaN) || (a < c) == a <
bf215546Sopenharmony_ci   # fmax(NaN, c) == a < c.  Mark the fmin or fmax in the replacement as exact
   # to prevent other optimizations from ruining the "NaN cleansing" property
bf215546Sopenharmony_ci   # of the fmin or fmax.
bf215546Sopenharmony_ci   (('ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('!fmax', b, c))),
bf215546Sopenharmony_ci   (('ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('!fmin', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('!fmin', b, c))),
bf215546Sopenharmony_ci   (('ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('!fmax', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('!fmax', b, c))),
bf215546Sopenharmony_ci   (('ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('!fmin', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('!fmin', b, c))),
bf215546Sopenharmony_ci   (('ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('!fmax', a, b), c)),
bf215546Sopenharmony_ci   (('~iand', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmin', b, c))),
bf215546Sopenharmony_ci   (('~iand', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmax', a, b), c)),
bf215546Sopenharmony_ci   (('~iand', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmax', b, c))),
bf215546Sopenharmony_ci   (('~iand', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmin', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('flt', a, '#b(is_a_number)'), ('flt', a, '#c(is_a_number)')), ('flt', a, ('fmin', b, c))),
bf215546Sopenharmony_ci   (('iand', ('flt', '#a(is_a_number)', c), ('flt', '#b(is_a_number)', c)), ('flt', ('fmax', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('fge', a, '#b(is_a_number)'), ('fge', a, '#c(is_a_number)')), ('fge', a, ('fmax', b, c))),
bf215546Sopenharmony_ci   (('iand', ('fge', '#a(is_a_number)', c), ('fge', '#b(is_a_number)', c)), ('fge', ('fmin', a, b), c)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ior', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imax', b, c))),
bf215546Sopenharmony_ci   (('ior', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imin', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imin', b, c))),
bf215546Sopenharmony_ci   (('ior', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imax', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umax', b, c))),
bf215546Sopenharmony_ci   (('ior', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umin', a, b), c)),
bf215546Sopenharmony_ci   (('ior', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umin', b, c))),
bf215546Sopenharmony_ci   (('ior', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umax', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imin', b, c))),
bf215546Sopenharmony_ci   (('iand', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imax', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imax', b, c))),
bf215546Sopenharmony_ci   (('iand', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imin', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umin', b, c))),
bf215546Sopenharmony_ci   (('iand', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umax', a, b), c)),
bf215546Sopenharmony_ci   (('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))),
bf215546Sopenharmony_ci   (('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)),
bf215546Sopenharmony_ci
   # A number of shaders contain a pattern like a.x < 0.0 || a.x > 1.0 || a.y
   # < 0.0 || a.y > 1.0 || ...  These patterns rearrange and replace in a
bf215546Sopenharmony_ci   # single step.  Doing just the replacement can lead to an infinite loop as
bf215546Sopenharmony_ci   # the pattern is repeatedly applied to the result of the previous
bf215546Sopenharmony_ci   # application of the pattern.
bf215546Sopenharmony_ci   (('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, c), d), ('flt', b, c)), ('ior', ('flt', ('!fmin', a, b), c), d)),
bf215546Sopenharmony_ci   (('ior', ('ior(is_used_once)', ('flt', a, c), d), ('flt(is_used_once)', b, c)), ('ior', ('flt', ('!fmin', a, b), c), d)),
bf215546Sopenharmony_ci   (('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, b), d), ('flt', a, c)), ('ior', ('flt', a, ('!fmax', b, c)), d)),
bf215546Sopenharmony_ci   (('ior', ('ior(is_used_once)', ('flt', a, b), d), ('flt(is_used_once)', a, c)), ('ior', ('flt', a, ('!fmax', b, c)), d)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # This is how SpvOpFOrdNotEqual might be implemented.  If both values are
bf215546Sopenharmony_ci   # numbers, then it can be replaced with fneu.
bf215546Sopenharmony_ci   (('ior', ('flt', 'a(is_a_number)', 'b(is_a_number)'), ('flt', b, a)), ('fneu', a, b)),
bf215546Sopenharmony_ci])
bf215546Sopenharmony_ci
# Float sizes
#
# Rules that have to be spelled out once per floating-point bit size.  Each
# pass of the outer loop appends the rules for one size `s`; the inner loop
# then pairs `s` with every strictly larger size `B` to collapse chains of
# conversions that go through the bigger type.
for s in [16, 32, 64]:
    optimizations.extend([
       # These derive from the previous patterns with the application of b < 0 <=>
       # 0 < -b.  The transformation should be applied if either comparison is
       # used once as this ensures that the number of comparisons will not
       # increase.  The sources to the ior and iand are not symmetric, so the
       # rules have to be duplicated to get this behavior.
       (('ior', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
       (('ior', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
       (('ior', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
       (('ior', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
       (('~iand', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
       (('~iand', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
       (('~iand', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
       (('~iand', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),

       # The (i2f32, ...) part is an open-coded fsign.  When that is combined
       # with the bcsel, it's basically copysign(1.0, a).  There are some
       # behavior differences between this pattern and copysign w.r.t. ±0 and
       # NaN.  copysign(x, y) blindly takes the sign bit from y and applies it
       # to x, regardless of whether either or both values are NaN.
       #
       # If a != a: bcsel(False, 1.0, i2f(b2i(False) - b2i(False))) = 0,
       #            int(NaN >= 0.0) - int(NaN < 0.0) = 0 - 0 = 0
       # If a == ±0: bcsel(True, 1.0, ...) = 1.0,
       #            int(±0.0 >= 0.0) - int(±0.0 < 0.0) = 1 - 0 = 1
       #
       # For all other values of 'a', the original and replacement behave as
       # copysign.
       #
       # Marking the replacement comparisons as precise prevents any future
       # optimizations from replacing either of the comparisons with the
       # logical-not of the other.
       #
       # Note: Use b2i32 in the replacement because some platforms that
       # support fp16 don't support int16.
       (('bcsel@{}'.format(s), ('feq', a, 0.0), 1.0, ('i2f{}'.format(s), ('iadd', ('b2i{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))))),
        ('i2f{}'.format(s), ('iadd', ('b2i32', ('!fge', a, 0.0)), ('ineg', ('b2i32', ('!flt', a, 0.0)))))),

       (('bcsel', a, ('b2f(is_used_once)', 'b@{}'.format(s)), ('b2f', 'c@{}'.format(s))), ('b2f', ('bcsel', a, b, c))),

       # The C spec says, "If the value of the integral part cannot be represented
       # by the integer type, the behavior is undefined."  "Undefined" can mean
       # "the conversion doesn't happen at all."
       (('~i2f{}'.format(s), ('f2i', 'a@{}'.format(s))), ('ftrunc', a)),

       # Ironically, mark these as imprecise because removing the conversions may
       # preserve more precision than doing the conversions (e.g.,
       # uint(float(0x81818181u)) == 0x81818200).
       (('~f2i{}'.format(s), ('i2f', 'a@{}'.format(s))), a),
       (('~f2i{}'.format(s), ('u2f', 'a@{}'.format(s))), a),
       (('~f2u{}'.format(s), ('i2f', 'a@{}'.format(s))), a),
       (('~f2u{}'.format(s), ('u2f', 'a@{}'.format(s))), a),

       (('fadd', ('b2f{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('fneg', ('b2f{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))), ('fsign', a), '!options->lower_fsign'),
       (('iadd', ('b2i{}'.format(s), ('flt', 0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0)))), ('f2i{}'.format(s), ('fsign', a)), '!options->lower_fsign'),

       # The conversion-folding rules below must be elements of this list.
       # Written as stand-alone tuple expressions in the loop body they are
       # evaluated and thrown away, so they would never be registered.

       # float? -> float? -> floatS ==> float? -> floatS
       (('~f2f{}'.format(s), ('f2f', a)), ('f2f{}'.format(s), a)),

       # int? -> float? -> floatS ==> int? -> floatS
       (('~f2f{}'.format(s), ('u2f', a)), ('u2f{}'.format(s), a)),
       (('~f2f{}'.format(s), ('i2f', a)), ('i2f{}'.format(s), a)),

       # float? -> float? -> intS ==> float? -> intS
       (('~f2u{}'.format(s), ('f2f', a)), ('f2u{}'.format(s), a)),
       (('~f2i{}'.format(s), ('f2f', a)), ('f2i{}'.format(s), a)),
    ])

    # Pair the current size with each strictly bigger size.  Only B > s is
    # meaningful here: every rule goes through (or ends at) the bigger type.
    for B in [32, 64]:
        if s < B:
            optimizations.extend([
               # S = smaller, B = bigger
               # typeS -> typeB -> typeS ==> identity
               (('f2f{}'.format(s), ('f2f{}'.format(B), 'a@{}'.format(s))), a),
               (('i2i{}'.format(s), ('i2i{}'.format(B), 'a@{}'.format(s))), a),
               (('u2u{}'.format(s), ('u2u{}'.format(B), 'a@{}'.format(s))), a),

               # bool1 -> typeB -> typeS ==> bool1 -> typeS
               (('f2f{}'.format(s), ('b2f{}'.format(B), 'a@1')), ('b2f{}'.format(s), a)),
               (('i2i{}'.format(s), ('b2i{}'.format(B), 'a@1')), ('b2i{}'.format(s), a)),
               (('u2u{}'.format(s), ('b2i{}'.format(B), 'a@1')), ('b2i{}'.format(s), a)),

               # floatS -> floatB -> intB ==> floatS -> intB
               (('f2u{}'.format(B), ('f2f{}'.format(B), 'a@{}'.format(s))), ('f2u{}'.format(B), a)),
               (('f2i{}'.format(B), ('f2f{}'.format(B), 'a@{}'.format(s))), ('f2i{}'.format(B), a)),

               # int? -> floatB -> floatS ==> int? -> floatS
               (('f2f{}'.format(s), ('u2f{}'.format(B), a)), ('u2f{}'.format(s), a)),
               (('f2f{}'.format(s), ('i2f{}'.format(B), a)), ('i2f{}'.format(s), a)),

               # intS -> intB -> floatB ==> intS -> floatB
               (('u2f{}'.format(B), ('u2u{}'.format(B), 'a@{}'.format(s))), ('u2f{}'.format(B), a)),
               (('i2f{}'.format(B), ('i2i{}'.format(B), 'a@{}'.format(s))), ('i2f{}'.format(B), a)),
            ])
bf215546Sopenharmony_ci
# mediump variants of the above
#
# Each group below collapses a two-step conversion of a 32-bit value into a
# single conversion.  Every group is emitted twice, first for the `u` opcode
# flavour and then for the `i` flavour, so the resulting rule order is
# u, i, u, i, ... group by group.
optimizations.extend(
    # int32 -> float32 -> float16 ==> int32 -> float16
    [(('f2fmp', ('{}2f32'.format(t), 'a@32')), ('{}2fmp'.format(t), a)) for t in 'ui'] +

    # float32 -> float16 -> int16 ==> float32 -> int16
    [(('f2{}16'.format(t), ('f2fmp', 'a@32')), ('f2{}16'.format(t), a)) for t in 'ui'] +

    # float32 -> int32 -> int16 ==> float32 -> int16
    [(('i2imp', ('f2{}32'.format(t), 'a@32')), ('f2{}mp'.format(t), a)) for t in 'ui'] +

    # int32 -> int16 -> float16 ==> int32 -> float16
    [(('{}2f16'.format(t), ('i2imp', 'a@32')), ('{}2f16'.format(t), a)) for t in 'ui']
)
bf215546Sopenharmony_ci
# Clean up junk left from 8-bit integer to 16-bit integer lowering.
#
# u2u16(u2u8(X)) just masks off the upper 8 bits of X.  The same effect is
# obtained by masking the upper 8 bits of the constant operand of the iand
# instead, so both search patterns below share a single replacement.  Often
# both patterns end up being applied to the same original expression tree.
optimizations.extend(
    (search, ('iand', a, ('iand', b, 0xff)))
    for search in (
        # Conversion pair applied to one source of the iand.
        ('iand', ('u2u16', ('u2u8', 'a@16')), '#b'),
        # Conversion pair applied to the result of the iand.
        ('u2u16', ('u2u8(is_used_once)', ('iand', 'a@16', '#b'))),
    )
)
bf215546Sopenharmony_ci
# Rules shared by the three bitwise logic opcodes.  For each opcode the rules
# are registered in this order: the two u2u8 narrowing rules (16-bit then
# 32-bit sources), the four extract rules, then the three shift rules.
for op in ('iand', 'ior', 'ixor'):
    optimizations.extend(
        # The logic op only needs the low 8 bits of its sources when its
        # result is narrowed with u2u8, so the inner conversion pairs go away.
        [(('u2u8', (op, ('u2u16', ('u2u8', 'a@{}'.format(bits))), ('u2u16', ('u2u8', 'b@{}'.format(bits))))),
          ('u2u8', (op, a, b)))
         for bits in (16, 32)] +

        # Undistribute extract from a logic op
        [((op, (extract, a, '#b'), (extract, c, b)), (extract, (op, a, c), b))
         for extract in ('extract_i8', 'extract_u8', 'extract_i16', 'extract_u16')] +

        # Undistribute shifts from a logic op
        [((op, ('{}(is_used_once)'.format(shift), a, '#b'), (shift, c, b)), (shift, (op, a, c), b))
         for shift in ('ushr', 'ishr', 'ishl')]
    )
bf215546Sopenharmony_ci
# Integer sizes
#
# Rules that have to be spelled out once per integer bit size `s`.
for s in [8, 16, 32, 64]:
    # Index of the top bit of an s-bit shift count: log2(s) - 1, i.e. 2, 3, 4
    # and 5 for 8, 16, 32 and 64 bits respectively.
    last_shift_bit = int(math.log2(s)) - 1

    optimizations.extend([
       # Combine two comparisons against zero into one.  The rules whose
       # replacement uses umax come in two flavours selected by
       # options->lower_umax: an ior-based form when it is set and the umax
       # form when it is not.  The ior-based (ine, ine) rule is the fallback
       # for the umax-based rule at the end of this group, so it is gated on
       # lower_umax as well (it used to test lower_umin, which left the pair
       # non-complementary).  The umin-based rules have no fallback form.
       (('iand', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('ior', a, b), 0), 'options->lower_umax'),
       (('ior',  ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('ior', a, b), 0), 'options->lower_umax'),
       (('iand', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umax', a, b), 0), '!options->lower_umax'),
       (('ior',  ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umin', a, b), 0), '!options->lower_umin'),
       (('iand', ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umin', a, b), 0), '!options->lower_umin'),
       (('ior',  ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umax', a, b), 0), '!options->lower_umax'),

       # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
       (('ineg', ('b2i{}'.format(s), 'a@{}'.format(s))), a),

       # SM5 32-bit shifts are defined to use the 5 least significant bits (or 4 bits for 16 bits)
       (('ishl', 'a@{}'.format(s), ('iand', s - 1, b)), ('ishl', a, b)),
       (('ishr', 'a@{}'.format(s), ('iand', s - 1, b)), ('ishr', a, b)),
       (('ushr', 'a@{}'.format(s), ('iand', s - 1, b)), ('ushr', a, b)),
       # (b & 1) << last_shift_bit and b << last_shift_bit agree in their low
       # log2(s) bits (only the top one can be set), so, with the shift count
       # taken modulo s as the rules above rely on, the "& 1" is redundant.
       (('ushr', 'a@{}'.format(s), ('ishl(is_used_once)', ('iand', b, 1), last_shift_bit)), ('ushr', a, ('ishl', b, last_shift_bit))),
    ])
bf215546Sopenharmony_ci
bf215546Sopenharmony_cioptimizations.extend([
bf215546Sopenharmony_ci   # Common pattern like 'if (i == 0 || i == 1 || ...)'
bf215546Sopenharmony_ci   (('ior', ('ieq', a, 0), ('ieq', a, 1)), ('uge', 1, a)),
bf215546Sopenharmony_ci   (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)),
bf215546Sopenharmony_ci   (('ior', ('uge', 2, a), ('ieq', a, 3)), ('uge', 3, a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ior', a, ('ieq', a, False)), True),
bf215546Sopenharmony_ci   (('ior', a, ('inot', a)), -1),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ine', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))), ('ine', a, b)),
bf215546Sopenharmony_ci   (('b2i', ('ine', 'a@1', 'b@1')), ('b2i', ('ixor', a, b))),
bf215546Sopenharmony_ci
   # This pattern occurs courtesy of __flt64_nonnan in the soft-fp64 code.
bf215546Sopenharmony_ci   # The first part of the iand comes from the !__feq64_nonnan.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # The second pattern is a reformulation of the first based on the relation
bf215546Sopenharmony_ci   # (a == 0 || y == 0) <=> umin(a, y) == 0, where b in the first equation
bf215546Sopenharmony_ci   # happens to be y == 0.
bf215546Sopenharmony_ci   (('iand', ('inot', ('iand', ('ior', ('ieq', a, 0),  b), c)), ('ilt', a, 0)),
bf215546Sopenharmony_ci    ('iand', ('inot', ('iand',                         b , c)), ('ilt', a, 0))),
bf215546Sopenharmony_ci   (('iand', ('inot', ('iand', ('ieq', ('umin', a, b), 0), c)), ('ilt', a, 0)),
bf215546Sopenharmony_ci    ('iand', ('inot', ('iand', ('ieq',             b , 0), c)), ('ilt', a, 0))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # These patterns can result when (a < b || a < c) => (a < min(b, c))
bf215546Sopenharmony_ci   # transformations occur before constant propagation and loop-unrolling.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # The flt versions are exact.  If isnan(a), the original pattern is
bf215546Sopenharmony_ci   # trivially false, and the replacements are false too.  If isnan(b):
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   #    a < fmax(NaN, a) => a < a => false vs a < NaN => false
bf215546Sopenharmony_ci   (('flt', a, ('fmax', b, a)), ('flt', a, b)),
bf215546Sopenharmony_ci   (('flt', ('fmin', a, b), a), ('flt', b, a)),
bf215546Sopenharmony_ci   (('~fge', a, ('fmin', b, a)), True),
bf215546Sopenharmony_ci   (('~fge', ('fmax', a, b), a), True),
bf215546Sopenharmony_ci   (('flt', a, ('fmin', b, a)), False),
bf215546Sopenharmony_ci   (('flt', ('fmax', a, b), a), False),
bf215546Sopenharmony_ci   (('~fge', a, ('fmax', b, a)), ('fge', a, b)),
bf215546Sopenharmony_ci   (('~fge', ('fmin', a, b), a), ('fge', b, a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ilt', a, ('imax', b, a)), ('ilt', a, b)),
bf215546Sopenharmony_ci   (('ilt', ('imin', a, b), a), ('ilt', b, a)),
bf215546Sopenharmony_ci   (('ige', a, ('imin', b, a)), True),
bf215546Sopenharmony_ci   (('ige', ('imax', a, b), a), True),
bf215546Sopenharmony_ci   (('ult', a, ('umax', b, a)), ('ult', a, b)),
bf215546Sopenharmony_ci   (('ult', ('umin', a, b), a), ('ult', b, a)),
bf215546Sopenharmony_ci   (('uge', a, ('umin', b, a)), True),
bf215546Sopenharmony_ci   (('uge', ('umax', a, b), a), True),
bf215546Sopenharmony_ci   (('ilt', a, ('imin', b, a)), False),
bf215546Sopenharmony_ci   (('ilt', ('imax', a, b), a), False),
bf215546Sopenharmony_ci   (('ige', a, ('imax', b, a)), ('ige', a, b)),
bf215546Sopenharmony_ci   (('ige', ('imin', a, b), a), ('ige', b, a)),
bf215546Sopenharmony_ci   (('ult', a, ('umin', b, a)), False),
bf215546Sopenharmony_ci   (('ult', ('umax', a, b), a), False),
bf215546Sopenharmony_ci   (('uge', a, ('umax', b, a)), ('uge', a, b)),
bf215546Sopenharmony_ci   (('uge', ('umin', a, b), a), ('uge', b, a)),
bf215546Sopenharmony_ci   (('ult', a, ('iand', b, a)), False),
bf215546Sopenharmony_ci   (('ult', ('ior', a, b), a), False),
bf215546Sopenharmony_ci   (('uge', a, ('iand', b, a)), True),
bf215546Sopenharmony_ci   (('uge', ('ior', a, b), a), True),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ilt', '#a', ('imax', '#b', c)), ('ior', ('ilt', a, b), ('ilt', a, c))),
bf215546Sopenharmony_ci   (('ilt', ('imin', '#a', b), '#c'), ('ior', ('ilt', a, c), ('ilt', b, c))),
bf215546Sopenharmony_ci   (('ige', '#a', ('imin', '#b', c)), ('ior', ('ige', a, b), ('ige', a, c))),
bf215546Sopenharmony_ci   (('ige', ('imax', '#a', b), '#c'), ('ior', ('ige', a, c), ('ige', b, c))),
bf215546Sopenharmony_ci   (('ult', '#a', ('umax', '#b', c)), ('ior', ('ult', a, b), ('ult', a, c))),
bf215546Sopenharmony_ci   (('ult', ('umin', '#a', b), '#c'), ('ior', ('ult', a, c), ('ult', b, c))),
bf215546Sopenharmony_ci   (('uge', '#a', ('umin', '#b', c)), ('ior', ('uge', a, b), ('uge', a, c))),
bf215546Sopenharmony_ci   (('uge', ('umax', '#a', b), '#c'), ('ior', ('uge', a, c), ('uge', b, c))),
bf215546Sopenharmony_ci   (('ilt', '#a', ('imin', '#b', c)), ('iand', ('ilt', a, b), ('ilt', a, c))),
bf215546Sopenharmony_ci   (('ilt', ('imax', '#a', b), '#c'), ('iand', ('ilt', a, c), ('ilt', b, c))),
bf215546Sopenharmony_ci   (('ige', '#a', ('imax', '#b', c)), ('iand', ('ige', a, b), ('ige', a, c))),
bf215546Sopenharmony_ci   (('ige', ('imin', '#a', b), '#c'), ('iand', ('ige', a, c), ('ige', b, c))),
bf215546Sopenharmony_ci   (('ult', '#a', ('umin', '#b', c)), ('iand', ('ult', a, b), ('ult', a, c))),
bf215546Sopenharmony_ci   (('ult', ('umax', '#a', b), '#c'), ('iand', ('ult', a, c), ('ult', b, c))),
bf215546Sopenharmony_ci   (('uge', '#a', ('umax', '#b', c)), ('iand', ('uge', a, b), ('uge', a, c))),
bf215546Sopenharmony_ci   (('uge', ('umin', '#a', b), '#c'), ('iand', ('uge', a, c), ('uge', b, c))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Thanks to sign extension, the ishr(a, b) is negative if and only if a is
bf215546Sopenharmony_ci   # negative.
bf215546Sopenharmony_ci   (('bcsel', ('ilt', a, 0), ('ineg', ('ishr', a, b)), ('ishr', a, b)),
bf215546Sopenharmony_ci    ('iabs', ('ishr', a, b))),
bf215546Sopenharmony_ci   (('iabs', ('ishr', ('iabs', a), b)), ('ishr', ('iabs', a), b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fabs', ('slt', a, b)), ('slt', a, b)),
bf215546Sopenharmony_ci   (('fabs', ('sge', a, b)), ('sge', a, b)),
bf215546Sopenharmony_ci   (('fabs', ('seq', a, b)), ('seq', a, b)),
bf215546Sopenharmony_ci   (('fabs', ('sne', a, b)), ('sne', a, b)),
bf215546Sopenharmony_ci   (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
bf215546Sopenharmony_ci   (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
bf215546Sopenharmony_ci   (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
bf215546Sopenharmony_ci   (('sne', a, b), ('b2f', ('fneu', a, b)), 'options->lower_scmp'),
bf215546Sopenharmony_ci   (('seq', ('seq', a, b), 1.0), ('seq', a, b)),
bf215546Sopenharmony_ci   (('seq', ('sne', a, b), 1.0), ('sne', a, b)),
bf215546Sopenharmony_ci   (('seq', ('slt', a, b), 1.0), ('slt', a, b)),
bf215546Sopenharmony_ci   (('seq', ('sge', a, b), 1.0), ('sge', a, b)),
bf215546Sopenharmony_ci   (('sne', ('seq', a, b), 0.0), ('seq', a, b)),
bf215546Sopenharmony_ci   (('sne', ('sne', a, b), 0.0), ('sne', a, b)),
bf215546Sopenharmony_ci   (('sne', ('slt', a, b), 0.0), ('slt', a, b)),
bf215546Sopenharmony_ci   (('sne', ('sge', a, b), 0.0), ('sge', a, b)),
bf215546Sopenharmony_ci   (('seq', ('seq', a, b), 0.0), ('sne', a, b)),
bf215546Sopenharmony_ci   (('seq', ('sne', a, b), 0.0), ('seq', a, b)),
bf215546Sopenharmony_ci   (('seq', ('slt', a, b), 0.0), ('sge', a, b)),
bf215546Sopenharmony_ci   (('seq', ('sge', a, b), 0.0), ('slt', a, b)),
bf215546Sopenharmony_ci   (('sne', ('seq', a, b), 1.0), ('sne', a, b)),
bf215546Sopenharmony_ci   (('sne', ('sne', a, b), 1.0), ('seq', a, b)),
bf215546Sopenharmony_ci   (('sne', ('slt', a, b), 1.0), ('sge', a, b)),
bf215546Sopenharmony_ci   (('sne', ('sge', a, b), 1.0), ('slt', a, b)),
bf215546Sopenharmony_ci   (('fall_equal2', a, b), ('fmin', ('seq', 'a.x', 'b.x'), ('seq', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('fall_equal3', a, b), ('seq', ('fany_nequal3', a, b), 0.0), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('fall_equal4', a, b), ('seq', ('fany_nequal4', a, b), 0.0), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('fany_nequal2', a, b), ('fmax', ('sne', 'a.x', 'b.x'), ('sne', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('fany_nequal3', a, b), ('fsat', ('fdot3', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('fany_nequal4', a, b), ('fsat', ('fdot4', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ball_iequal2', a, b), ('iand', ('ieq', 'a.x', 'b.x'), ('ieq', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('ball_iequal3', a, b), ('iand', ('iand', ('ieq', 'a.x', 'b.x'), ('ieq', 'a.y', 'b.y')), ('ieq', 'a.z', 'b.z')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('ball_iequal4', a, b), ('iand', ('iand', ('ieq', 'a.x', 'b.x'), ('ieq', 'a.y', 'b.y')), ('iand', ('ieq', 'a.z', 'b.z'), ('ieq', 'a.w', 'b.w'))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bany_inequal2', a, b), ('ior', ('ine', 'a.x', 'b.x'), ('ine', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('bany_inequal3', a, b), ('ior', ('ior', ('ine', 'a.x', 'b.x'), ('ine', 'a.y', 'b.y')), ('ine', 'a.z', 'b.z')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('bany_inequal4', a, b), ('ior', ('ior', ('ine', 'a.x', 'b.x'), ('ine', 'a.y', 'b.y')), ('ior', ('ine', 'a.z', 'b.z'), ('ine', 'a.w', 'b.w'))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ball_fequal2', a, b), ('iand', ('feq', 'a.x', 'b.x'), ('feq', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('ball_fequal3', a, b), ('iand', ('iand', ('feq', 'a.x', 'b.x'), ('feq', 'a.y', 'b.y')), ('feq', 'a.z', 'b.z')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('ball_fequal4', a, b), ('iand', ('iand', ('feq', 'a.x', 'b.x'), ('feq', 'a.y', 'b.y')), ('iand', ('feq', 'a.z', 'b.z'), ('feq', 'a.w', 'b.w'))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bany_fnequal2', a, b), ('ior', ('fneu', 'a.x', 'b.x'), ('fneu', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('bany_fnequal3', a, b), ('ior', ('ior', ('fneu', 'a.x', 'b.x'), ('fneu', 'a.y', 'b.y')), ('fneu', 'a.z', 'b.z')), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci   (('bany_fnequal4', a, b), ('ior', ('ior', ('fneu', 'a.x', 'b.x'), ('fneu', 'a.y', 'b.y')), ('ior', ('fneu', 'a.z', 'b.z'), ('fneu', 'a.w', 'b.w'))), 'options->lower_vector_cmp'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('feq', ('seq', a, b), 1.0), ('feq', a, b)),
bf215546Sopenharmony_ci   (('feq', ('sne', a, b), 1.0), ('fneu', a, b)),
bf215546Sopenharmony_ci   (('feq', ('slt', a, b), 1.0), ('flt', a, b)),
bf215546Sopenharmony_ci   (('feq', ('sge', a, b), 1.0), ('fge', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('seq', a, b), 0.0), ('feq', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('sne', a, b), 0.0), ('fneu', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('slt', a, b), 0.0), ('flt', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('sge', a, b), 0.0), ('fge', a, b)),
bf215546Sopenharmony_ci   (('feq', ('seq', a, b), 0.0), ('fneu', a, b)),
bf215546Sopenharmony_ci   (('feq', ('sne', a, b), 0.0), ('feq', a, b)),
bf215546Sopenharmony_ci   (('feq', ('slt', a, b), 0.0), ('fge', a, b)),
bf215546Sopenharmony_ci   (('feq', ('sge', a, b), 0.0), ('flt', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('seq', a, b), 1.0), ('fneu', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('sne', a, b), 1.0), ('feq', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('slt', a, b), 1.0), ('fge', a, b)),
bf215546Sopenharmony_ci   (('fneu', ('sge', a, b), 1.0), ('flt', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fneu', ('fneg', a), a), ('fneu', a, 0.0)),
bf215546Sopenharmony_ci   (('feq', ('fneg', a), a), ('feq', a, 0.0)),
bf215546Sopenharmony_ci   # Emulating booleans
bf215546Sopenharmony_ci   (('imul', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
bf215546Sopenharmony_ci   (('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
bf215546Sopenharmony_ci   (('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))),
bf215546Sopenharmony_ci   (('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
bf215546Sopenharmony_ci   (('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
bf215546Sopenharmony_ci   (('iand', 'a@bool16', 1.0), ('b2f', a)),
bf215546Sopenharmony_ci   (('iand', 'a@bool32', 1.0), ('b2f', a)),
bf215546Sopenharmony_ci   (('flt', ('fneg', ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
bf215546Sopenharmony_ci   # Comparison with the same args.  Note that these are only done for the
bf215546Sopenharmony_ci   # float versions when the source must be a number.  Generally, NaN cmp NaN
bf215546Sopenharmony_ci   # produces the opposite result of X cmp X.  flt is the outlier.  NaN < NaN
bf215546Sopenharmony_ci   # is false, and, for any number X, X < X is also false.
bf215546Sopenharmony_ci   (('ilt', a, a), False),
bf215546Sopenharmony_ci   (('ige', a, a), True),
bf215546Sopenharmony_ci   (('ieq', a, a), True),
bf215546Sopenharmony_ci   (('ine', a, a), False),
bf215546Sopenharmony_ci   (('ult', a, a), False),
bf215546Sopenharmony_ci   (('uge', a, a), True),
bf215546Sopenharmony_ci   (('flt', a, a), False),
bf215546Sopenharmony_ci   (('fge', 'a(is_a_number)', a), True),
bf215546Sopenharmony_ci   (('feq', 'a(is_a_number)', a), True),
bf215546Sopenharmony_ci   (('fneu', 'a(is_a_number)', a), False),
bf215546Sopenharmony_ci   # Logical and bit operations
bf215546Sopenharmony_ci   (('iand', a, a), a),
bf215546Sopenharmony_ci   (('iand', a, ~0), a),
bf215546Sopenharmony_ci   (('iand', a, 0), 0),
bf215546Sopenharmony_ci   (('ior', a, a), a),
bf215546Sopenharmony_ci   (('ior', a, 0), a),
bf215546Sopenharmony_ci   (('ior', a, True), True),
bf215546Sopenharmony_ci   (('ixor', a, a), 0),
bf215546Sopenharmony_ci   (('ixor', a, 0), a),
bf215546Sopenharmony_ci   (('ixor', a, ('ixor', a, b)), b),
bf215546Sopenharmony_ci   (('ixor', a, -1), ('inot', a)),
bf215546Sopenharmony_ci   (('inot', ('inot', a)), a),
bf215546Sopenharmony_ci   (('ior', ('iand', a, b), b), b),
bf215546Sopenharmony_ci   (('ior', ('ior', a, b), b), ('ior', a, b)),
bf215546Sopenharmony_ci   (('iand', ('ior', a, b), b), b),
bf215546Sopenharmony_ci   (('iand', ('iand', a, b), b), ('iand', a, b)),
bf215546Sopenharmony_ci   # DeMorgan's Laws
bf215546Sopenharmony_ci   (('iand', ('inot', a), ('inot', b)), ('inot', ('ior',  a, b))),
bf215546Sopenharmony_ci   (('ior',  ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
bf215546Sopenharmony_ci   # Shift optimizations
bf215546Sopenharmony_ci   (('ishl', 0, a), 0),
bf215546Sopenharmony_ci   (('ishl', a, 0), a),
bf215546Sopenharmony_ci   (('ishr', 0, a), 0),
bf215546Sopenharmony_ci   (('ishr', -1, a), -1),
bf215546Sopenharmony_ci   (('ishr', a, 0), a),
bf215546Sopenharmony_ci   (('ushr', 0, a), 0),
bf215546Sopenharmony_ci   (('ushr', a, 0), a),
bf215546Sopenharmony_ci   (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), '!options->lower_rotate'),
bf215546Sopenharmony_ci   (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), 'options->lower_rotate'),
bf215546Sopenharmony_ci   (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), 'options->lower_rotate'),
bf215546Sopenharmony_ci   (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), 'options->lower_rotate'),
bf215546Sopenharmony_ci   (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), 'options->lower_rotate'),
bf215546Sopenharmony_ci   # Exponential/logarithmic identities
bf215546Sopenharmony_ci   (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
bf215546Sopenharmony_ci   (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
bf215546Sopenharmony_ci   (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
bf215546Sopenharmony_ci   (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
bf215546Sopenharmony_ci   (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
bf215546Sopenharmony_ci    ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
bf215546Sopenharmony_ci   (('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)),
bf215546Sopenharmony_ci   (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
bf215546Sopenharmony_ci   (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
bf215546Sopenharmony_ci   (('~fpow', a, 1.0), a),
bf215546Sopenharmony_ci   (('~fpow', a, 2.0), ('fmul', a, a)),
bf215546Sopenharmony_ci   (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
bf215546Sopenharmony_ci   (('~fpow', 2.0, a), ('fexp2', a)),
bf215546Sopenharmony_ci   (('~fpow', ('fpow', a, 2.2), 0.454545), a),
bf215546Sopenharmony_ci   (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
bf215546Sopenharmony_ci   (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
bf215546Sopenharmony_ci   (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
bf215546Sopenharmony_ci   (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
bf215546Sopenharmony_ci   (('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
bf215546Sopenharmony_ci   (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
bf215546Sopenharmony_ci   (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
bf215546Sopenharmony_ci   (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
bf215546Sopenharmony_ci   (('~fmul', ('fexp2(is_used_once)', a), ('fexp2(is_used_once)', b)), ('fexp2', ('fadd', a, b))),
bf215546Sopenharmony_ci   (('bcsel', ('flt', a, 0.0), 0.0, ('fsqrt', a)), ('fsqrt', ('fmax', a, 0.0))),
bf215546Sopenharmony_ci   (('~fmul', ('fsqrt', a), ('fsqrt', a)), ('fabs',a)),
bf215546Sopenharmony_ci   (('~fmulz', ('fsqrt', a), ('fsqrt', a)), ('fabs', a)),
bf215546Sopenharmony_ci   # Division and reciprocal
bf215546Sopenharmony_ci   (('~fdiv', 1.0, a), ('frcp', a)),
bf215546Sopenharmony_ci   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
bf215546Sopenharmony_ci   (('~frcp', ('frcp', a)), a),
bf215546Sopenharmony_ci   (('~frcp', ('fsqrt', a)), ('frsq', a)),
bf215546Sopenharmony_ci   (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
bf215546Sopenharmony_ci   (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
bf215546Sopenharmony_ci   # Trig
bf215546Sopenharmony_ci   (('fsin', a), lowered_sincos(0.5), 'options->lower_sincos'),
bf215546Sopenharmony_ci   (('fcos', a), lowered_sincos(0.75), 'options->lower_sincos'),
bf215546Sopenharmony_ci   # Boolean simplifications
bf215546Sopenharmony_ci   (('i2b16(is_used_by_if)', a), ('ine16', a, 0)),
bf215546Sopenharmony_ci   (('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
bf215546Sopenharmony_ci   (('i2b1(is_used_by_if)', a), ('ine', a, 0)),
bf215546Sopenharmony_ci   (('ieq', a, True), a),
bf215546Sopenharmony_ci   (('ine(is_not_used_by_if)', a, True), ('inot', a)),
bf215546Sopenharmony_ci   (('ine', a, False), a),
bf215546Sopenharmony_ci   (('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
bf215546Sopenharmony_ci   (('bcsel', a, True, False), a),
bf215546Sopenharmony_ci   (('bcsel', a, False, True), ('inot', a)),
bf215546Sopenharmony_ci   (('bcsel', True, b, c), b),
bf215546Sopenharmony_ci   (('bcsel', False, b, c), c),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel@16', a, 1.0, 0.0), ('b2f', a)),
bf215546Sopenharmony_ci   (('bcsel@16', a, 0.0, 1.0), ('b2f', ('inot', a))),
bf215546Sopenharmony_ci   (('bcsel@16', a, -1.0, -0.0), ('fneg', ('b2f', a))),
bf215546Sopenharmony_ci   (('bcsel@16', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
bf215546Sopenharmony_ci   (('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
bf215546Sopenharmony_ci   (('bcsel@32', a, 0.0, 1.0), ('b2f', ('inot', a))),
bf215546Sopenharmony_ci   (('bcsel@32', a, -1.0, -0.0), ('fneg', ('b2f', a))),
bf215546Sopenharmony_ci   (('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
bf215546Sopenharmony_ci   (('bcsel@64', a, 1.0, 0.0), ('b2f', a), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
bf215546Sopenharmony_ci   (('bcsel@64', a, 0.0, 1.0), ('b2f', ('inot', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
bf215546Sopenharmony_ci   (('bcsel@64', a, -1.0, -0.0), ('fneg', ('b2f', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
bf215546Sopenharmony_ci   (('bcsel@64', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a))), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', a, b, b), b),
bf215546Sopenharmony_ci   (('~fcsel', a, b, b), b),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # D3D Boolean emulation
bf215546Sopenharmony_ci   (('bcsel', a, -1, 0), ('ineg', ('b2i', 'a@1'))),
bf215546Sopenharmony_ci   (('bcsel', a, 0, -1), ('ineg', ('b2i', ('inot', a)))),
bf215546Sopenharmony_ci   (('bcsel', a, 1, 0), ('b2i', 'a@1')),
bf215546Sopenharmony_ci   (('bcsel', a, 0, 1), ('b2i', ('inot', a))),
bf215546Sopenharmony_ci   (('iand', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('iand', a, b)))),
bf215546Sopenharmony_ci   (('ior', ('ineg', ('b2i','a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('ior', a, b)))),
bf215546Sopenharmony_ci   (('ieq', ('ineg', ('b2i', 'a@1')), 0), ('inot', a)),
bf215546Sopenharmony_ci   (('ieq', ('ineg', ('b2i', 'a@1')), -1), a),
bf215546Sopenharmony_ci   (('ine', ('ineg', ('b2i', 'a@1')), 0), a),
bf215546Sopenharmony_ci   (('ine', ('ineg', ('b2i', 'a@1')), -1), ('inot', a)),
bf215546Sopenharmony_ci   (('ige', ('ineg', ('b2i', 'a@1')), 0), ('inot', a)),
bf215546Sopenharmony_ci   (('ilt', ('ineg', ('b2i', 'a@1')), 0), a),
bf215546Sopenharmony_ci   (('ult', 0, ('ineg', ('b2i', 'a@1'))), a),
bf215546Sopenharmony_ci   (('iand', ('ineg', ('b2i', a)), 1.0), ('b2f', a)),
bf215546Sopenharmony_ci   (('iand', ('ineg', ('b2i', a)), 1),   ('b2i', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # With D3D booleans, imax and umin are AND while imin and umax are OR
bf215546Sopenharmony_ci   (('imax', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('iand', a, b)))),
bf215546Sopenharmony_ci   (('imin', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('ior', a, b)))),
bf215546Sopenharmony_ci   (('umax', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('ior', a, b)))),
bf215546Sopenharmony_ci   (('umin', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
bf215546Sopenharmony_ci    ('ineg', ('b2i', ('iand', a, b)))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Conversions
bf215546Sopenharmony_ci   (('i2b16', ('b2i', 'a@16')), a),
bf215546Sopenharmony_ci   (('i2b32', ('b2i', 'a@32')), a),
bf215546Sopenharmony_ci   (('f2i', ('ftrunc', a)), ('f2i', a)),
bf215546Sopenharmony_ci   (('f2u', ('ftrunc', a)), ('f2u', a)),
bf215546Sopenharmony_ci   (('i2b', ('ineg', a)), ('i2b', a)),
bf215546Sopenharmony_ci   (('i2b', ('iabs', a)), ('i2b', a)),
bf215546Sopenharmony_ci   (('inot', ('f2b1', a)), ('feq', a, 0.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Conversions from 16 bits to 32 bits and back can always be removed
bf215546Sopenharmony_ci   (('f2fmp', ('f2f32', 'a@16')), a),
bf215546Sopenharmony_ci   (('i2imp', ('i2i32', 'a@16')), a),
bf215546Sopenharmony_ci   (('i2imp', ('u2u32', 'a@16')), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('f2imp', ('f2f32', 'a@16')), ('f2i16', a)),
bf215546Sopenharmony_ci   (('f2ump', ('f2f32', 'a@16')), ('f2u16', a)),
bf215546Sopenharmony_ci   (('i2fmp', ('i2i32', 'a@16')), ('i2f16', a)),
bf215546Sopenharmony_ci   (('u2fmp', ('u2u32', 'a@16')), ('u2f16', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)),
bf215546Sopenharmony_ci   (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
bf215546Sopenharmony_ci   (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('f2imp', ('b2f32', 'a@1')), ('b2i16', a)),
bf215546Sopenharmony_ci   (('f2ump', ('b2f32', 'a@1')), ('b2i16', a)),
bf215546Sopenharmony_ci   (('i2fmp', ('b2i32', 'a@1')), ('b2f16', a)),
bf215546Sopenharmony_ci   (('u2fmp', ('b2i32', 'a@1')), ('b2f16', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Conversions to 16 bits would be lossy so they should only be removed if
bf215546Sopenharmony_ci   # the instruction was generated by the precision lowering pass.
bf215546Sopenharmony_ci   (('f2f32', ('f2fmp', 'a@32')), a),
bf215546Sopenharmony_ci   (('i2i32', ('i2imp', 'a@32')), a),
bf215546Sopenharmony_ci   (('u2u32', ('i2imp', 'a@32')), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('i2i32', ('f2imp', 'a@32')), ('f2i32', a)),
bf215546Sopenharmony_ci   (('u2u32', ('f2ump', 'a@32')), ('f2u32', a)),
bf215546Sopenharmony_ci   (('f2f32', ('i2fmp', 'a@32')), ('i2f32', a)),
bf215546Sopenharmony_ci   (('f2f32', ('u2fmp', 'a@32')), ('u2f32', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Conversions from float32 to float64 and back can be removed as long as
bf215546Sopenharmony_ci   # it doesn't need to be precise, since the conversion may e.g. flush denorms
bf215546Sopenharmony_ci   (('~f2f32', ('f2f64', 'a@32')), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ffloor', 'a(is_integral)'), a),
bf215546Sopenharmony_ci   (('fceil', 'a(is_integral)'), a),
bf215546Sopenharmony_ci   (('ftrunc', 'a(is_integral)'), a),
bf215546Sopenharmony_ci   (('fround_even', 'a(is_integral)'), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # fract(x) = x - floor(x), so fract(NaN) = NaN
bf215546Sopenharmony_ci   (('~ffract', 'a(is_integral)'), 0.0),
bf215546Sopenharmony_ci   (('fabs', 'a(is_not_negative)'), a),
bf215546Sopenharmony_ci   (('iabs', 'a(is_not_negative)'), a),
bf215546Sopenharmony_ci   (('fsat', 'a(is_not_positive)'), 0.0),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('~fmin', 'a(is_not_negative)', 1.0), ('fsat', a), '!options->lower_fsat'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # The result of the multiply must be in [-1, 0], so the result of the ffma
bf215546Sopenharmony_ci   # must be in [0, 1].
bf215546Sopenharmony_ci   (('flt', ('fadd', ('fmul', ('fsat', a), ('fneg', ('fsat', a))), 1.0), 0.0), False),
bf215546Sopenharmony_ci   (('flt', ('fadd', ('fneg', ('fmul', ('fsat', a), ('fsat', a))), 1.0), 0.0), False),
bf215546Sopenharmony_ci   (('fmax', ('fadd', ('fmul', ('fsat', a), ('fneg', ('fsat', a))), 1.0), 0.0), ('fadd', ('fmul', ('fsat', a), ('fneg', ('fsat', a))), 1.0)),
bf215546Sopenharmony_ci   (('fmax', ('fadd', ('fneg', ('fmul', ('fsat', a), ('fsat', a))), 1.0), 0.0), ('fadd', ('fneg', ('fmul', ('fsat', a), ('fsat', a))), 1.0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fneu', 'a(is_not_zero)', 0.0), True),
bf215546Sopenharmony_ci   (('feq', 'a(is_not_zero)', 0.0), False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # In this chart, + means value > 0 and - means value < 0.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # + >= + -> unknown  0 >= + -> false    - >= + -> false
bf215546Sopenharmony_ci   # + >= 0 -> true     0 >= 0 -> true     - >= 0 -> false
bf215546Sopenharmony_ci   # + >= - -> true     0 >= - -> true     - >= - -> unknown
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # Using grouping conceptually similar to a Karnaugh map...
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # (+ >= 0, + >= -, 0 >= 0, 0 >= -) == (is_not_negative >= is_not_positive) -> true
bf215546Sopenharmony_ci   # (0 >= +, - >= +) == (is_not_positive >= gt_zero) -> false
bf215546Sopenharmony_ci   # (- >= +, - >= 0) == (lt_zero >= is_not_negative) -> false
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # The flt / ilt cases just invert the expected result.
bf215546Sopenharmony_ci   #
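bf215546Sopenharmony_ci   # The results expecting true are only valid when neither source can be NaN,
bf215546Sopenharmony_ci   # hence the is_a_number_* conditions on those patterns.  The results
bf215546Sopenharmony_ci   # expecting false are fine because NaN compared >= or < anything is false.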
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fge', 'a(is_a_number_not_negative)', 'b(is_a_number_not_positive)'), True),
bf215546Sopenharmony_ci   (('fge', 'a(is_not_positive)',          'b(is_gt_zero)'),               False),
bf215546Sopenharmony_ci   (('fge', 'a(is_lt_zero)',               'b(is_not_negative)'),          False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('flt', 'a(is_not_negative)',          'b(is_not_positive)'),          False),
bf215546Sopenharmony_ci   (('flt', 'a(is_a_number_not_positive)', 'b(is_a_number_gt_zero)'),      True),
bf215546Sopenharmony_ci   (('flt', 'a(is_a_number_lt_zero)',      'b(is_a_number_not_negative)'), True),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ine', 'a(is_not_zero)', 0), True),
bf215546Sopenharmony_ci   (('ieq', 'a(is_not_zero)', 0), False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ige', 'a(is_not_negative)', 'b(is_not_positive)'), True),
bf215546Sopenharmony_ci   (('ige', 'a(is_not_positive)', 'b(is_gt_zero)'),      False),
bf215546Sopenharmony_ci   (('ige', 'a(is_lt_zero)',      'b(is_not_negative)'), False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ilt', 'a(is_not_negative)', 'b(is_not_positive)'), False),
bf215546Sopenharmony_ci   (('ilt', 'a(is_not_positive)', 'b(is_gt_zero)'),      True),
bf215546Sopenharmony_ci   (('ilt', 'a(is_lt_zero)',      'b(is_not_negative)'), True),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ult', 0, 'a(is_gt_zero)'), True),
bf215546Sopenharmony_ci   (('ult', a, 0), False),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Packing and then unpacking does nothing
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_x', ('pack_64_2x32', a)), 'a.x'),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_y', ('pack_64_2x32', a)), 'a.y'),
bf215546Sopenharmony_ci   (('unpack_64_2x32', ('pack_64_2x32_split', a, b)), ('vec2', a, b)),
bf215546Sopenharmony_ci   (('unpack_64_2x32', ('pack_64_2x32', a)), a),
bf215546Sopenharmony_ci   (('unpack_double_2x32_dxil', ('pack_double_2x32_dxil', a)), a),
bf215546Sopenharmony_ci   (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
bf215546Sopenharmony_ci                           ('unpack_64_2x32_split_y', a)), a),
bf215546Sopenharmony_ci   (('pack_64_2x32', ('vec2', ('unpack_64_2x32_split_x', a),
bf215546Sopenharmony_ci                              ('unpack_64_2x32_split_y', a))), a),
bf215546Sopenharmony_ci   (('pack_64_2x32', ('unpack_64_2x32', a)), a),
bf215546Sopenharmony_ci   (('pack_double_2x32_dxil', ('unpack_double_2x32_dxil', a)), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Comparing two halves of an unpack separately.  While this optimization
bf215546Sopenharmony_ci   # should be correct for non-constant values, it's less obvious that it's
bf215546Sopenharmony_ci   # useful in that case.  For constant values, the pack will fold and we're
bf215546Sopenharmony_ci   # guaranteed to reduce the whole tree to one instruction.
bf215546Sopenharmony_ci   (('iand', ('ieq', ('unpack_32_2x16_split_x', a), '#b'),
bf215546Sopenharmony_ci             ('ieq', ('unpack_32_2x16_split_y', a), '#c')),
bf215546Sopenharmony_ci    ('ieq', a, ('pack_32_2x16_split', b, c))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Byte extraction
bf215546Sopenharmony_ci   (('ushr', 'a@16',  8), ('extract_u8', a, 1), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ushr', 'a@32', 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ushr', 'a@64', 56), ('extract_u8', a, 7), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ishr', 'a@16',  8), ('extract_i8', a, 1), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Common pattern in many Vulkan CTS tests that read 8-bit integers from a
bf215546Sopenharmony_ci   # storage buffer.
bf215546Sopenharmony_ci   (('u2u8', ('extract_u16', a, 1)), ('u2u8', ('extract_u8', a, 2)), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('u2u8', ('ushr', a, 8)), ('u2u8', ('extract_u8', a, 1)), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Common pattern after lowering 8-bit integers to 16-bit.
bf215546Sopenharmony_ci   (('i2i16', ('u2u8', ('extract_u8', a, b))), ('i2i16', ('extract_i8', a, b))),
bf215546Sopenharmony_ci   (('u2u16', ('u2u8', ('extract_u8', a, b))), ('u2u16', ('extract_u8', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ubfe', a,  0, 8), ('extract_u8', a, 0), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ubfe', a,  8, 8), ('extract_u8', a, 1), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ubfe', a, 16, 8), ('extract_u8', a, 2), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ubfe', a, 24, 8), ('extract_u8', a, 3), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ibfe', a,  0, 8), ('extract_i8', a, 0), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ibfe', a,  8, 8), ('extract_i8', a, 1), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ibfe', a, 16, 8), ('extract_i8', a, 2), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci   (('ibfe', a, 24, 8), ('extract_i8', a, 3), '!options->lower_extract_byte'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_u8', ('extract_i8', a, b), 0), ('extract_u8', a, b)),
bf215546Sopenharmony_ci   (('extract_u8', ('extract_u8', a, b), 0), ('extract_u8', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    # Word extraction
bf215546Sopenharmony_ci   (('ushr', ('ishl', 'a@32', 16), 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ishr', ('ishl', 'a@32', 16), 16), ('extract_i16', a, 0), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ishr', 'a@32', 16), ('extract_i16', a, 1), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ubfe', a,  0, 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ubfe', a, 16, 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ibfe', a,  0, 16), ('extract_i16', a, 0), '!options->lower_extract_word'),
bf215546Sopenharmony_ci   (('ibfe', a, 16, 16), ('extract_i16', a, 1), '!options->lower_extract_word'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Packing a u8vec4 to write to an SSBO.
bf215546Sopenharmony_ci   (('ior', ('ishl', ('u2u32', 'a@8'), 24), ('ior', ('ishl', ('u2u32', 'b@8'), 16), ('ior', ('ishl', ('u2u32', 'c@8'), 8), ('u2u32', 'd@8')))),
bf215546Sopenharmony_ci    ('pack_32_4x8', ('vec4', d, c, b, a)), 'options->has_pack_32_4x8'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)),
bf215546Sopenharmony_ci   (('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Lower pack/unpack
bf215546Sopenharmony_ci   (('pack_64_2x32_split', a, b), ('ior', ('u2u64', a), ('ishl', ('u2u64', b), 32)), 'options->lower_pack_64_2x32_split'),
bf215546Sopenharmony_ci   (('pack_32_2x16_split', a, b), ('ior', ('u2u32', a), ('ishl', ('u2u32', b), 16)), 'options->lower_pack_32_2x16_split'),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_x', a), ('u2u32', a), 'options->lower_unpack_64_2x32_split'),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_y', a), ('u2u32', ('ushr', a, 32)), 'options->lower_unpack_64_2x32_split'),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_x', a), ('u2u16', a), 'options->lower_unpack_32_2x16_split'),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_y', a), ('u2u16', ('ushr', a, 16)), 'options->lower_unpack_32_2x16_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Useless masking before unpacking
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_x', ('iand', a, 0xffff)), ('unpack_half_2x16_split_x', a)),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_x', ('iand', a, 0xffff)), ('unpack_32_2x16_split_x', a)),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_x', ('iand', a, 0xffffffff)), ('unpack_64_2x32_split_x', a)),
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)),
bf215546Sopenharmony_ci   (('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_x', ('extract_u16', a, 0)), ('unpack_half_2x16_split_x', a)),
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_x', ('extract_u16', a, 1)), ('unpack_half_2x16_split_y', a)),
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_x', ('ushr', a, 16)), ('unpack_half_2x16_split_y', a)),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_x', ('extract_u16', a, 0)), ('unpack_32_2x16_split_x', a)),
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_x', ('extract_u16', a, 1)), ('unpack_32_2x16_split_y', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Optimize half packing
bf215546Sopenharmony_ci   (('ishl', ('pack_half_2x16', ('vec2', a, 0)), 16), ('pack_half_2x16', ('vec2', 0, a))),
bf215546Sopenharmony_ci   (('ushr', ('pack_half_2x16', ('vec2', 0, a)), 16), ('pack_half_2x16', ('vec2', a, 0))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('iadd', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
bf215546Sopenharmony_ci    ('pack_half_2x16', ('vec2', a, b))),
bf215546Sopenharmony_ci   (('ior', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
bf215546Sopenharmony_ci    ('pack_half_2x16', ('vec2', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ishl', ('pack_half_2x16_split', a, 0), 16), ('pack_half_2x16_split', 0, a)),
bf215546Sopenharmony_ci   (('ushr', ('pack_half_2x16_split', 0, a), 16), ('pack_half_2x16_split', a, 0)),
bf215546Sopenharmony_ci   (('extract_u16', ('pack_half_2x16_split', 0, a), 1), ('pack_half_2x16_split', a, 0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('iadd', ('pack_half_2x16_split', a, 0), ('pack_half_2x16_split', 0, b)), ('pack_half_2x16_split', a, b)),
bf215546Sopenharmony_ci   (('ior',  ('pack_half_2x16_split', a, 0), ('pack_half_2x16_split', 0, b)), ('pack_half_2x16_split', a, b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_i8', ('pack_32_4x8_split', a, b, c, d), 0), ('i2i', a)),
bf215546Sopenharmony_ci   (('extract_i8', ('pack_32_4x8_split', a, b, c, d), 1), ('i2i', b)),
bf215546Sopenharmony_ci   (('extract_i8', ('pack_32_4x8_split', a, b, c, d), 2), ('i2i', c)),
bf215546Sopenharmony_ci   (('extract_i8', ('pack_32_4x8_split', a, b, c, d), 3), ('i2i', d)),
bf215546Sopenharmony_ci   (('extract_u8', ('pack_32_4x8_split', a, b, c, d), 0), ('u2u', a)),
bf215546Sopenharmony_ci   (('extract_u8', ('pack_32_4x8_split', a, b, c, d), 1), ('u2u', b)),
bf215546Sopenharmony_ci   (('extract_u8', ('pack_32_4x8_split', a, b, c, d), 2), ('u2u', c)),
bf215546Sopenharmony_ci   (('extract_u8', ('pack_32_4x8_split', a, b, c, d), 3), ('u2u', d)),
bf215546Sopenharmony_ci])
bf215546Sopenharmony_ci
# Once the ('extract_u8', a, 0) pattern from the main list has triggered,
# expressions of the form "byte 0 of (a shifted right by 8 * n)" show up.
# Rewrite each one as a direct extract_u8 of byte n.  The table pairs every
# sized variable with the number of bytes it holds, so the rules come out in
# 16-, 32-, 64-bit order with ascending byte index.
for op in ('ushr', 'ishr'):
   optimizations.extend(
      (('extract_u8', (op, var, 8 * byte), 0), ('extract_u8', a, byte))
      for var, num_bytes in (('a@16', 2), ('a@32', 4), ('a@64', 8))
      for byte in range(1, num_bytes))

# Byte 0 of the upper 16-bit word is byte 2 of the original value.
optimizations.append((('extract_u8', ('extract_u16', a, 1), 0), ('extract_u8', a, 2)))

# Once the ('extract_[iu]8', a, 3) patterns from the main list have triggered,
# expressions of the form "top byte of (a shifted left by 8 * n)" show up.
# Rewrite each one as a direct extract of the byte that the shift moved into
# the top position.  The table pairs every sized variable with the index of
# its top byte; byte indices are emitted in descending order.
for op in ('extract_u8', 'extract_i8'):
   optimizations.extend(
      ((op, ('ishl', var, 8 * (top - byte)), top), (op, a, byte))
      for var, top in (('a@16', 1), ('a@32', 3), ('a@64', 7))
      for byte in reversed(range(top)))
bf215546Sopenharmony_ci
bf215546Sopenharmony_cioptimizations.extend([
bf215546Sopenharmony_ci   # Subtracts
bf215546Sopenharmony_ci   (('ussub_4x8_vc4', a, 0), a),
bf215546Sopenharmony_ci   (('ussub_4x8_vc4', a, ~0), 0),
bf215546Sopenharmony_ci   # Lower all Subtractions first - they can get recombined later
bf215546Sopenharmony_ci   (('fsub', a, b), ('fadd', a, ('fneg', b))),
bf215546Sopenharmony_ci   (('isub', a, b), ('iadd', a, ('ineg', b))),
bf215546Sopenharmony_ci   (('uabs_usub', a, b), ('bcsel', ('ult', a, b), ('ineg', ('isub', a, b)), ('isub', a, b))),
bf215546Sopenharmony_ci   # This is correct.  We don't need isub_sat because the result type is unsigned, so it cannot overflow.
bf215546Sopenharmony_ci   (('uabs_isub', a, b), ('bcsel', ('ilt', a, b), ('ineg', ('isub', a, b)), ('isub', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Propagate negation up multiplication chains (negating both ffma/ffmaz factors cancels outright)
bf215546Sopenharmony_ci   (('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
bf215546Sopenharmony_ci   (('fmulz(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmulz', a, b)), '!'+signed_zero_inf_nan_preserve_32),
bf215546Sopenharmony_ci   (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
bf215546Sopenharmony_ci   (('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)),
bf215546Sopenharmony_ci   (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Propagate constants up multiplication chains: (a * b) * #c is regrouped as (a * #c) * b
bf215546Sopenharmony_ci   (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
bf215546Sopenharmony_ci   (('~fmulz(is_used_once)', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)),
bf215546Sopenharmony_ci   (('~fmul(is_used_once)', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)),
bf215546Sopenharmony_ci   (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
bf215546Sopenharmony_ci   (('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)),
bf215546Sopenharmony_ci   (('~ffmaz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz', ('fmulz', a, c), b, d)),
bf215546Sopenharmony_ci   (('~ffma', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz', ('fmul', a, c), b, d)),
bf215546Sopenharmony_ci   # Prefer moving out a multiplication for more MAD/FMA-friendly code; add chains get the same regrouping: (a + b) + #c -> (a + #c) + b
bf215546Sopenharmony_ci   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
bf215546Sopenharmony_ci   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
bf215546Sopenharmony_ci   (('~fadd(is_used_once)', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)),
bf215546Sopenharmony_ci   (('~fadd(is_used_once)', ('ffmaz(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz', a, b, d), c)),
bf215546Sopenharmony_ci   (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Reassociate constants in add/mul chains so they can be folded together.
bf215546Sopenharmony_ci   # For now, we mostly only handle cases where the constants are separated by
bf215546Sopenharmony_ci   # a single non-constant.  We could do better eventually.
bf215546Sopenharmony_ci   (('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)),
bf215546Sopenharmony_ci   (('~fmulz', '#a', ('fmulz', 'b(is_not_const)', '#c')), ('fmulz', ('fmulz', a, c), b)),
bf215546Sopenharmony_ci   (('~fmul', '#a(is_finite_not_zero)', ('fmulz', 'b(is_not_const)', '#c')), ('fmulz', ('fmul', a, c), b)),
bf215546Sopenharmony_ci   (('~ffma', '#a', ('fmul', 'b(is_not_const)', '#c'), d), ('ffma', ('fmul', a, c), b, d)),
bf215546Sopenharmony_ci   (('~ffmaz', '#a', ('fmulz', 'b(is_not_const)', '#c'), d), ('ffmaz', ('fmulz', a, c), b, d)),
bf215546Sopenharmony_ci   (('~ffmaz', '#a(is_finite_not_zero)', ('fmulz', 'b(is_not_const)', '#c'), d), ('ffmaz', ('fmul', a, c), b, d)),  # NOTE(review): search pattern is a subset of the unconditioned '~ffmaz' rule just above; the parallel mixed rules search for the non-z outer op ('~fmul' / '~ffma') - confirm '~ffmaz' is intended here
bf215546Sopenharmony_ci   (('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)),
bf215546Sopenharmony_ci   (('~fadd', '#a',          ('fadd', 'b(is_not_const)', '#c')),  ('fadd', ('fadd', a,          c),           b)),
bf215546Sopenharmony_ci   (('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
bf215546Sopenharmony_ci   (('~fadd', '#a',          ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d')),  ('ffma',          b,  c, ('fadd', a,          d))),
bf215546Sopenharmony_ci   (('~fadd', '#a', ('fneg', ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
bf215546Sopenharmony_ci   (('~fadd', '#a',          ('ffmaz', 'b(is_not_const)', 'c(is_not_const)', '#d')),  ('ffmaz',          b,  c, ('fadd', a,          d))),
bf215546Sopenharmony_ci   (('~fadd', '#a', ('fneg', ('ffmaz', 'b(is_not_const)', 'c(is_not_const)', '#d'))), ('ffmaz', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
bf215546Sopenharmony_ci   (('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),
bf215546Sopenharmony_ci   (('iand', '#a', ('iand', 'b(is_not_const)', '#c')), ('iand', ('iand', a, c), b)),
bf215546Sopenharmony_ci   (('ior',  '#a', ('ior',  'b(is_not_const)', '#c')), ('ior',  ('ior',  a, c), b)),
bf215546Sopenharmony_ci   (('ixor', '#a', ('ixor', 'b(is_not_const)', '#c')), ('ixor', ('ixor', a, c), b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Reassociate add chains for more MAD/FMA-friendly code: (fmul + fmul) + c becomes (fmul + c) + fmul when c is not an fmul
bf215546Sopenharmony_ci   (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)'), 'c(is_not_fmul)'), ('fadd', ('fadd', a, c), b)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Drop mul-div by the same value when there's no wrapping.
bf215546Sopenharmony_ci   (('idiv', ('imul(no_signed_wrap)', a, b), b), a),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # By definition, the find_lsb/ifind_msb/ufind_msb result is already the -1 that these selects would substitute.
bf215546Sopenharmony_ci   (('bcsel', ('ige', ('find_lsb', a), 0), ('find_lsb', a), -1), ('find_lsb', a)),
bf215546Sopenharmony_ci   (('bcsel', ('ige', ('ifind_msb', a), 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
bf215546Sopenharmony_ci   (('bcsel', ('ige', ('ufind_msb', a), 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', ('ine', a, 0), ('find_lsb', a), -1), ('find_lsb', a)),
bf215546Sopenharmony_ci   (('bcsel', ('ine', a, 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
bf215546Sopenharmony_ci   (('bcsel', ('ine', a, 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', ('ine', a, -1), ('ifind_msb', a), -1), ('ifind_msb', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('~fmul', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),  # multiply by a selected +/-1.0 becomes a select between b and -b
bf215546Sopenharmony_ci   (('~fmul', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
bf215546Sopenharmony_ci   (('~fmulz', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),
bf215546Sopenharmony_ci   (('~fmulz', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
bf215546Sopenharmony_ci   (('~bcsel', ('flt', a, 0.0), ('fneg', a), a), ('fabs', a)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', a, ('bcsel', b, c, d), d), ('bcsel', ('iand', a, b), c, d)),  # nested selects sharing the fallback d: conditions combine with iand
bf215546Sopenharmony_ci   (('bcsel', a, b, ('bcsel', c, b, d)), ('bcsel', ('ior', a, c), b, d)),  # nested selects sharing the chosen value b: conditions combine with ior
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Misc. lowering; each rule only applies when the options-> condition string in its third element holds
bf215546Sopenharmony_ci   (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
bf215546Sopenharmony_ci   (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
bf215546Sopenharmony_ci   (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
bf215546Sopenharmony_ci   (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
bf215546Sopenharmony_ci    ('bcsel', ('ult', 31, 'bits'), 'insert',
bf215546Sopenharmony_ci              ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
bf215546Sopenharmony_ci    'options->lower_bitfield_insert'),
bf215546Sopenharmony_ci   (('ihadd', a, b), ('iadd', ('iand', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
bf215546Sopenharmony_ci   (('uhadd', a, b), ('iadd', ('iand', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
bf215546Sopenharmony_ci   (('irhadd', a, b), ('isub', ('ior', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
bf215546Sopenharmony_ci   (('urhadd', a, b), ('isub', ('ior', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
bf215546Sopenharmony_ci   (('ihadd@64', a, b), ('iadd', ('iand', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd64 || (options->lower_int64_options & nir_lower_iadd64) != 0'),
bf215546Sopenharmony_ci   (('uhadd@64', a, b), ('iadd', ('iand', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd64 || (options->lower_int64_options & nir_lower_iadd64) != 0'),
bf215546Sopenharmony_ci   (('irhadd@64', a, b), ('isub', ('ior', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd64 || (options->lower_int64_options & nir_lower_iadd64) != 0'),
bf215546Sopenharmony_ci   (('urhadd@64', a, b), ('isub', ('ior', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd64 || (options->lower_int64_options & nir_lower_iadd64) != 0'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('imul_32x16', a, b), ('imul', a, ('extract_i16', b, 0)), 'options->lower_mul_32x16'),
bf215546Sopenharmony_ci   (('umul_32x16', a, b), ('imul', a, ('extract_u16', b, 0)), 'options->lower_mul_32x16'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('uadd_sat@64', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat || (options->lower_int64_options & nir_lower_iadd64) != 0'),
bf215546Sopenharmony_ci   (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat'),
bf215546Sopenharmony_ci   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_usub_sat'),
bf215546Sopenharmony_ci   (('usub_sat@64', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), '(options->lower_int64_options & nir_lower_usub_sat64) != 0'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # int64_t sum = a + b;
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # if (a < 0 && b < 0 && a < sum) {
bf215546Sopenharmony_ci   #    sum = INT64_MIN;
bf215546Sopenharmony_ci   # } else if (a >= 0 && b >= 0 && sum < a) {
bf215546Sopenharmony_ci   #    sum = INT64_MAX;
bf215546Sopenharmony_ci   # }
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # A couple optimizations are applied.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # 1. a < sum => sum >= 0.  This replacement works because it is known that
bf215546Sopenharmony_ci   #    a < 0 and b < 0, so sum should also be < 0 unless there was
bf215546Sopenharmony_ci   #    underflow.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # 2. sum < a => sum < 0.  This replacement works because it is known that
bf215546Sopenharmony_ci   #    a >= 0 and b >= 0, so sum should also be >= 0 unless there was
bf215546Sopenharmony_ci   #    overflow.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # 3. Invert the second if-condition and swap the order of parameters for
bf215546Sopenharmony_ci   #    the bcsel. !(a >= 0 && b >= 0 && sum < 0) becomes !(a >= 0) || !(b >=
bf215546Sopenharmony_ci   #    0) || !(sum < 0), and that becomes (a < 0) || (b < 0) || (sum >= 0)
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # On Intel Gen11, this saves ~11 instructions.
bf215546Sopenharmony_ci   (('iadd_sat@64', a, b), ('bcsel',
bf215546Sopenharmony_ci                            ('iand', ('iand', ('ilt', a, 0), ('ilt', b, 0)), ('ige', ('iadd', a, b), 0)),
bf215546Sopenharmony_ci                            0x8000000000000000,
bf215546Sopenharmony_ci                            ('bcsel',
bf215546Sopenharmony_ci                             ('ior', ('ior', ('ilt', a, 0), ('ilt', b, 0)), ('ige', ('iadd', a, b), 0)),
bf215546Sopenharmony_ci                             ('iadd', a, b),
bf215546Sopenharmony_ci                             0x7fffffffffffffff)),
bf215546Sopenharmony_ci    '(options->lower_int64_options & nir_lower_iadd_sat64) != 0'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # int64_t sum = a - b;
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # if (a < 0 && b >= 0 && a < sum) {
bf215546Sopenharmony_ci   #    sum = INT64_MIN;
bf215546Sopenharmony_ci   # } else if (a >= 0 && b < 0 && a >= sum) {
bf215546Sopenharmony_ci   #    sum = INT64_MAX;
bf215546Sopenharmony_ci   # }
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # Optimizations similar to the iadd_sat case are applied here.
bf215546Sopenharmony_ci   (('isub_sat@64', a, b), ('bcsel',
bf215546Sopenharmony_ci                            ('iand', ('iand', ('ilt', a, 0), ('ige', b, 0)), ('ige', ('isub', a, b), 0)),
bf215546Sopenharmony_ci                            0x8000000000000000,
bf215546Sopenharmony_ci                            ('bcsel',
bf215546Sopenharmony_ci                             ('ior', ('ior', ('ilt', a, 0), ('ige', b, 0)), ('ige', ('isub', a, b), 0)),
bf215546Sopenharmony_ci                             ('isub', a, b),
bf215546Sopenharmony_ci                             0x7fffffffffffffff)),
bf215546Sopenharmony_ci    '(options->lower_int64_options & nir_lower_iadd_sat64) != 0'),  # NOTE(review): isub_sat@64 is gated on the iadd_sat64 flag - confirm the shared flag is intended
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # These are done here instead of in the backend because the int64 lowering
bf215546Sopenharmony_ci   # pass will make a mess of the patterns.  The first patterns are
bf215546Sopenharmony_ci   # conditioned on nir_lower_minmax64 because it was not clear that it was
bf215546Sopenharmony_ci   # always an improvement on platforms that have real int64 support.  No
bf215546Sopenharmony_ci   # shaders in shader-db hit this, so it was hard to say one way or the
bf215546Sopenharmony_ci   # other.
bf215546Sopenharmony_ci   (('ilt', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
bf215546Sopenharmony_ci   (('ilt', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
bf215546Sopenharmony_ci   (('ige', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
bf215546Sopenharmony_ci   (('ige', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
bf215546Sopenharmony_ci   (('ilt', 'a@64', 0), ('ilt', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),  # sign test reads only the split_y half
bf215546Sopenharmony_ci   (('ige', 'a@64', 0), ('ige', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ine', 'a@64', 0), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),  # zero test ORs the two 32-bit halves together
bf215546Sopenharmony_ci   (('ieq', 'a@64', 0), ('ieq', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
bf215546Sopenharmony_ci   # 0u < uint(a) <=> uint(a) != 0u
bf215546Sopenharmony_ci   (('ult', 0, 'a@64'), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Alternative lowering that doesn't rely on bfi: clear the field in 'base' with a shifted mask, then OR in the shifted, masked 'insert'.
bf215546Sopenharmony_ci   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
bf215546Sopenharmony_ci    ('bcsel', ('ult', 31, 'bits'),
bf215546Sopenharmony_ci     'insert',
bf215546Sopenharmony_ci    (('ior',
bf215546Sopenharmony_ci     ('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
bf215546Sopenharmony_ci     ('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))))),
bf215546Sopenharmony_ci    'options->lower_bitfield_insert_to_shifts'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Alternative lowering that uses bitfield_select.
bf215546Sopenharmony_ci   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
bf215546Sopenharmony_ci    ('bcsel', ('ult', 31, 'bits'), 'insert',
bf215546Sopenharmony_ci              ('bitfield_select', ('bfm', 'bits', 'offset'), ('ishl', 'insert', 'offset'), 'base')),
bf215546Sopenharmony_ci    'options->lower_bitfield_insert_to_bitfield_select'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ibitfield_extract', 'value', 'offset', 'bits'),
bf215546Sopenharmony_ci    ('bcsel', ('ult', 31, 'bits'), 'value',
bf215546Sopenharmony_ci              ('ibfe', 'value', 'offset', 'bits')),
bf215546Sopenharmony_ci    'options->lower_bitfield_extract'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ubitfield_extract', 'value', 'offset', 'bits'),
bf215546Sopenharmony_ci    ('bcsel', ('ult', 31, 'bits'), 'value',
bf215546Sopenharmony_ci              ('ubfe', 'value', 'offset', 'bits')),
bf215546Sopenharmony_ci    'options->lower_bitfield_extract'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # (src0 & src1) | (~src0 & src2). Constant fold if src2 is 0.
bf215546Sopenharmony_ci   (('bitfield_select', a, b, 0), ('iand', a, b)),
bf215546Sopenharmony_ci   (('bitfield_select', a, ('iand', a, b), c), ('bitfield_select', a, b, c)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Note that these opcodes are defined to only use the five least significant bits of 'offset' and 'bits',
bf215546Sopenharmony_ci   (('ubfe', 'value', 'offset', ('iand', 31, 'bits')), ('ubfe', 'value', 'offset', 'bits')),  # so an explicit '& 31' on either operand is redundant
bf215546Sopenharmony_ci   (('ubfe', 'value', ('iand', 31, 'offset'), 'bits'), ('ubfe', 'value', 'offset', 'bits')),
bf215546Sopenharmony_ci   (('ibfe', 'value', 'offset', ('iand', 31, 'bits')), ('ibfe', 'value', 'offset', 'bits')),
bf215546Sopenharmony_ci   (('ibfe', 'value', ('iand', 31, 'offset'), 'bits'), ('ibfe', 'value', 'offset', 'bits')),
bf215546Sopenharmony_ci   (('bfm', 'bits', ('iand', 31, 'offset')), ('bfm', 'bits', 'offset')),
bf215546Sopenharmony_ci   (('bfm', ('iand', 31, 'bits'), 'offset'), ('bfm', 'bits', 'offset')),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   #    If bits is zero, the result will be zero.
bf215546Sopenharmony_ci   #
bf215546Sopenharmony_ci   # These patterns prevent other patterns from generating invalid results
bf215546Sopenharmony_ci   # when bits is zero.
bf215546Sopenharmony_ci   (('ubfe', a, b, 0), 0),
bf215546Sopenharmony_ci   (('ibfe', a, b, 0), 0),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ubfe', a, 0, '#b'), ('iand', a, ('ushr', 0xffffffff, ('ineg', b)))),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('b2i32', ('i2b', ('ubfe', a, b, 1))), ('ubfe', a, b, 1)),
bf215546Sopenharmony_ci   (('b2i32', ('i2b', ('ibfe', a, b, 1))), ('ubfe', a, b, 1)), # ubfe in the replacement is correct
bf215546Sopenharmony_ci   (('ine', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
bf215546Sopenharmony_ci   (('ieq', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ieq', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
bf215546Sopenharmony_ci   (('ine', ('ubfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
bf215546Sopenharmony_ci   (('ieq', ('ubfe(is_used_once)', a, '#b', '#c'), 0), ('ieq', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ibitfield_extract', 'value', 'offset', 'bits'),  # shift-only lowering: 0 when bits == 0, otherwise ishl then ishr by (32 - bits)
bf215546Sopenharmony_ci    ('bcsel', ('ieq', 0, 'bits'),
bf215546Sopenharmony_ci     0,
bf215546Sopenharmony_ci     ('ishr',
bf215546Sopenharmony_ci       ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')),
bf215546Sopenharmony_ci       ('isub', 32, 'bits'))),
bf215546Sopenharmony_ci    'options->lower_bitfield_extract_to_shifts'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ubitfield_extract', 'value', 'offset', 'bits'),  # shift-and-mask lowering; bits == 32 selects an all-ones mask instead of (1 << bits) - 1
bf215546Sopenharmony_ci    ('iand',
bf215546Sopenharmony_ci     ('ushr', 'value', 'offset'),
bf215546Sopenharmony_ci     ('bcsel', ('ieq', 'bits', 32),
bf215546Sopenharmony_ci      0xffffffff,
bf215546Sopenharmony_ci      ('isub', ('ishl', 1, 'bits'), 1))),
bf215546Sopenharmony_ci    'options->lower_bitfield_extract_to_shifts'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ifind_msb', 'value'),  # via ufind_msb: negative inputs are bitwise-inverted first
bf215546Sopenharmony_ci    ('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
bf215546Sopenharmony_ci    'options->lower_ifind_msb'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('ifind_msb', 'value'),  # via ifind_msb_rev: a non-negative result r becomes 31 - r, a negative result passes through
bf215546Sopenharmony_ci    ('bcsel', ('ige', ('ifind_msb_rev', 'value'), 0),
bf215546Sopenharmony_ci     ('isub', 31, ('ifind_msb_rev', 'value')),
bf215546Sopenharmony_ci     ('ifind_msb_rev', 'value')),
bf215546Sopenharmony_ci    'options->lower_find_msb_to_reverse'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('ufind_msb', 'value'),  # same conversion for the unsigned variant, via ufind_msb_rev
bf215546Sopenharmony_ci     ('bcsel', ('ige', ('ufind_msb_rev', 'value'), 0),
bf215546Sopenharmony_ci      ('isub', 31, ('ufind_msb_rev', 'value')),
bf215546Sopenharmony_ci      ('ufind_msb_rev', 'value')),
bf215546Sopenharmony_ci     'options->lower_find_msb_to_reverse'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('find_lsb', 'value'),  # via ufind_msb of (value & -value)
bf215546Sopenharmony_ci    ('ufind_msb', ('iand', 'value', ('ineg', 'value'))),
bf215546Sopenharmony_ci    'options->lower_find_lsb'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_i8', a, 'b@32'),  # ishl by (3 - b) * 8, then ishr by 24
bf215546Sopenharmony_ci    ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
bf215546Sopenharmony_ci    'options->lower_extract_byte'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_u8', a, 'b@32'),  # ushr by b * 8, then mask with 0xff
bf215546Sopenharmony_ci    ('iand', ('ushr', a, ('imul', b, 8)), 0xff),
bf215546Sopenharmony_ci    'options->lower_extract_byte'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_i16', a, 'b@32'),  # ishl by (1 - b) * 16, then ishr by 16
bf215546Sopenharmony_ci    ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
bf215546Sopenharmony_ci    'options->lower_extract_word'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('extract_u16', a, 'b@32'),  # ushr by b * 16, then mask with 0xffff
bf215546Sopenharmony_ci    ('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
bf215546Sopenharmony_ci    'options->lower_extract_word'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('pack_unorm_2x16', 'v'),  # fsat, scale by 65535.0, round to even, f2u32, then pack
bf215546Sopenharmony_ci     ('pack_uvec2_to_uint',
bf215546Sopenharmony_ci        ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
bf215546Sopenharmony_ci     'options->lower_pack_unorm_2x16'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('pack_unorm_4x8', 'v'),
bf215546Sopenharmony_ci     ('pack_uvec4_to_uint',
bf215546Sopenharmony_ci        ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
bf215546Sopenharmony_ci     'options->lower_pack_unorm_4x8'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('pack_snorm_2x16', 'v'),  # clamp to [-1.0, 1.0], scale by 32767.0, round to even, f2i32, then pack
bf215546Sopenharmony_ci     ('pack_uvec2_to_uint',
bf215546Sopenharmony_ci        ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
bf215546Sopenharmony_ci     'options->lower_pack_snorm_2x16'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('pack_snorm_4x8', 'v'),
bf215546Sopenharmony_ci     ('pack_uvec4_to_uint',
bf215546Sopenharmony_ci        ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
bf215546Sopenharmony_ci     'options->lower_pack_snorm_4x8'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('unpack_unorm_2x16', 'v'),  # extract each 16-bit field, u2f32, divide by 65535.0
bf215546Sopenharmony_ci     ('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0),
bf215546Sopenharmony_ci                                  ('extract_u16', 'v', 1))),
bf215546Sopenharmony_ci              65535.0),
bf215546Sopenharmony_ci     'options->lower_unpack_unorm_2x16'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('unpack_unorm_4x8', 'v'),
bf215546Sopenharmony_ci     ('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0),
bf215546Sopenharmony_ci                                  ('extract_u8', 'v', 1),
bf215546Sopenharmony_ci                                  ('extract_u8', 'v', 2),
bf215546Sopenharmony_ci                                  ('extract_u8', 'v', 3))),
bf215546Sopenharmony_ci              255.0),
bf215546Sopenharmony_ci     'options->lower_unpack_unorm_4x8'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('unpack_snorm_2x16', 'v'),  # extract each signed field, i2f, divide by 32767.0, clamp to [-1.0, 1.0]
bf215546Sopenharmony_ci     ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
bf215546Sopenharmony_ci                                                            ('extract_i16', 'v', 1))),
bf215546Sopenharmony_ci                                           32767.0))),
bf215546Sopenharmony_ci     'options->lower_unpack_snorm_2x16'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci    (('unpack_snorm_4x8', 'v'),
bf215546Sopenharmony_ci     ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
bf215546Sopenharmony_ci                                                            ('extract_i8', 'v', 1),
bf215546Sopenharmony_ci                                                            ('extract_i8', 'v', 2),
bf215546Sopenharmony_ci                                                            ('extract_i8', 'v', 3))),
bf215546Sopenharmony_ci                                           127.0))),
bf215546Sopenharmony_ci     'options->lower_unpack_snorm_4x8'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('pack_half_2x16_split', 'a@32', 'b@32'),  # f2f16(a) lands in bits 0-15, f2f16(b) is shifted left by 16
bf215546Sopenharmony_ci    ('ior', ('ishl', ('u2u32', ('f2f16', b)), 16), ('u2u32', ('f2f16', a))),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_x', 'a@32'),
bf215546Sopenharmony_ci    ('f2f32', ('u2u16', a)),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('unpack_half_2x16_split_y', 'a@32'),
bf215546Sopenharmony_ci    ('f2f32', ('u2u16', ('ushr', a, 16))),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('pack_32_2x16_split', 'a@16', 'b@16'),
bf215546Sopenharmony_ci    ('ior', ('ishl', ('u2u32', b), 16), ('u2u32', a)),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_x', 'a@32'),
bf215546Sopenharmony_ci    ('u2u16', a),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('unpack_32_2x16_split_y', 'a@32'),
bf215546Sopenharmony_ci    ('u2u16', ('ushr', 'a', 16)),
bf215546Sopenharmony_ci    'options->lower_pack_split'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),  # clamp to [-1, 1]; the two rules below rebuild isign when it is not lowered
bf215546Sopenharmony_ci   (('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'),
bf215546Sopenharmony_ci   (('imax', ('imin', a, 1), -1), ('isign', a), '!options->lower_isign'),
bf215546Sopenharmony_ci   # float(0 < NaN) - float(NaN < 0) = float(False) - float(False) = 0 - 0 = 0
bf215546Sopenharmony_ci   # Mark the new comparisons precise to prevent them being changed to 'a !=
bf215546Sopenharmony_ci   # 0' or 'a == 0'.
bf215546Sopenharmony_ci   (('fsign', a), ('fsub', ('b2f', ('!flt', 0.0, a)), ('b2f', ('!flt', a, 0.0))), 'options->lower_fsign'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Address/offset calculations:
bf215546Sopenharmony_ci   # Drivers supporting imul24 should use the nir_lower_amul() pass, this
bf215546Sopenharmony_ci   # rule converts everyone else to imul:
bf215546Sopenharmony_ci   (('amul', a, b), ('imul', a, b), '!options->has_imul24'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('umul24', a, b),
bf215546Sopenharmony_ci    ('imul', ('iand', a, 0xffffff), ('iand', b, 0xffffff)),
bf215546Sopenharmony_ci    '!options->has_umul24'),
bf215546Sopenharmony_ci   (('umad24', a, b, c),
bf215546Sopenharmony_ci    ('iadd', ('imul', ('iand', a, 0xffffff), ('iand', b, 0xffffff)), c),
bf215546Sopenharmony_ci    '!options->has_umad24'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # Relaxed 24bit ops: use the native 24-bit opcode when the has_* option is set, otherwise a plain imul with no masking
bf215546Sopenharmony_ci   (('imul24_relaxed', a, b), ('imul24', a, b), 'options->has_imul24'),
bf215546Sopenharmony_ci   (('imul24_relaxed', a, b), ('imul', a, b), '!options->has_imul24'),
bf215546Sopenharmony_ci   (('umad24_relaxed', a, b, c), ('umad24', a, b, c), 'options->has_umad24'),
bf215546Sopenharmony_ci   (('umad24_relaxed', a, b, c), ('iadd', ('umul24_relaxed', a, b), c), '!options->has_umad24'),
bf215546Sopenharmony_ci   (('umul24_relaxed', a, b), ('umul24', a, b), 'options->has_umul24'),
bf215546Sopenharmony_ci   (('umul24_relaxed', a, b), ('imul', a, b), '!options->has_umul24'),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('imad24_ir3', a, b, 0), ('imul24', a, b)),
bf215546Sopenharmony_ci   (('imad24_ir3', a, 0, c), (c)),
bf215546Sopenharmony_ci   (('imad24_ir3', a, 1, c), ('iadd', a, c)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # if first two srcs are const, crack apart the imad so constant folding
bf215546Sopenharmony_ci   # can clean up the imul:
bf215546Sopenharmony_ci   # TODO ffma should probably get a similar rule:
bf215546Sopenharmony_ci   (('imad24_ir3', '#a', '#b', c), ('iadd', ('imul', a, b), c)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   # These will turn 24b address/offset calc back into 32b shifts, but
bf215546Sopenharmony_ci   # it should be safe to get back some of the bits of precision that we
bf215546Sopenharmony_ci   # already decided were not necessary:
bf215546Sopenharmony_ci   (('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
bf215546Sopenharmony_ci   (('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
bf215546Sopenharmony_ci   (('imul24', a, 0), (0)),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),  # fold a compare-with-zero feeding a select into one *csel_gt / *csel_ge op
bf215546Sopenharmony_ci   (('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),  # integer form inverts the test by swapping the b/c arms
bf215546Sopenharmony_ci   (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci   (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),  # float form mirrors the test by negating a instead
bf215546Sopenharmony_ci   (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci   (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci])
bf215546Sopenharmony_ci
# Bit-size dependent lowerings: signed saturating add/subtract are rewritten
# as wrapping arithmetic plus selects, one rule pair per integer width.
for bit_size in (8, 16, 32, 64):
   # intmin is spelled as the positive value with only the top bit set;
   # intmax is every bit below it.
   intmin = 1 << (bit_size - 1)
   intmax = intmin - 1

   wrapped_sum = ('iadd', a, b)
   wrapped_diff = ('isub', a, b)

   # The sign of b picks the only direction in which the wrapping result
   # can have overflowed; comparing that result against a detects whether
   # it actually did.
   iadd_sat_lowered = ('bcsel', ('ige', b, 1),
                       ('bcsel', ('ilt', wrapped_sum, a), intmax, wrapped_sum),
                       ('bcsel', ('ilt', a, wrapped_sum), intmin, wrapped_sum))
   isub_sat_lowered = ('bcsel', ('ilt', b, 0),
                       ('bcsel', ('ilt', wrapped_diff, a), intmax, wrapped_diff),
                       ('bcsel', ('ilt', a, wrapped_diff), intmin, wrapped_diff))

   # Both lowerings are gated by the same lower_iadd_sat option.
   optimizations.extend([
      (('iadd_sat@%d' % bit_size, a, b), iadd_sat_lowered, 'options->lower_iadd_sat'),
      (('isub_sat@%d' % bit_size, a, b), isub_sat_lowered, 'options->lower_iadd_sat'),
   ])
bf215546Sopenharmony_ci
# Maps each float (in)equality opcode to its logical negation.
invert = OrderedDict()
invert['feq'] = 'fneu'
invert['fneu'] = 'feq'

# De Morgan: push an inot through a single-use ior/iand of two comparisons
# by inverting each comparison and swapping the logical operator.
for left, right in itertools.combinations_with_replacement(invert, 2):
   lhs, rhs = (left, a, b), (right, c, d)
   not_lhs, not_rhs = (invert[left], a, b), (invert[right], c, d)
   optimizations += [
      (('inot', ('ior(is_used_once)', lhs, rhs)), ('iand', not_lhs, not_rhs)),
      (('inot', ('iand(is_used_once)', lhs, rhs)), ('ior', not_lhs, not_rhs)),
   ]
bf215546Sopenharmony_ci
# Optimize x2bN(b2x(x)) -> x
#
# Converting an N-bit Boolean to float/int and straight back to an N-bit
# Boolean yields the original Boolean.
for size in type_sizes('bool'):
    aN = 'a@{0}'.format(size)
    optimizations += [
        (('f2b{0}'.format(size), ('b2f', aN)), a),
        (('i2b{0}'.format(size), ('b2i', aN)), a),
    ]
bf215546Sopenharmony_ci
# Optimize x2yN(b2x(x)) -> b2y
#
# Both unsigned and signed integers come from b2i; only floats use b2f.
bool_conv_for = {'f': 'b2f', 'u': 'b2i', 'i': 'b2i'}

for x, y in itertools.product('fui', repeat=2):
   # Integer-to-integer pairs of differing signedness (u2i / i2u) are the
   # only combinations that get no rule.
   if x == 'f' or y == 'f' or x == y:
      x2yN = x + '2' + y
      optimizations += [((x2yN, (bool_conv_for[x], a)), (bool_conv_for[y], a))]
bf215546Sopenharmony_ci
# Optimize away x2xN(a@N): a same-type conversion to the size the value
# already has is a no-op.
for t in ('int', 'uint', 'float', 'bool'):
   x2x = t[0] + '2' + t[0]
   optimizations += [
      ((x2x + str(N), 'a@' + str(N)), a)
      for N in type_sizes(t)
   ]
bf215546Sopenharmony_ci
# Optimize x2xN(y2yM(a@P)) -> y2yN(a) for integers
# In particular, we can optimize away everything except upcast of downcast and
# upcasts where the type differs from the other cast
for N, M in itertools.product(type_sizes('uint'), repeat=2):
   if N == M:
      # Equal sizes are handled by other optimizations.
      continue

   if N < M:
      # The outer cast is a down-cast.  The size of the inner cast's argument
      # is irrelevant because this can never be the up-cast of down-cast
      # case.  Whatever the two signednesses are, the result is y2yN.
      for x, y in itertools.product('iu', repeat=2):
         x2xN = x + '2' + x + str(N)
         y2yM = y + '2' + y + str(M)
         y2yN = y + '2' + y + str(N)
         optimizations += [((x2xN, (y2yM, a)), (y2yN, a))]
      continue

   # N > M: the outer cast is an up-cast, so both the size of the inner
   # cast's argument and the cast types matter.  The surviving cast is the
   # outer one.
   for P in type_sizes('uint'):
      # An up-cast of a down-cast (P > M) cannot be optimized away.
      if P > M:
         continue

      # Neither cast narrows here (P <= M < N), so the two casts must
      # agree on signedness for the inner one to be redundant.
      for x in 'iu':
         x2xN = x + '2' + x + str(N)
         x2xM = x + '2' + x + str(M)
         aP = 'a@' + str(P)
         optimizations += [((x2xN, (x2xM, aP)), (x2xN, a))]
bf215546Sopenharmony_ci
# Downcast operations should be able to see through pack
#
# A down-cast of pack_64_2x32_split(a, b) to 32 bits or fewer is replaced by
# the same down-cast applied to the first pack operand alone.
for t in ['i', 'u']:
    for N in [8, 16, 32]:
        x2xN = '{0}2{0}{1}'.format(t, N)
        # The rule is registered once: a second, identical search pattern
        # could never match anything the first one had not already replaced.
        optimizations += [
            ((x2xN, ('pack_64_2x32_split', a, b)), (x2xN, a)),
        ]
bf215546Sopenharmony_ci
# Optimize comparisons with up-casts
#
# For every pair of sizes N < M of a numeric type, comparisons performed on
# values up-cast to M bits are narrowed back to N bits.  Rules whose narrow
# size is 8 or 16 bits are gated on the matching support_*bit_alu option;
# every other size is unconditional.
narrow_alu_cond = {
    8: 'options->support_8bit_alu',
    16: 'options->support_16bit_alu',
}

for t in ('int', 'uint', 'float'):
    is_float = t == 'float'
    xeq = 'feq' if is_float else 'ieq'
    xne = 'fneu' if is_float else 'ine'
    xge = t[0] + 'ge'
    xlt = t[0] + 'lt'

    for N, M in itertools.product(type_sizes(t), repeat=2):
        if N == 1 or M <= N:
            continue

        cond = narrow_alu_cond.get(N, 'true')
        x2xM = t[0] + '2' + t[0] + str(M)
        x2xN = t[0] + '2' + t[0] + str(N)
        aN = 'a@{0}'.format(N)
        bN = 'b@{0}'.format(N)

        wide_a = (x2xM, aN)
        b_as_N = (x2xN, b)

        # Up-casts are lossless so for correctly signed comparisons of
        # up-casted values we can do the comparison at the largest of the two
        # original sizes and drop one or both of the casts.  (We have
        # optimizations to drop the no-op casts which this may generate.)
        for P in type_sizes(t):
            if P == 1 or P > N:
                continue

            wide_b = (x2xM, 'b@{0}'.format(P))
            # a on the left for all four comparisons ...
            optimizations += [
                ((cmp_op, wide_a, wide_b), (cmp_op, a, b_as_N), cond)
                for cmp_op in (xeq, xne, xge, xlt)
            ]
            # ... and a on the right for the two ordered ones.
            optimizations += [
                ((cmp_op, wide_b, wide_a), (cmp_op, b_as_N, a), cond)
                for cmp_op in (xge, xlt)
            ]

        # The next bit doesn't work on floats because the range checks would
        # get way too complicated.
        if is_float:
            continue

        # Smallest and largest values representable in N bits for this type.
        if t == 'int':
            xN_min = -(1 << (N - 1))
            xN_max = (1 << (N - 1)) - 1
        else:
            xN_min = 0
            xN_max = (1 << N) - 1

        a_as_N = (x2xN, a)
        wide_bN = (x2xM, bN)

        # If we're up-casting and comparing to a constant, we can unfold
        # the comparison into a comparison with the shrunk down constant
        # and a check that the constant fits in the smaller bit size.
        optimizations += [
            ((xeq, wide_a, '#b'),
             ('iand', (xeq, a, b_as_N), (xeq, (x2xM, b_as_N), b)), cond),
            ((xne, wide_a, '#b'),
             ('ior', (xne, a, b_as_N), (xne, (x2xM, b_as_N), b)), cond),
            ((xlt, wide_a, '#b'),
             ('iand', (xlt, xN_min, b),
                      ('ior', (xlt, xN_max, b), (xlt, a, b_as_N))), cond),
            ((xlt, '#a', wide_bN),
             ('iand', (xlt, a, xN_max),
                      ('ior', (xlt, a, xN_min), (xlt, a_as_N, b))), cond),
            ((xge, wide_a, '#b'),
             ('iand', (xge, xN_max, b),
                      ('ior', (xge, xN_min, b), (xge, a, b_as_N))), cond),
            ((xge, '#a', wide_bN),
             ('iand', (xge, a, xN_min),
                      ('ior', (xge, a, xN_max), (xge, a_as_N, b))), cond),
        ]
bf215546Sopenharmony_ci
# Convert masking followed by signed downcast to just unsigned downcast
#
# The mask keeps exactly the bits the down-cast keeps, so it is redundant.
for dst_bits in (32, 16, 8):
    keep_mask = (1 << dst_bits) - 1
    for src_bits in (16, 32, 64):
        if src_bits <= dst_bits:
            continue
        optimizations.append(
            (('i2i%d' % dst_bits, ('iand', 'a@%d' % src_bits, keep_mask)),
             ('u2u%d' % dst_bits, a)))
bf215546Sopenharmony_ci
# Some operations such as iadd have the property that the bottom N bits of the
# output only depends on the bottom N bits of each of the inputs so we can
# remove casts
for N in [16, 32]:
    for M in [8, 16]:
        if N <= M:
            continue

        # Search-condition suffix: only the low M bits of the result are used.
        low_bits_used = '(only_lower_%d_bits_used)' % M
        aN = 'a@%d' % N
        u2uM = 'u2u%d' % M
        i2iM = 'i2i%d' % M

        for x in ['u', 'i']:
            x2xN = '%s2%s%d' % (x, x, N)
            extract_xM = 'extract_%s%d' % (x, M)

            # The three spellings of "low M bits of a, widened back to N".
            narrowed_forms = [
                (x2xN, (u2uM, aN)),
                (x2xN, (i2iM, aN)),
                (extract_xM, aN, 0),
            ]

            # The narrowing itself is the instruction whose upper bits are
            # unused.
            optimizations += [
                ((x2xN + low_bits_used, (u2uM, aN)), a),
                ((extract_xM + low_bits_used, aN, 0), a),
            ]

            # The narrowed value is the first selected operand of a bcsel.
            optimizations += [
                (('bcsel' + low_bits_used, c, narrowed, b), ('bcsel', c, a, b))
                for narrowed in narrowed_forms
            ]

            # The narrowed value is the first operand of a binary operation.
            for op in ['iadd', 'imul', 'iand', 'ior', 'ixor']:
                optimizations += [
                    ((op + low_bits_used, narrowed, b), (op, a, b))
                    for narrowed in narrowed_forms
                ]
bf215546Sopenharmony_ci
def fexp2i(exp, bits):
   # Generate an expression which constructs value 2.0^exp or 0.0.
   #
   # exp is an integer expression and bits is the float size (16, 32 or 64).
   # The result is built by adding the exponent bias to exp and shifting
   # the sum into the exponent field; all other bits are zero.
   #
   # We assume that exp is already in a valid range:
   #
   #   * [-15, 15] for 16-bit float
   #   * [-127, 127] for 32-bit float
   #   * [-1023, 1023] for 64-bit float
   #
   # If exp is the lowest value in the valid range, a value of 0.0 is
   # constructed.  Otherwise, the value 2.0^exp is constructed.
   #
   # Raises ValueError for any other bit size.  An explicit raise is used
   # instead of an assert so that the check survives "python -O" rather than
   # silently returning None.
   if bits == 16:
      return ('i2i16', ('ishl', ('iadd', exp, 15), 10))
   if bits == 32:
      return ('ishl', ('iadd', exp, 127), 23)
   if bits == 64:
      # Only the high dword carries the exponent; the low dword is zero.
      return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))
   raise ValueError('fexp2i: unsupported bit size {0!r}'.format(bits))
bf215546Sopenharmony_ci
def ldexp(f, exp, bits):
   # Build the lowered expression for ldexp(f, exp) on a bits-wide float
   # (16, 32 or 64).  Raises ValueError for any other size; an explicit
   # raise is used so that the check is not stripped by "python -O".
   #
   # The maximum possible range for a normal exponent is [-126, 127] and,
   # throwing in denormals, you get a maximum range of [-149, 127].  This
   # means that we can potentially have a swing of +-276.  If you start with
   # FLT_MAX, you actually have to do ldexp(FLT_MAX, -278) to get it to flush
   # all the way to zero.  The GLSL spec only requires that we handle a subset
   # of this range.  From version 4.60 of the spec:
   #
   #    "If exp is greater than +128 (single-precision) or +1024
   #    (double-precision), the value returned is undefined. If exp is less
   #    than -126 (single-precision) or -1022 (double-precision), the value
   #    returned may be flushed to zero. Additionally, splitting the value
   #    into a significand and exponent using frexp() and then reconstructing
   #    a floating-point value using ldexp() should yield the original input
   #    for zero and all finite non-denormalized values."
   #
   # The SPIR-V spec has similar language.
   #
   # In order to handle the maximum value +128 using the fexp2i() helper,
   # we have to split the exponent in half and do two multiply operations.
   #
   # First, we clamp exp to a reasonable range: twice the range that is
   # valid for fexp2i().  If exp/2 is the bottom value of that range, the
   # fexp2i() expression will yield 0.0f which, when multiplied by f, will
   # flush it to zero which is allowed by the GLSL and SPIR-V specs for low
   # exponent values.  If the value is clamped from above, then it must have
   # been above the supported range of the GLSL built-in and therefore any
   # return value is acceptable.
   clamp_limit = {16: 30, 32: 254, 64: 2046}
   if bits not in clamp_limit:
      raise ValueError('ldexp: unsupported bit size {0!r}'.format(bits))
   limit = clamp_limit[bits]
   exp = ('imin', ('imax', exp, -limit), limit)

   # Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
   # (We use ishr which isn't the same for -1, but the -1 case still works
   # since we use exp-exp/2 as the second exponent.)  While the spec
   # technically defines ldexp as f * 2.0^exp, simply multiplying once doesn't
   # work with denormals and doesn't allow for the full swing in exponents
   # that you can get with normalized values.  Instead, we create two powers
   # of two and multiply by them each in turn.  That way the effective range
   # of our exponent is doubled.
   half_exp = ('ishr', exp, 1)
   pow2_1 = fexp2i(half_exp, bits)
   pow2_2 = fexp2i(('isub', exp, half_exp), bits)
   return ('fmul', ('fmul', f, pow2_1), pow2_2)
bf215546Sopenharmony_ci
# Lower ldexp at every supported float size when the backend asks for it.
optimizations += [
   (('ldexp@%d' % float_bits, 'x', 'exp'),
    ldexp('x', 'exp', float_bits),
    'options->lower_ldexp')
   for float_bits in (16, 32, 64)
]
bf215546Sopenharmony_ci
# Unreal Engine 4 demo applications open-codes bitfieldReverse()
def bitfield_reverse_ue4(u):
    # Returns the search pattern for that open-coded sequence applied to u:
    # swap the two 16-bit halves, then swap bytes, nibbles, bit pairs and
    # single bits, each time masking first and shifting second.
    value = ('ior', ('ishl', u, 16), ('ushr', u, 16))

    for low_mask, shift in ((0x00ff00ff, 8), (0x0f0f0f0f, 4),
                            (0x33333333, 2), (0x55555555, 1)):
        high_mask = low_mask ^ 0xffffffff
        # Only the outermost ior carries the many-comm-expr annotation.
        ior_op = 'ior(many-comm-expr)' if shift == 1 else 'ior'
        value = (ior_op,
                 ('ishl', ('iand', value, low_mask), shift),
                 ('ushr', ('iand', value, high_mask), shift))

    return value
bf215546Sopenharmony_ci
# Cyberpunk 2077 open-codes bitfieldReverse()
def bitfield_reverse_cp2077(u):
    # Returns the search pattern for that open-coded sequence applied to u:
    # swap the two 16-bit halves, then swap single bits, bit pairs, nibbles
    # and bytes, each time shifting first and masking second.
    value = ('ior', ('ishl', u, 16), ('ushr', u, 16))

    for shift, low_mask in ((1, 0x55555555), (2, 0x33333333),
                            (4, 0x0f0f0f0f), (8, 0x00ff00ff)):
        high_mask = low_mask ^ 0xffffffff
        # Only the outermost ior carries the many-comm-expr annotation.
        ior_op = 'ior(many-comm-expr)' if shift == 8 else 'ior'
        value = (ior_op,
                 ('iand', ('ishl', value, shift), high_mask),
                 ('iand', ('ushr', value, shift), low_mask))

    return value
bf215546Sopenharmony_ci
# Collapse either open-coded 32-bit sequence into a single bitfield_reverse,
# unless the backend lowers that instruction.
optimizations += [
    (open_coded('x@32'), ('bitfield_reverse', 'x'), '!options->lower_bitfield_reverse')
    for open_coded in (bitfield_reverse_ue4, bitfield_reverse_cp2077)
]
bf215546Sopenharmony_ci
# "all_equal(eq(a, b), vec(~0))" is the same as "all_equal(a, b)"
# "any_nequal(neq(a, b), vec(0))" is the same as "any_nequal(a, b)"
for ncomp in (2, 3, 4, 8, 16):
   all_ieq = 'ball_iequal{0}'.format(ncomp)
   all_feq = 'ball_fequal{0}'.format(ncomp)
   any_ine = 'bany_inequal{0}'.format(ncomp)
   any_fne = 'bany_fnequal{0}'.format(ncomp)

   # The reduction over the comparison result is always the integer
   # variant; the replacement takes its type from the inner comparison.
   optimizations.extend([
      ((all_ieq, ('ieq', a, b), ~0), (all_ieq, a, b)),
      ((all_ieq, ('feq', a, b), ~0), (all_feq, a, b)),
      ((any_ine, ('ine', a, b), 0), (any_ine, a, b)),
      ((any_ine, ('fneu', a, b), 0), (any_fne, a, b)),
   ])
bf215546Sopenharmony_ci
# For any float comparison operation, "cmp", if you have "a == a && a cmp b"
# then the "a == a" is redundant because it's equivalent to "a is not NaN"
# and, if a is a NaN then the second comparison will fail anyway.
for op in ('flt', 'fge', 'feq'):
   a_is_not_nan = ('feq', a, a)
   # a may sit on either side of the comparison.
   for lhs, rhs in ((a, b), (b, a)):
      optimizations.append(
         (('iand', a_is_not_nan, (op, lhs, rhs)), ('!' + op, lhs, rhs)))
bf215546Sopenharmony_ci
# Handle the case where the result of a ternary with constant arms is
# compared to a constant, by distributing the comparison over both arms.
# For instance
#
# (a ? 0 : 1) > 0
#
# becomes
#
# a ? (0 > 0) : (1 > 0)
#
# which constant folding will eat for lunch.  The resulting ternary will
# further get cleaned up by the boolean reductions above and we will be
# left with just the original variable "a".
optimizations += [
   ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
    ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd')))
   for op in ('feq', 'fneu', 'ieq', 'ine')
]
bf215546Sopenharmony_ci
# Same distribution of a comparison over a constant-armed ternary, for the
# ordered comparisons.  Operand order matters for these, so the ternary is
# matched on either side of the comparison.
for op in ('flt', 'fge', 'ilt', 'ige', 'ult', 'uge'):
   optimizations.append(
      ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
       ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))))
   optimizations.append(
      ((op, '#d', ('bcsel', a, '#b', '#c')),
       ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))))
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci
# Distribute an operation with a constant operand over a single-use bcsel
# whose first arm is also constant.  For example, this converts things like
#
#    1 + mix(0, a - 1, condition)
#
# into
#
#    mix(1, (a-1)+1, condition)
#
# Other optimizations will rearrange the constants.
optimizations += [
   ((op, ('bcsel(is_used_once)', a, '#b', c), '#d'),
    ('bcsel', a, (op, b, d), (op, c, d)))
   for op in ('fadd', 'fmul', 'fmulz', 'iadd', 'imul')
]
bf215546Sopenharmony_ci
# For derivatives in compute shaders, GLSL_NV_compute_shader_derivatives
# states:
#
#     If neither layout qualifier is specified, derivatives in compute shaders
#     return zero, which is consistent with the handling of built-in texture
#     functions like texture() in GLSL 4.50 compute shaders.
no_derivative_group = 'info->stage == MESA_SHADER_COMPUTE && info->cs.derivative_group == DERIVATIVE_GROUP_NONE'

for axis_op in ('fddx', 'fddy'):
   for precision in ('', '_fine', '_coarse'):
      optimizations.append(((axis_op + precision, 'a'), 0.0, no_derivative_group))
bf215546Sopenharmony_ci
# Some optimizations for ir3-specific instructions.
optimizations.extend([
   # 'al * bl': If either 'al' or 'bl' is zero, return zero.
   (('umul_low', '#a(is_lower_half_zero)', 'b'), 0),
   # '(ah * bl) << 16 + c': If either 'ah' or 'bl' is zero, return 'c'.
   (('imadsh_mix16', '#a@32(is_lower_half_zero)', 'b@32', 'c@32'), 'c'),
   (('imadsh_mix16', 'a@32', '#b@32(is_upper_half_zero)', 'c@32'), 'c'),
])
bf215546Sopenharmony_ci
# These kinds of sequences can occur after nir_opt_peephole_select.
#
# NOTE: fadd is not handled here because that gets in the way of ffma
# generation in the i965 driver.  Instead, fadd and ffma are handled in
# late_optimizations.

# A bcsel between two flrp's that differ in exactly one operand becomes a
# single flrp whose differing operand is a bcsel.  At least one of the two
# flrp's must be single-use.
for op in ('flrp',):
    op_once = op + '(is_used_once)'
    then_args = (b, c, d)

    # (operands of the else-side flrp, merged replacement)
    variants = [
        ((b, c, e), (op, b, c, ('bcsel', a, d, e))),
        ((b, e, d), (op, b, ('bcsel', a, c, e), d)),
        ((e, c, d), (op, ('bcsel', a, b, e), c, d)),
    ]

    for else_args, merged in variants:
        optimizations += [
            (('bcsel', a, (op_once,) + then_args, (op,) + else_args), merged),
            (('bcsel', a, (op,) + then_args, (op_once,) + else_args), merged),
        ]
bf215546Sopenharmony_ci
# A bcsel between two applications of the same binary operation that share
# their first operand becomes one operation on a bcsel of the second
# operands.  One side must be single-use and one of the differing operands
# must not be a constant.
for op in ('fmulz', 'fmul', 'iadd', 'imul', 'iand', 'ior', 'ixor',
           'fmin', 'fmax', 'imin', 'imax', 'umin', 'umax'):
    op_once = op + '(is_used_once)'
    c_not_const = 'c(is_not_const)'
    d_not_const = 'd(is_not_const)'
    merged = (op, b, ('bcsel', a, c, d))

    optimizations.extend([
        (('bcsel', a, (op_once, b, c), (op, b, d_not_const)), merged),
        (('bcsel', a, (op_once, b, c_not_const), (op, b, d)), merged),
        (('bcsel', a, (op, b, c_not_const), (op_once, b, d)), merged),
        (('bcsel', a, (op, b, c), (op_once, b, d_not_const)), merged),
    ])
bf215546Sopenharmony_ci
# A bcsel between two fpow's that share one operand becomes a single fpow
# with a bcsel in place of the operand that differs.  At least one of the
# two fpow's must be single-use.
for op in ('fpow',):
    op_once = op + '(is_used_once)'

    # (operands of the else-side fpow, merged replacement)
    variants = [
        ((b, d), (op, b, ('bcsel', a, c, d))),
        ((d, c), (op, ('bcsel', a, b, d), c)),
    ]

    for else_args, merged in variants:
        optimizations += [
            (('bcsel', a, (op_once, b, c), (op,) + else_args), merged),
            (('bcsel', a, (op, b, c), (op_once,) + else_args), merged),
        ]
bf215546Sopenharmony_ci
# A bcsel between two single-use applications of the same unary float
# operation becomes that operation applied to a bcsel of the operands.
#
# Each opcode is listed exactly once; a repeated entry would only register
# an identical rule a second time.
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fneg', 'fabs']:
    optimizations += [
        (('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))),
    ]
bf215546Sopenharmony_ci
# Push a unary integer operation into both arms of a bcsel whose arms are
# constants, so that each arm can then be constant-folded.
optimizations += [
    ((op, ('bcsel', c, '#a', '#b')), ('bcsel', c, (op, a), (op, b)))
    for op in ('ineg', 'iabs', 'inot', 'isign')
]
bf215546Sopenharmony_ci
# Lower fisnormal to integer arithmetic on the float's bit pattern: shift
# left by one, add a single-bit constant and do an unsigned compare.
# NOTE(review): the per-size shift amounts below are 10/23/52, which look
# like the explicit mantissa widths of half/single/double -- confirm.
for float_bits, frac_bits in ((16, 10), (32, 23), (64, 52)):
    exp_lsb = 1 << (frac_bits + 1)
    threshold = (exp_lsb << 1) - 1
    optimizations.append(
        (('fisnormal', 'a@%d' % float_bits),
         ('ult', threshold, ('iadd', ('ishl', a, 1), exp_lsb)),
         'options->lower_fisnormal'))
bf215546Sopenharmony_ci
# This section propagates downsizing conversions of constructed vectors into
# vectors of downsized components.  Whether that helps depends on the SIMD
# semantics of the backend:
#
#  * On a true SIMD machine it reduces the register pressure of the vector
#    itself and often lets the conversions be eliminated by other algebraic
#    rules or constant folding.  In the worst case the propagated conversions
#    may be revectorized via nir_opt_vectorize, so instruction count is
#    minimally impacted.
#
#  * On a machine with SIMD-within-a-register only, it counterintuitively
#    hurts instruction count.  Those are the same machines that require
#    vectorize_vec2_16bit, so the rules are predicated on that flag not
#    being set.
#
#  * On scalar architectures there should be no difference in generated code
#    since it all ends up scalarized at the end, but it might minimally help
#    compile-times.

for i in (2, 3, 4):
   comps = 'abcd'[:i]
   vec_op = 'vec%d' % i
   # The vecN of 32-bit components that the conversion is applied to.
   wide_vec = (vec_op,) + tuple(comp + '@32' for comp in comps)

   for T in ('f', 'u', 'i'):
      # u2ump doesn't exist, because it's equal to i2imp
      suffixes = ('16',) if T == 'u' else ('16', 'mp')

      for suffix in suffixes:
         conv = T + '2' + T + suffix
         narrow_vec = (vec_op,) + tuple((conv, comp) for comp in comps)
         optimizations.append(
            ((conv, wide_vec), narrow_vec, '!options->vectorize_vec2_16bit'))
bf215546Sopenharmony_ci
# This section contains "late" optimizations that should be run before
# creating ffmas and calling regular optimizations for the final time.
# Optimizations should go here if they help code generation and conflict
# with the regular optimizations.
before_ffma_optimizations = [
   # Propagate constants down multiplication and addition chains:
   # (a op #b) op c  =>  (a op c) op #b
   (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
   (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
   (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),

   # Factor a common multiplicand out of a sum of products:
   # a*b + a*c  =>  a*(b + c)
   (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
   (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),

   # -a + a cancels, both directly and when one of the operands is itself an
   # add that contains the matching term.
   (('~fadd', ('fneg', a), a), 0.0),
   (('iadd', ('ineg', a), a), 0),
   (('iadd', ('ineg', a), ('iadd', a, b)), b),
   (('iadd', a, ('iadd', ('ineg', a), b)), b),
   (('~fadd', ('fneg', a), ('fadd', a, b)), b),
   (('~fadd', a, ('fadd', ('fneg', a), b)), b),

   # flrp between (a + x) and (a + y): pull the common a out of the
   # interpolation.  The first two rules handle x, y = -1, +1 in either order;
   # the last one handles arbitrary constants.
   (('~flrp', ('fadd(is_used_once)', a, -1.0), ('fadd(is_used_once)', a,  1.0), d), ('fadd', ('flrp', -1.0,  1.0, d), a)),
   (('~flrp', ('fadd(is_used_once)', a,  1.0), ('fadd(is_used_once)', a, -1.0), d), ('fadd', ('flrp',  1.0, -1.0, d), a)),
   (('~flrp', ('fadd(is_used_once)', a, '#b'), ('fadd(is_used_once)', a, '#c'), d), ('fadd', ('fmul', d, ('fadd', c, ('fneg', b))), ('fadd', a, b))),
]

# This section contains "late" optimizations that should be run after the
# regular optimizations have finished.  Optimizations should go here if
# they help code generation but do not necessarily produce code that is
# more easily optimizable.
late_optimizations = [
   # The rearrangements are fine w.r.t. NaN.  However, they produce incorrect
   # results if one operand is +Inf and the other is -Inf.
   #
   # 1. Inf + -Inf = NaN
   # 2. ∀x: x + NaN = NaN and x - NaN = NaN
   # 3. ∀x: x != NaN = true
   # 4. ∀x, ∀ cmp ∈ {<, >, ≤, ≥, =}: x cmp NaN = false
   #
   #               a=Inf, b=-Inf   a=-Inf, b=Inf    a=NaN    b=NaN
   #  (a+b) < 0        false            false       false    false
   #      a < -b       false            false       false    false
   # -(a+b) < 0        false            false       false    false
   #     -a < b        false            false       false    false
   #  (a+b) >= 0       false            false       false    false
   #      a >= -b      true             true        false    false
   # -(a+b) >= 0       false            false       false    false
   #     -a >= b       true             true        false    false
   #  (a+b) == 0       false            false       false    false
   #      a == -b      true             true        false    false
   #  (a+b) != 0       true             true        true     true
   #      a != -b      false            false       true     true
   (('flt',                        ('fadd(is_used_once)', a, b),  0.0), ('flt',          a, ('fneg', b))),
   (('flt', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('flt', ('fneg', a),         b)),
   (('flt', 0.0,                        ('fadd(is_used_once)', a, b) ), ('flt', ('fneg', a),         b)),
   (('flt', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('flt',          a, ('fneg', b))),
   (('~fge',                        ('fadd(is_used_once)', a, b),  0.0), ('fge',          a, ('fneg', b))),
   (('~fge', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('fge', ('fneg', a),         b)),
   (('~fge', 0.0,                        ('fadd(is_used_once)', a, b) ), ('fge', ('fneg', a),         b)),
   (('~fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('fge',          a, ('fneg', b))),
   (('~feq', ('fadd(is_used_once)', a, b), 0.0), ('feq', a, ('fneg', b))),
   (('~fneu', ('fadd(is_used_once)', a, b), 0.0), ('fneu', a, ('fneg', b))),

   # If either source must be finite, then the original (a+b) cannot produce
   # NaN due to Inf-Inf.  The patterns and the replacements produce the same
   # result if b is NaN. Therefore, the replacements are exact.
   (('fge',                        ('fadd(is_used_once)', 'a(is_finite)', b),  0.0), ('fge',          a, ('fneg', b))),
   (('fge', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b)), 0.0), ('fge', ('fneg', a),         b)),
   (('fge', 0.0,                        ('fadd(is_used_once)', 'a(is_finite)', b) ), ('fge', ('fneg', a),         b)),
   (('fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b))), ('fge',          a, ('fneg', b))),
   (('feq',  ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('feq',  a, ('fneg', b))),
   (('fneu', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('fneu', a, ('fneg', b))),

   # This is how SpvOpFOrdNotEqual might be implemented.  Replace it with
   # SpvOpLessOrGreater.
   (('iand', ('fneu', a, b),   ('iand', ('feq', a, a), ('feq', b, b))), ('ior', ('!flt', a, b), ('!flt', b, a))),
   (('iand', ('fneu', a, 0.0),          ('feq', a, a)                ), ('!flt', 0.0, ('fabs', a))),

   # This is how SpvOpFUnordEqual might be implemented.  Replace it with
   # !SpvOpLessOrGreater.
   (('ior', ('feq', a, b),   ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('ior', ('!flt', a, b), ('!flt', b, a)))),
   (('ior', ('feq', a, 0.0),         ('fneu', a, a),                ), ('inot', ('!flt', 0.0, ('fabs', a)))),

   # nir_lower_to_source_mods will collapse this, but its existence during the
   # optimization loop can prevent other optimizations.
   (('fneg', ('fneg', a)), a),

   # re-combine inexact mul+add to ffma. Do this before fsub so that a * b - c
   # gets combined to fma(a, b, -c).
   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
   (('~fadd@32', ('fmulz', a, b), c), ('ffmaz', a, b, c), 'options->fuse_ffma32'),

   # Subtractions get lowered during optimization, so we need to recombine them
   (('fadd@8', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
   (('fadd@16', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
   (('fadd@32', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
   (('fadd@64', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub && !(options->lower_doubles_options & nir_lower_dsub)'),

   # Negation / subtraction / absolute value rewrites selected by the
   # backend's has_* and lower_* options.
   (('fneg', a), ('fmul', a, -1.0), 'options->lower_fneg'),
   (('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
   (('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
   (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),

   # Fold two-level integer add/sub chains of non-constant operands into a
   # single three-source iadd3.
   (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
   (('iadd', ('isub(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, ('ineg', b), c), 'options->has_iadd3'),
   (('isub', ('isub(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, ('ineg', b), ('ineg', c)), 'options->has_iadd3'),

   # fneg_lo / fneg_hi
   (('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),
   (('vec2(is_only_used_as_float)', a, ('fneg@16', b)), ('fmul', ('vec2', a, b), ('vec2', 1.0, -1.0)), 'options->vectorize_vec2_16bit'),

   # These are duplicated from the main optimizations table.  The late
   # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
   # new patterns like these.  The patterns that compare with zero are removed
   # because they are unlikely to be created by anything in
   # late_optimizations.
   (('flt', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('flt', b, a)),
   (('fge', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fge', a, b)),
   (('feq', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('feq', a, b)),
   (('fneu', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fneu', a, b)),

   (('fge', ('fsat(is_used_once)', a), 1.0), ('fge', a, 1.0)),

   # min(a + b, c + d) >= 0 holds when both sums are >= 0; each sum test is
   # then rearranged the same way as the fge rules at the top of this table.
   (('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),

   # Comparisons with both sides negated: drop the negations and swap the
   # operands.  The constant forms fold the negation into the -1.0 constant.
   (('flt', ('fneg', a), ('fneg', b)), ('flt', b, a)),
   (('fge', ('fneg', a), ('fneg', b)), ('fge', b, a)),
   (('feq', ('fneg', a), ('fneg', b)), ('feq', b, a)),
   (('fneu', ('fneg', a), ('fneg', b)), ('fneu', b, a)),
   (('flt', ('fneg', a), -1.0), ('flt', 1.0, a)),
   (('flt', -1.0, ('fneg', a)), ('flt', a, 1.0)),
   (('fge', ('fneg', a), -1.0), ('fge', 1.0, a)),
   (('fge', -1.0, ('fneg', a)), ('fge', a, 1.0)),
   (('fneu', ('fneg', a), -1.0), ('fneu', 1.0, a)),
   (('feq', -1.0, ('fneg', a)), ('feq', a, 1.0)),

   # or/and of a value with itself is that value.
   (('ior', a, a), a),
   (('iand', a, a), a),

   (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),

   # Switch dot products to their *_replicated variants when the backend sets
   # fdot_replicates.
   (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
   (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
   (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),
   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),

   (('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),

   # Approximate handling of fround_even for DX9 addressing from gallium nine on
   # DX9-class hardware with no proper fround support.  This is in
   # late_optimizations so that the is_integral() opts in the main pass get a
   # chance to eliminate the fround_even first.
   (('fround_even', a), ('bcsel',
                         ('feq', ('ffract', a), 0.5),
                         ('fadd', ('ffloor', ('fadd', a, 0.5)), 1.0),
                         ('ffloor', ('fadd', a, 0.5))), 'options->lower_fround_even'),

   # A similar operation could apply to any ffma(#a, b, #(-a/2)), but this
   # particular operation is common for expanding values stored in a texture
   # from [0,1] to [-1,1].
   (('~ffma@32', a,  2.0, -1.0), ('flrp', -1.0,  1.0,          a ), '!options->lower_flrp32'),
   (('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0,  1.0, ('fneg', a)), '!options->lower_flrp32'),
   (('~ffma@32', a, -2.0,  1.0), ('flrp',  1.0, -1.0,          a ), '!options->lower_flrp32'),
   (('~ffma@32', a,  2.0,  1.0), ('flrp',  1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
   (('~fadd@32', ('fmul(is_used_once)',  2.0, a), -1.0), ('flrp', -1.0,  1.0,          a ), '!options->lower_flrp32'),
   (('~fadd@32', ('fmul(is_used_once)', -2.0, a), -1.0), ('flrp', -1.0,  1.0, ('fneg', a)), '!options->lower_flrp32'),
   (('~fadd@32', ('fmul(is_used_once)', -2.0, a),  1.0), ('flrp',  1.0, -1.0,          a ), '!options->lower_flrp32'),
   (('~fadd@32', ('fmul(is_used_once)',  2.0, a),  1.0), ('flrp',  1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),

   # flrp(a, b, a)
   # a*(1-a) + b*a
   # a + -a*a + a*b    (1)
   # a + a*(b - a)
   # Option 1: ffma(a, (b-a), a)
   #
   # Alternately, after (1):
   # a*(1+b) + -a*a
   # a*((1+b) + -a)
   #
   # Let b=1
   #
   # Option 2: ffma(a, 2, -(a*a))
   # Option 3: ffma(a, 2, (-a)*a)
   # Option 4: ffma(a, -a, (2*a))
   # Option 5: a * (2 - a)
   #
   # There are a lot of other possible combinations.
   (('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
   (('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
   (('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
   (('~ffma@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
   (('~fmul@32', a, ('fadd', 2.0, ('fneg', a))),    ('flrp', a, 1.0, a), '!options->lower_flrp32'),

   # we do these late so that we don't get in the way of creating ffmas
   (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
   (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),

   # Putting this in 'optimizations' interferes with the bcsel(a, op(b, c),
   # op(b, d)) => op(b, bcsel(a, c, d)) transformations.  I do not know why.
   (('bcsel', ('feq', ('fsqrt', 'a(is_not_negative)'), 0.0), intBitsToFloat(0x7f7fffff), ('frsq', a)),
    ('fmin', ('frsq', a), intBitsToFloat(0x7f7fffff))),

   # Things that look like DPH in the source shader may get expanded to
   # something that looks like dot(v1.xyz, v2.xyz) + v1.w by the time it gets
   # to NIR.  After FFMA is generated, this can look like:
   #
   #    fadd(ffma(v1.z, v2.z, ffma(v1.y, v2.y, fmul(v1.x, v2.x))), v1.w)
   #
   # Reassociate the last addition into the first multiplication.
   #
   # Some shaders do not use 'invariant' in vertex and (possibly) geometry
   # shader stages on some outputs that are intended to be invariant.  For
   # various reasons, this optimization may not be fully applied in all
   # shaders used for different rendering passes of the same geometry.  This
   # can result in Z-fighting artifacts (at best).  For now, disable this
   # optimization in these stages.  See bugzilla #111490.  In tessellation
   # stages applications seem to use 'precise' when necessary, so allow the
   # optimization in those stages.
   (('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
    ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
   (('~fadd', ('ffma(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
    ('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
   (('~fadd', ('fneg', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
    ('ffma', ('fneg', a), b, ('ffma', ('fneg', c), d, ('ffma', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),

   # The same three reassociations for the ffmaz / fmulz variants.
   (('~fadd', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
    ('ffmaz', a, b, ('ffmaz', c, d, ('ffmaz', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
   (('~fadd', ('ffmaz(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
    ('ffmaz', a, b, ('ffmaz', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
   (('~fadd', ('fneg', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
    ('ffmaz', ('fneg', a), b, ('ffmaz', ('fneg', c), d, ('ffmaz', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),

   # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
   #
   #    If bits is zero, the result will be zero.
   #
   # These prevent the next two lowerings generating incorrect results when
   # count is zero.
   (('ubfe', a, b, 0), 0),
   (('ibfe', a, b, 0), 0),

   # On Intel GPUs, BFE is a 3-source instruction.  Like all 3-source
   # instructions on Intel GPUs, it cannot have immediate values as
   # sources.  There are also limitations on source register strides.  As a
   # result, it is very easy for 3-source instruction combined with either
   # loads of immediate values or copies from weird register strides to be
   # more expensive than the primitive instructions it represents.
   (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->avoid_ternary_with_two_constants'),

   # b is the lowest order bit to be extracted and c is the number of bits to
   # extract.  The inner shift removes the bits above b + c by shifting left
   # 32 - (b + c).  ishl only sees the low 5 bits of the shift count, which is
   # -(b + c).  The outer shift moves the bit that was at b to bit zero.
   # After the first shift, that bit is now at b + (32 - (b + c)) or 32 - c.
   # This means that it must be shifted right by 32 - c or -c bits.
   (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->avoid_ternary_with_two_constants'),

   # Clean up no-op shifts that may result from the bfe lowerings.
   (('ishl', a, 0), a),
   (('ishl', a, -32), a),
   (('ishr', a, 0), a),
   (('ishr', a, -32), a),
   (('ushr', a, 0), a),

   # An extract of byte 0 applied to another byte extract: keep the inner
   # byte index and the outer extract's signedness.
   (('extract_i8', ('extract_i8', a, b), 0), ('extract_i8', a, b)),
   (('extract_i8', ('extract_u8', a, b), 0), ('extract_i8', a, b)),
   (('extract_u8', ('extract_i8', a, b), 0), ('extract_u8', a, b)),
   (('extract_u8', ('extract_u8', a, b), 0), ('extract_u8', a, b)),
]

# A few more extract cases we'd rather leave late
#
# A narrowing conversion to 8 (or 16) bits followed by a conversion back to
# N bits becomes a single extract of byte (or word) 0.  The signedness of the
# outer conversion selects extract_u* vs. extract_i*; the inner conversion may
# be either u2u or i2i.
for N in [16, 32]:
    aN = 'a@{0}'.format(N)

    for x in ['u', 'i']:
        x2xN = '{0}2{0}{1}'.format(x, N)
        extract_x8 = 'extract_{0}8'.format(x)
        extract_x16 = 'extract_{0}16'.format(x)

        late_optimizations.extend([
            ((x2xN, ('u2u8', aN)), (extract_x8, a, 0), '!options->lower_extract_byte'),
            ((x2xN, ('i2i8', aN)), (extract_x8, a, 0), '!options->lower_extract_byte'),
        ])

        # A 16-bit word can only be extracted from a value wider than 16 bits.
        if N > 16:
            late_optimizations.extend([
                ((x2xN, ('u2u16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
                ((x2xN, ('i2i16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
            ])

# Byte insertion
#
# For byte positions 1..3 of a 32-bit value, recognise "take byte 0 and shift
# it into position i" and "shift left by 8*i and mask to byte i" as
# insert_u8(a, i).  The rules are appended in three groups, in this order.
late_optimizations.extend([
    (('ishl', ('extract_u8', 'a@32', 0), 8 * i), ('insert_u8', a, i), '!options->lower_insert_byte')
    for i in range(1, 4)
])
late_optimizations.extend([
    (('iand', ('ishl', 'a@32', 8 * i), 0xff << (8 * i)), ('insert_u8', a, i), '!options->lower_insert_byte')
    for i in range(1, 4)
])
# A left shift by 24 leaves only the original low byte, now in byte 3.
late_optimizations.append((('ishl', 'a@32', 24), ('insert_u8', a, 3), '!options->lower_insert_byte'))

late_optimizations += [
   # Word insertion
   (('ishl', 'a@32', 16), ('insert_u16', a, 1), '!options->lower_insert_word'),

   # Extract and then insert: an insert fed by an extract of byte/word 0 can
   # take the un-extracted value directly.
   (('insert_u8', ('extract_u8', 'a', 0), b), ('insert_u8', a, b)),
   (('insert_u16', ('extract_u16', 'a', 0), b), ('insert_u16', a, b)),
]

# Integer sizes
#
# For each integer bit size, merge two comparisons against zero into one
# comparison of the unsigned minimum:
#   (a != 0) && (b != 0)  =>  umin(a, b) != 0
#   (a == 0) || (b == 0)  =>  umin(a, b) == 0
for s in [8, 16, 32, 64]:
    a_sized = 'a@{}'.format(s)
    b_sized = 'b@{}'.format(s)
    late_optimizations.extend([
        (('iand', ('ine(is_used_once)', a_sized, 0), ('ine', b_sized, 0)), ('ine', ('umin', a, b), 0)),
        (('ior',  ('ieq(is_used_once)', a_sized, 0), ('ieq', b_sized, 0)), ('ieq', ('umin', a, b), 0)),
    ])

# Float sizes
#
# Per float bit size s:
#  - when lower_flrp<s> is set, rewrite 1 + c*(b - 1) as (1 - c) + b*c;
#  - bcsel(a, 0, b2f(!b)) becomes b2f(!(a || b)).
for s in [16, 32, 64]:
    late_optimizations.extend([
       (('~fadd@{}'.format(s), 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp{}'.format(s)),
       (('bcsel', a, 0, ('b2f{}'.format(s), ('inot', 'b@bool'))), ('b2f{}'.format(s), ('inot', ('ior', a, b)))),
    ])

# bcsel(a, op(b, c), op(b, d))  =>  op(b, bcsel(a, c, d))
# when at least one of the two op results is used only once.
for op in ['fadd']:
    late_optimizations += [
        (('bcsel', a, (op + '(is_used_once)', b, c), (op, b, d)), (op, b, ('bcsel', a, c, d))),
        (('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
    ]

# The three-source analogue of the bcsel rule for fadd: when both arms of a
# bcsel are the same fused multiply-add differing in a single source, select
# that source instead and keep one fused op.  At least one of the two arms
# must be used only once.
for op in ['ffma', 'ffmaz']:
    late_optimizations += [
        # Arms differ in the addend (third source).
        (('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
        (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),

        # Arms differ in the second multiplicand.
        (('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, e, d)), (op, b, ('bcsel', a, c, e), d)),
        (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)),
    ]

# mediump: If an opcode is surrounded by conversions, remove the conversions.
# The rationale is that type conversions + the low precision opcode are more
# expensive than the same arithmetic opcode at higher precision.
#
# This must be done in late optimizations, because we need normal optimizations to
# first eliminate temporary up-conversions such as in op1(f2fmp(f2f32(op2()))).
#
# Unary opcodes
for op in ['fabs', 'fceil', 'fcos', 'fddx', 'fddx_coarse', 'fddx_fine', 'fddy',
           'fddy_coarse', 'fddy_fine', 'fexp2', 'ffloor', 'ffract', 'flog2', 'fneg',
           'frcp', 'fround_even', 'frsq', 'fsat', 'fsign', 'fsin', 'fsqrt']:
    late_optimizations += [(('~f2f32', (op, ('f2fmp', a))), (op, a))]

# Binary opcodes
for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']:
    late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b))]

# Ternary opcodes
for op in ['ffma', 'flrp']:
    late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c))]

# Comparison opcodes: there is no outer f2f32 to strip, only the f2fmp on
# each operand.
for op in ['feq', 'fge', 'flt', 'fneu']:
    late_optimizations += [(('~' + op, ('f2fmp', a), ('f2fmp', b)), (op, a, b))]

# Do this last, so that the f2fmp patterns above have effect.
late_optimizations += [
  # Convert *2*mp instructions to concrete *2*16 instructions. At this point
  # any conversions that could have been removed will have been removed in
  # nir_opt_algebraic so any remaining ones are required.
  (('f2fmp', a), ('f2f16', a)),
  (('f2imp', a), ('f2i16', a)),
  (('f2ump', a), ('f2u16', a)),
  (('i2imp', a), ('i2i16', a)),
  (('i2fmp', a), ('i2f16', a)),
  # NOTE(review): 'i2imp' is already matched by the i2imp -> i2i16 rule two
  # entries up, so this entry looks like it can never be selected.  Left in
  # place pending confirmation that it is redundant.
  (('i2imp', a), ('u2u16', a)),
  (('u2fmp', a), ('u2f16', a)),
  # fisfinite(a) is expressed as |a| < +Inf.
  (('fisfinite', a), ('flt', ('fabs', a), float("inf"))),
]

# Rules that move fneg / fabs from the result of an instruction onto its
# sources, or cancel them outright when they appear in pairs.
distribute_src_mods = [
   # Try to remove some spurious negations rather than pushing them down.
   (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
   (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
   (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
   (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
   (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
   (('fneg', ('fneg', a)), a),

   # A negated product needs the negation on one factor only; the absolute
   # value of a product is the product of the absolute values.
   (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
   (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),

   # Negation distributed over multiply-add, interpolation and addition.
   (('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
   (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)),
   (('fneg', ('~fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),

   # Note that fmin <-> fmax.  I don't think there is a way to distribute
   # fabs() into fmin or fmax.
   (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))),
   (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))),

   (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b)),
   (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b)),
   (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b)),

   # fdph works mostly like fdot, but to get the correct result, the negation
   # must be applied to the second source.
   (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b))),

   (('fneg', ('fsign(is_used_once)', a)), ('fsign', ('fneg', a))),
   (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))),
]

# Print the rendered output of each algebraic pass, one after another, in the
# order listed here.
for pass_name, patterns in (
    ("nir_opt_algebraic", optimizations),
    ("nir_opt_algebraic_before_ffma", before_ffma_optimizations),
    ("nir_opt_algebraic_late", late_optimizations),
    ("nir_opt_algebraic_distribute_src_mods", distribute_src_mods),
):
    print(nir_algebraic.AlgebraicPass(pass_name, patterns).render())