1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25#include "nir.h" 26#include "nir_builder.h" 27 28/* 29 * lowers: 30 * 31 * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y) 32 * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo)) 33 * packInt2x32(foo) -> packInt2x32Split(foo.x, foo.y) 34 * unpackInt2x32(foo) -> vec2(unpackInt2x32_x(foo), unpackInt2x32_y(foo)) 35 */ 36 37static nir_ssa_def * 38lower_pack_64_from_32(nir_builder *b, nir_ssa_def *src) 39{ 40 return nir_pack_64_2x32_split(b, nir_channel(b, src, 0), 41 nir_channel(b, src, 1)); 42} 43 44static nir_ssa_def * 45lower_unpack_64_to_32(nir_builder *b, nir_ssa_def *src) 46{ 47 return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), 48 nir_unpack_64_2x32_split_y(b, src)); 49} 50 51static nir_ssa_def * 52lower_pack_32_from_16(nir_builder *b, nir_ssa_def *src) 53{ 54 return nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 55 nir_channel(b, src, 1)); 56} 57 58static nir_ssa_def * 59lower_unpack_32_to_16(nir_builder *b, nir_ssa_def *src) 60{ 61 return nir_vec2(b, nir_unpack_32_2x16_split_x(b, src), 62 nir_unpack_32_2x16_split_y(b, src)); 63} 64 65static nir_ssa_def * 66lower_pack_64_from_16(nir_builder *b, nir_ssa_def *src) 67{ 68 nir_ssa_def *xy = nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 69 nir_channel(b, src, 1)); 70 71 nir_ssa_def *zw = nir_pack_32_2x16_split(b, nir_channel(b, src, 2), 72 nir_channel(b, src, 3)); 73 74 return nir_pack_64_2x32_split(b, xy, zw); 75} 76 77static nir_ssa_def * 78lower_unpack_64_to_16(nir_builder *b, nir_ssa_def *src) 79{ 80 nir_ssa_def *xy = nir_unpack_64_2x32_split_x(b, src); 81 nir_ssa_def *zw = nir_unpack_64_2x32_split_y(b, src); 82 83 return nir_vec4(b, nir_unpack_32_2x16_split_x(b, xy), 84 nir_unpack_32_2x16_split_y(b, xy), 85 nir_unpack_32_2x16_split_x(b, zw), 86 nir_unpack_32_2x16_split_y(b, zw)); 87} 88 89static nir_ssa_def * 90lower_pack_32_from_8(nir_builder *b, nir_ssa_def *src) 91{ 92 return nir_pack_32_4x8_split(b, nir_channel(b, src, 0), 93 nir_channel(b, src, 1), 94 nir_channel(b, src, 2), 95 nir_channel(b, src, 3)); 96} 97 98static bool 99lower_pack_instr(nir_builder *b, nir_instr *instr, void *data) 100{ 101 if (instr->type != nir_instr_type_alu) 102 return false; 103 104 nir_alu_instr *alu_instr = (nir_alu_instr *) instr; 105 106 if (alu_instr->op != nir_op_pack_64_2x32 && 107 alu_instr->op != nir_op_unpack_64_2x32 && 108 alu_instr->op != nir_op_pack_64_4x16 && 109 alu_instr->op != nir_op_unpack_64_4x16 && 110 alu_instr->op != nir_op_pack_32_2x16 && 111 alu_instr->op != nir_op_unpack_32_2x16 && 112 alu_instr->op != nir_op_pack_32_4x8) 113 return false; 114 115 b->cursor = nir_before_instr(&alu_instr->instr); 116 117 nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0); 118 nir_ssa_def *dest; 119 120 switch (alu_instr->op) { 121 case nir_op_pack_64_2x32: 122 dest = lower_pack_64_from_32(b, src); 123 break; 124 case nir_op_unpack_64_2x32: 125 dest = lower_unpack_64_to_32(b, src); 126 break; 127 case nir_op_pack_64_4x16: 128 dest = lower_pack_64_from_16(b, src); 129 break; 130 case nir_op_unpack_64_4x16: 131 dest = lower_unpack_64_to_16(b, src); 132 break; 133 case nir_op_pack_32_2x16: 134 dest = lower_pack_32_from_16(b, src); 135 break; 136 case nir_op_unpack_32_2x16: 137 dest = lower_unpack_32_to_16(b, src); 138 break; 139 case nir_op_pack_32_4x8: 140 dest = lower_pack_32_from_8(b, src); 141 break; 142 default: 143 unreachable("Impossible opcode"); 144 } 145 nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest); 146 nir_instr_remove(&alu_instr->instr); 147 148 return true; 149} 150 151bool 152nir_lower_pack(nir_shader *shader) 153{ 154 return nir_shader_instructions_pass(shader, lower_pack_instr, 155 nir_metadata_block_index | nir_metadata_dominance, NULL); 156} 157