1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2013-2015 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "brw_vec4_surface_builder.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ciusing namespace brw; 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_cinamespace { 29bf215546Sopenharmony_ci namespace array_utils { 30bf215546Sopenharmony_ci /** 31bf215546Sopenharmony_ci * Copy one every \p src_stride logical components of the argument into 32bf215546Sopenharmony_ci * one every \p dst_stride logical components of the result. 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci static src_reg 35bf215546Sopenharmony_ci emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size, 36bf215546Sopenharmony_ci unsigned dst_stride, unsigned src_stride) 37bf215546Sopenharmony_ci { 38bf215546Sopenharmony_ci if (src_stride == 1 && dst_stride == 1) { 39bf215546Sopenharmony_ci return src; 40bf215546Sopenharmony_ci } else { 41bf215546Sopenharmony_ci const dst_reg dst = bld.vgrf(src.type, 42bf215546Sopenharmony_ci DIV_ROUND_UP(size * dst_stride, 4)); 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci for (unsigned i = 0; i < size; ++i) 45bf215546Sopenharmony_ci bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4), 46bf215546Sopenharmony_ci 1 << (i * dst_stride % 4)), 47bf215546Sopenharmony_ci swizzle(offset(src, 8, i * src_stride / 4), 48bf215546Sopenharmony_ci brw_swizzle_for_mask(1 << (i * src_stride % 4)))); 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci return src_reg(dst); 51bf215546Sopenharmony_ci } 52bf215546Sopenharmony_ci } 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci /** 55bf215546Sopenharmony_ci * Convert a VEC4 into an array of registers with the layout expected by 56bf215546Sopenharmony_ci * the recipient shared unit. If \p has_simd4x2 is true the argument is 57bf215546Sopenharmony_ci * left unmodified in SIMD4x2 form, otherwise it will be rearranged into 58bf215546Sopenharmony_ci * a SIMD8 vector. 59bf215546Sopenharmony_ci */ 60bf215546Sopenharmony_ci static src_reg 61bf215546Sopenharmony_ci emit_insert(const vec4_builder &bld, const src_reg &src, 62bf215546Sopenharmony_ci unsigned n, bool has_simd4x2) 63bf215546Sopenharmony_ci { 64bf215546Sopenharmony_ci if (src.file == BAD_FILE || n == 0) { 65bf215546Sopenharmony_ci return src_reg(); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci } else { 68bf215546Sopenharmony_ci /* Pad unused components with zeroes. */ 69bf215546Sopenharmony_ci const unsigned mask = (1 << n) - 1; 70bf215546Sopenharmony_ci const dst_reg tmp = bld.vgrf(src.type); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci bld.MOV(writemask(tmp, mask), src); 73bf215546Sopenharmony_ci if (n < 4) 74bf215546Sopenharmony_ci bld.MOV(writemask(tmp, ~mask), brw_imm_d(0)); 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1); 77bf215546Sopenharmony_ci } 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci} 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cinamespace brw { 83bf215546Sopenharmony_ci namespace surface_access { 84bf215546Sopenharmony_ci namespace { 85bf215546Sopenharmony_ci using namespace array_utils; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /** 88bf215546Sopenharmony_ci * Generate a send opcode for a surface message and return the 89bf215546Sopenharmony_ci * result. 90bf215546Sopenharmony_ci */ 91bf215546Sopenharmony_ci src_reg 92bf215546Sopenharmony_ci emit_send(const vec4_builder &bld, enum opcode op, 93bf215546Sopenharmony_ci const src_reg &header, 94bf215546Sopenharmony_ci const src_reg &addr, unsigned addr_sz, 95bf215546Sopenharmony_ci const src_reg &src, unsigned src_sz, 96bf215546Sopenharmony_ci const src_reg &surface, 97bf215546Sopenharmony_ci unsigned arg, unsigned ret_sz, 98bf215546Sopenharmony_ci brw_predicate pred = BRW_PREDICATE_NONE) 99bf215546Sopenharmony_ci { 100bf215546Sopenharmony_ci /* Calculate the total number of components of the payload. */ 101bf215546Sopenharmony_ci const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1); 102bf215546Sopenharmony_ci const unsigned sz = header_sz + addr_sz + src_sz; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci /* Construct the payload. */ 105bf215546Sopenharmony_ci const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); 106bf215546Sopenharmony_ci unsigned n = 0; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci if (header_sz) 109bf215546Sopenharmony_ci bld.exec_all().MOV(offset(payload, 8, n++), 110bf215546Sopenharmony_ci retype(header, BRW_REGISTER_TYPE_UD)); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci for (unsigned i = 0; i < addr_sz; i++) 113bf215546Sopenharmony_ci bld.MOV(offset(payload, 8, n++), 114bf215546Sopenharmony_ci offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i)); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci for (unsigned i = 0; i < src_sz; i++) 117bf215546Sopenharmony_ci bld.MOV(offset(payload, 8, n++), 118bf215546Sopenharmony_ci offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i)); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* Reduce the dynamically uniform surface index to a single 121bf215546Sopenharmony_ci * scalar. 122bf215546Sopenharmony_ci */ 123bf215546Sopenharmony_ci const src_reg usurface = bld.emit_uniformize(surface); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci /* Emit the message send instruction. */ 126bf215546Sopenharmony_ci const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); 127bf215546Sopenharmony_ci vec4_instruction *inst = 128bf215546Sopenharmony_ci bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg)); 129bf215546Sopenharmony_ci inst->mlen = sz; 130bf215546Sopenharmony_ci inst->size_written = ret_sz * REG_SIZE; 131bf215546Sopenharmony_ci inst->header_size = header_sz; 132bf215546Sopenharmony_ci inst->predicate = pred; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci return src_reg(dst); 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci /** 139bf215546Sopenharmony_ci * Emit an untyped surface read opcode. \p dims determines the number 140bf215546Sopenharmony_ci * of components of the address and \p size the number of components of 141bf215546Sopenharmony_ci * the returned value. 142bf215546Sopenharmony_ci */ 143bf215546Sopenharmony_ci src_reg 144bf215546Sopenharmony_ci emit_untyped_read(const vec4_builder &bld, 145bf215546Sopenharmony_ci const src_reg &surface, const src_reg &addr, 146bf215546Sopenharmony_ci unsigned dims, unsigned size, 147bf215546Sopenharmony_ci brw_predicate pred) 148bf215546Sopenharmony_ci { 149bf215546Sopenharmony_ci return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(), 150bf215546Sopenharmony_ci emit_insert(bld, addr, dims, true), 1, 151bf215546Sopenharmony_ci src_reg(), 0, 152bf215546Sopenharmony_ci surface, size, 1, pred); 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci /** 156bf215546Sopenharmony_ci * Emit an untyped surface write opcode. \p dims determines the number 157bf215546Sopenharmony_ci * of components of the address and \p size the number of components of 158bf215546Sopenharmony_ci * the argument. 159bf215546Sopenharmony_ci */ 160bf215546Sopenharmony_ci void 161bf215546Sopenharmony_ci emit_untyped_write(const vec4_builder &bld, const src_reg &surface, 162bf215546Sopenharmony_ci const src_reg &addr, const src_reg &src, 163bf215546Sopenharmony_ci unsigned dims, unsigned size, 164bf215546Sopenharmony_ci brw_predicate pred) 165bf215546Sopenharmony_ci { 166bf215546Sopenharmony_ci const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75; 167bf215546Sopenharmony_ci emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(), 168bf215546Sopenharmony_ci emit_insert(bld, addr, dims, has_simd4x2), 169bf215546Sopenharmony_ci has_simd4x2 ? 1 : dims, 170bf215546Sopenharmony_ci emit_insert(bld, src, size, has_simd4x2), 171bf215546Sopenharmony_ci has_simd4x2 ? 1 : size, 172bf215546Sopenharmony_ci surface, size, 0, pred); 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci /** 176bf215546Sopenharmony_ci * Emit an untyped surface atomic opcode. \p dims determines the number 177bf215546Sopenharmony_ci * of components of the address and \p rsize the number of components of 178bf215546Sopenharmony_ci * the returned value (either zero or one). 179bf215546Sopenharmony_ci */ 180bf215546Sopenharmony_ci src_reg 181bf215546Sopenharmony_ci emit_untyped_atomic(const vec4_builder &bld, 182bf215546Sopenharmony_ci const src_reg &surface, const src_reg &addr, 183bf215546Sopenharmony_ci const src_reg &src0, const src_reg &src1, 184bf215546Sopenharmony_ci unsigned dims, unsigned rsize, unsigned op, 185bf215546Sopenharmony_ci brw_predicate pred) 186bf215546Sopenharmony_ci { 187bf215546Sopenharmony_ci const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci /* Zip the components of both sources, they are represented as the X 190bf215546Sopenharmony_ci * and Y components of the same vector. 191bf215546Sopenharmony_ci */ 192bf215546Sopenharmony_ci const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); 193bf215546Sopenharmony_ci const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci if (size >= 1) { 196bf215546Sopenharmony_ci bld.MOV(writemask(srcs, WRITEMASK_X), 197bf215546Sopenharmony_ci swizzle(src0, BRW_SWIZZLE_XXXX)); 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci if (size >= 2) { 201bf215546Sopenharmony_ci bld.MOV(writemask(srcs, WRITEMASK_Y), 202bf215546Sopenharmony_ci swizzle(src1, BRW_SWIZZLE_XXXX)); 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(), 206bf215546Sopenharmony_ci emit_insert(bld, addr, dims, has_simd4x2), 207bf215546Sopenharmony_ci has_simd4x2 ? 1 : dims, 208bf215546Sopenharmony_ci emit_insert(bld, src_reg(srcs), size, has_simd4x2), 209bf215546Sopenharmony_ci has_simd4x2 && size ? 1 : size, 210bf215546Sopenharmony_ci surface, op, rsize, pred); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci} 214