1/*
2 * Copyright © 2013-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_vec4_surface_builder.h"
25
26using namespace brw;
27
28namespace {
29   namespace array_utils {
30      /**
31       * Copy one every \p src_stride logical components of the argument into
32       * one every \p dst_stride logical components of the result.
33       */
34      static src_reg
35      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36                  unsigned dst_stride, unsigned src_stride)
37      {
38         if (src_stride == 1 && dst_stride == 1) {
39            return src;
40         } else {
41            const dst_reg dst = bld.vgrf(src.type,
42                                         DIV_ROUND_UP(size * dst_stride, 4));
43
44            for (unsigned i = 0; i < size; ++i)
45               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46                                 1 << (i * dst_stride % 4)),
47                       swizzle(offset(src, 8, i * src_stride / 4),
48                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49
50            return src_reg(dst);
51         }
52      }
53
54      /**
55       * Convert a VEC4 into an array of registers with the layout expected by
56       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
57       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58       * a SIMD8 vector.
59       */
60      static src_reg
61      emit_insert(const vec4_builder &bld, const src_reg &src,
62                  unsigned n, bool has_simd4x2)
63      {
64         if (src.file == BAD_FILE || n == 0) {
65            return src_reg();
66
67         } else {
68            /* Pad unused components with zeroes. */
69            const unsigned mask = (1 << n) - 1;
70            const dst_reg tmp = bld.vgrf(src.type);
71
72            bld.MOV(writemask(tmp, mask), src);
73            if (n < 4)
74               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75
76            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77         }
78      }
79   }
80}
81
82namespace brw {
83   namespace surface_access {
84      namespace {
85         using namespace array_utils;
86
87         /**
88          * Generate a send opcode for a surface message and return the
89          * result.
90          */
91         src_reg
92         emit_send(const vec4_builder &bld, enum opcode op,
93                   const src_reg &header,
94                   const src_reg &addr, unsigned addr_sz,
95                   const src_reg &src, unsigned src_sz,
96                   const src_reg &surface,
97                   unsigned arg, unsigned ret_sz,
98                   brw_predicate pred = BRW_PREDICATE_NONE)
99         {
100            /* Calculate the total number of components of the payload. */
101            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
102            const unsigned sz = header_sz + addr_sz + src_sz;
103
104            /* Construct the payload. */
105            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
106            unsigned n = 0;
107
108            if (header_sz)
109               bld.exec_all().MOV(offset(payload, 8, n++),
110                                  retype(header, BRW_REGISTER_TYPE_UD));
111
112            for (unsigned i = 0; i < addr_sz; i++)
113               bld.MOV(offset(payload, 8, n++),
114                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
115
116            for (unsigned i = 0; i < src_sz; i++)
117               bld.MOV(offset(payload, 8, n++),
118                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
119
120            /* Reduce the dynamically uniform surface index to a single
121             * scalar.
122             */
123            const src_reg usurface = bld.emit_uniformize(surface);
124
125            /* Emit the message send instruction. */
126            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
127            vec4_instruction *inst =
128               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
129            inst->mlen = sz;
130            inst->size_written = ret_sz * REG_SIZE;
131            inst->header_size = header_sz;
132            inst->predicate = pred;
133
134            return src_reg(dst);
135         }
136      }
137
138      /**
139       * Emit an untyped surface read opcode.  \p dims determines the number
140       * of components of the address and \p size the number of components of
141       * the returned value.
142       */
143      src_reg
144      emit_untyped_read(const vec4_builder &bld,
145                        const src_reg &surface, const src_reg &addr,
146                        unsigned dims, unsigned size,
147                        brw_predicate pred)
148      {
149         return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
150                          emit_insert(bld, addr, dims, true), 1,
151                          src_reg(), 0,
152                          surface, size, 1, pred);
153      }
154
155      /**
156       * Emit an untyped surface write opcode.  \p dims determines the number
157       * of components of the address and \p size the number of components of
158       * the argument.
159       */
160      void
161      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
162                         const src_reg &addr, const src_reg &src,
163                         unsigned dims, unsigned size,
164                         brw_predicate pred)
165      {
166         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
167         emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
168                   emit_insert(bld, addr, dims, has_simd4x2),
169                   has_simd4x2 ? 1 : dims,
170                   emit_insert(bld, src, size, has_simd4x2),
171                   has_simd4x2 ? 1 : size,
172                   surface, size, 0, pred);
173      }
174
175      /**
176       * Emit an untyped surface atomic opcode.  \p dims determines the number
177       * of components of the address and \p rsize the number of components of
178       * the returned value (either zero or one).
179       */
180      src_reg
181      emit_untyped_atomic(const vec4_builder &bld,
182                          const src_reg &surface, const src_reg &addr,
183                          const src_reg &src0, const src_reg &src1,
184                          unsigned dims, unsigned rsize, unsigned op,
185                          brw_predicate pred)
186      {
187         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
188
189         /* Zip the components of both sources, they are represented as the X
190          * and Y components of the same vector.
191          */
192         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
193         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
194
195         if (size >= 1) {
196            bld.MOV(writemask(srcs, WRITEMASK_X),
197                    swizzle(src0, BRW_SWIZZLE_XXXX));
198         }
199
200         if (size >= 2) {
201            bld.MOV(writemask(srcs, WRITEMASK_Y),
202                    swizzle(src1, BRW_SWIZZLE_XXXX));
203         }
204
205         return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
206                          emit_insert(bld, addr, dims, has_simd4x2),
207                          has_simd4x2 ? 1 : dims,
208                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
209                          has_simd4x2 && size ? 1 : size,
210                          surface, op, rsize, pred);
211      }
212   }
213}
214