1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2013-2015 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "brw_vec4_surface_builder.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ciusing namespace brw;
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_cinamespace {
29bf215546Sopenharmony_ci   namespace array_utils {
30bf215546Sopenharmony_ci      /**
31bf215546Sopenharmony_ci       * Copy one every \p src_stride logical components of the argument into
32bf215546Sopenharmony_ci       * one every \p dst_stride logical components of the result.
33bf215546Sopenharmony_ci       */
34bf215546Sopenharmony_ci      static src_reg
35bf215546Sopenharmony_ci      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36bf215546Sopenharmony_ci                  unsigned dst_stride, unsigned src_stride)
37bf215546Sopenharmony_ci      {
38bf215546Sopenharmony_ci         if (src_stride == 1 && dst_stride == 1) {
39bf215546Sopenharmony_ci            return src;
40bf215546Sopenharmony_ci         } else {
41bf215546Sopenharmony_ci            const dst_reg dst = bld.vgrf(src.type,
42bf215546Sopenharmony_ci                                         DIV_ROUND_UP(size * dst_stride, 4));
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci            for (unsigned i = 0; i < size; ++i)
45bf215546Sopenharmony_ci               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46bf215546Sopenharmony_ci                                 1 << (i * dst_stride % 4)),
47bf215546Sopenharmony_ci                       swizzle(offset(src, 8, i * src_stride / 4),
48bf215546Sopenharmony_ci                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci            return src_reg(dst);
51bf215546Sopenharmony_ci         }
52bf215546Sopenharmony_ci      }
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci      /**
55bf215546Sopenharmony_ci       * Convert a VEC4 into an array of registers with the layout expected by
56bf215546Sopenharmony_ci       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
57bf215546Sopenharmony_ci       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58bf215546Sopenharmony_ci       * a SIMD8 vector.
59bf215546Sopenharmony_ci       */
60bf215546Sopenharmony_ci      static src_reg
61bf215546Sopenharmony_ci      emit_insert(const vec4_builder &bld, const src_reg &src,
62bf215546Sopenharmony_ci                  unsigned n, bool has_simd4x2)
63bf215546Sopenharmony_ci      {
64bf215546Sopenharmony_ci         if (src.file == BAD_FILE || n == 0) {
65bf215546Sopenharmony_ci            return src_reg();
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci         } else {
68bf215546Sopenharmony_ci            /* Pad unused components with zeroes. */
69bf215546Sopenharmony_ci            const unsigned mask = (1 << n) - 1;
70bf215546Sopenharmony_ci            const dst_reg tmp = bld.vgrf(src.type);
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci            bld.MOV(writemask(tmp, mask), src);
73bf215546Sopenharmony_ci            if (n < 4)
74bf215546Sopenharmony_ci               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77bf215546Sopenharmony_ci         }
78bf215546Sopenharmony_ci      }
79bf215546Sopenharmony_ci   }
80bf215546Sopenharmony_ci}
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_cinamespace brw {
83bf215546Sopenharmony_ci   namespace surface_access {
84bf215546Sopenharmony_ci      namespace {
85bf215546Sopenharmony_ci         using namespace array_utils;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci         /**
88bf215546Sopenharmony_ci          * Generate a send opcode for a surface message and return the
89bf215546Sopenharmony_ci          * result.
90bf215546Sopenharmony_ci          */
91bf215546Sopenharmony_ci         src_reg
92bf215546Sopenharmony_ci         emit_send(const vec4_builder &bld, enum opcode op,
93bf215546Sopenharmony_ci                   const src_reg &header,
94bf215546Sopenharmony_ci                   const src_reg &addr, unsigned addr_sz,
95bf215546Sopenharmony_ci                   const src_reg &src, unsigned src_sz,
96bf215546Sopenharmony_ci                   const src_reg &surface,
97bf215546Sopenharmony_ci                   unsigned arg, unsigned ret_sz,
98bf215546Sopenharmony_ci                   brw_predicate pred = BRW_PREDICATE_NONE)
99bf215546Sopenharmony_ci         {
100bf215546Sopenharmony_ci            /* Calculate the total number of components of the payload. */
101bf215546Sopenharmony_ci            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
102bf215546Sopenharmony_ci            const unsigned sz = header_sz + addr_sz + src_sz;
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci            /* Construct the payload. */
105bf215546Sopenharmony_ci            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
106bf215546Sopenharmony_ci            unsigned n = 0;
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci            if (header_sz)
109bf215546Sopenharmony_ci               bld.exec_all().MOV(offset(payload, 8, n++),
110bf215546Sopenharmony_ci                                  retype(header, BRW_REGISTER_TYPE_UD));
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci            for (unsigned i = 0; i < addr_sz; i++)
113bf215546Sopenharmony_ci               bld.MOV(offset(payload, 8, n++),
114bf215546Sopenharmony_ci                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci            for (unsigned i = 0; i < src_sz; i++)
117bf215546Sopenharmony_ci               bld.MOV(offset(payload, 8, n++),
118bf215546Sopenharmony_ci                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci            /* Reduce the dynamically uniform surface index to a single
121bf215546Sopenharmony_ci             * scalar.
122bf215546Sopenharmony_ci             */
123bf215546Sopenharmony_ci            const src_reg usurface = bld.emit_uniformize(surface);
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci            /* Emit the message send instruction. */
126bf215546Sopenharmony_ci            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
127bf215546Sopenharmony_ci            vec4_instruction *inst =
128bf215546Sopenharmony_ci               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
129bf215546Sopenharmony_ci            inst->mlen = sz;
130bf215546Sopenharmony_ci            inst->size_written = ret_sz * REG_SIZE;
131bf215546Sopenharmony_ci            inst->header_size = header_sz;
132bf215546Sopenharmony_ci            inst->predicate = pred;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci            return src_reg(dst);
135bf215546Sopenharmony_ci         }
136bf215546Sopenharmony_ci      }
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci      /**
139bf215546Sopenharmony_ci       * Emit an untyped surface read opcode.  \p dims determines the number
140bf215546Sopenharmony_ci       * of components of the address and \p size the number of components of
141bf215546Sopenharmony_ci       * the returned value.
142bf215546Sopenharmony_ci       */
143bf215546Sopenharmony_ci      src_reg
144bf215546Sopenharmony_ci      emit_untyped_read(const vec4_builder &bld,
145bf215546Sopenharmony_ci                        const src_reg &surface, const src_reg &addr,
146bf215546Sopenharmony_ci                        unsigned dims, unsigned size,
147bf215546Sopenharmony_ci                        brw_predicate pred)
148bf215546Sopenharmony_ci      {
149bf215546Sopenharmony_ci         return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
150bf215546Sopenharmony_ci                          emit_insert(bld, addr, dims, true), 1,
151bf215546Sopenharmony_ci                          src_reg(), 0,
152bf215546Sopenharmony_ci                          surface, size, 1, pred);
153bf215546Sopenharmony_ci      }
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci      /**
156bf215546Sopenharmony_ci       * Emit an untyped surface write opcode.  \p dims determines the number
157bf215546Sopenharmony_ci       * of components of the address and \p size the number of components of
158bf215546Sopenharmony_ci       * the argument.
159bf215546Sopenharmony_ci       */
160bf215546Sopenharmony_ci      void
161bf215546Sopenharmony_ci      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
162bf215546Sopenharmony_ci                         const src_reg &addr, const src_reg &src,
163bf215546Sopenharmony_ci                         unsigned dims, unsigned size,
164bf215546Sopenharmony_ci                         brw_predicate pred)
165bf215546Sopenharmony_ci      {
166bf215546Sopenharmony_ci         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
167bf215546Sopenharmony_ci         emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
168bf215546Sopenharmony_ci                   emit_insert(bld, addr, dims, has_simd4x2),
169bf215546Sopenharmony_ci                   has_simd4x2 ? 1 : dims,
170bf215546Sopenharmony_ci                   emit_insert(bld, src, size, has_simd4x2),
171bf215546Sopenharmony_ci                   has_simd4x2 ? 1 : size,
172bf215546Sopenharmony_ci                   surface, size, 0, pred);
173bf215546Sopenharmony_ci      }
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci      /**
176bf215546Sopenharmony_ci       * Emit an untyped surface atomic opcode.  \p dims determines the number
177bf215546Sopenharmony_ci       * of components of the address and \p rsize the number of components of
178bf215546Sopenharmony_ci       * the returned value (either zero or one).
179bf215546Sopenharmony_ci       */
180bf215546Sopenharmony_ci      src_reg
181bf215546Sopenharmony_ci      emit_untyped_atomic(const vec4_builder &bld,
182bf215546Sopenharmony_ci                          const src_reg &surface, const src_reg &addr,
183bf215546Sopenharmony_ci                          const src_reg &src0, const src_reg &src1,
184bf215546Sopenharmony_ci                          unsigned dims, unsigned rsize, unsigned op,
185bf215546Sopenharmony_ci                          brw_predicate pred)
186bf215546Sopenharmony_ci      {
187bf215546Sopenharmony_ci         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci         /* Zip the components of both sources, they are represented as the X
190bf215546Sopenharmony_ci          * and Y components of the same vector.
191bf215546Sopenharmony_ci          */
192bf215546Sopenharmony_ci         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
193bf215546Sopenharmony_ci         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci         if (size >= 1) {
196bf215546Sopenharmony_ci            bld.MOV(writemask(srcs, WRITEMASK_X),
197bf215546Sopenharmony_ci                    swizzle(src0, BRW_SWIZZLE_XXXX));
198bf215546Sopenharmony_ci         }
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci         if (size >= 2) {
201bf215546Sopenharmony_ci            bld.MOV(writemask(srcs, WRITEMASK_Y),
202bf215546Sopenharmony_ci                    swizzle(src1, BRW_SWIZZLE_XXXX));
203bf215546Sopenharmony_ci         }
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci         return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
206bf215546Sopenharmony_ci                          emit_insert(bld, addr, dims, has_simd4x2),
207bf215546Sopenharmony_ci                          has_simd4x2 ? 1 : dims,
208bf215546Sopenharmony_ci                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
209bf215546Sopenharmony_ci                          has_simd4x2 && size ? 1 : size,
210bf215546Sopenharmony_ci                          surface, op, rsize, pred);
211bf215546Sopenharmony_ci      }
212bf215546Sopenharmony_ci   }
213bf215546Sopenharmony_ci}
214