1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdbool.h>
28bf215546Sopenharmony_ci#include "main/image.h"
29bf215546Sopenharmony_ci#include "main/pbo.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "state_tracker/st_nir.h"
32bf215546Sopenharmony_ci#include "state_tracker/st_format.h"
33bf215546Sopenharmony_ci#include "state_tracker/st_pbo.h"
34bf215546Sopenharmony_ci#include "state_tracker/st_texture.h"
35bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
36bf215546Sopenharmony_ci#include "compiler/nir/nir_format_convert.h"
37bf215546Sopenharmony_ci#include "compiler/glsl/gl_nir.h"
38bf215546Sopenharmony_ci#include "compiler/glsl/gl_nir_linker.h"
39bf215546Sopenharmony_ci#include "util/u_sampler.h"
40bf215546Sopenharmony_ci#include "util/streaming-load-memcpy.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci#define BGR_FORMAT(NAME) \
43bf215546Sopenharmony_ci    {{ \
44bf215546Sopenharmony_ci     [0] = PIPE_FORMAT_##NAME##_SNORM, \
45bf215546Sopenharmony_ci     [1] = PIPE_FORMAT_##NAME##_SINT, \
46bf215546Sopenharmony_ci    }, \
47bf215546Sopenharmony_ci    { \
48bf215546Sopenharmony_ci     [0] = PIPE_FORMAT_##NAME##_UNORM, \
49bf215546Sopenharmony_ci     [1] = PIPE_FORMAT_##NAME##_UINT, \
50bf215546Sopenharmony_ci    }}
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci#define FORMAT(NAME, NAME16, NAME32) \
53bf215546Sopenharmony_ci   {{ \
54bf215546Sopenharmony_ci    [1] = PIPE_FORMAT_##NAME##_SNORM, \
55bf215546Sopenharmony_ci    [2] = PIPE_FORMAT_##NAME16##_SNORM, \
56bf215546Sopenharmony_ci    [4] = PIPE_FORMAT_##NAME32##_SNORM, \
57bf215546Sopenharmony_ci   }, \
58bf215546Sopenharmony_ci   { \
59bf215546Sopenharmony_ci    [1] = PIPE_FORMAT_##NAME##_UNORM, \
60bf215546Sopenharmony_ci    [2] = PIPE_FORMAT_##NAME16##_UNORM, \
61bf215546Sopenharmony_ci    [4] = PIPE_FORMAT_##NAME32##_UNORM, \
62bf215546Sopenharmony_ci   }}
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci/* don't try these at home */
65bf215546Sopenharmony_cistatic enum pipe_format
66bf215546Sopenharmony_ciget_convert_format(struct gl_context *ctx,
67bf215546Sopenharmony_ci                enum pipe_format src_format,
68bf215546Sopenharmony_ci                GLenum format, GLenum type,
69bf215546Sopenharmony_ci                bool *need_bgra_swizzle)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci   struct st_context *st = st_context(ctx);
72bf215546Sopenharmony_ci   GLint bpp = _mesa_bytes_per_pixel(format, type);
73bf215546Sopenharmony_ci   if (_mesa_is_depth_format(format) ||
74bf215546Sopenharmony_ci       format == GL_GREEN_INTEGER ||
75bf215546Sopenharmony_ci       format == GL_BLUE_INTEGER) {
76bf215546Sopenharmony_ci      switch (bpp) {
77bf215546Sopenharmony_ci      case 1:
78bf215546Sopenharmony_ci         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT;
79bf215546Sopenharmony_ci      case 2:
80bf215546Sopenharmony_ci         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT;
81bf215546Sopenharmony_ci      case 4:
82bf215546Sopenharmony_ci         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT;
83bf215546Sopenharmony_ci      }
84bf215546Sopenharmony_ci   }
85bf215546Sopenharmony_ci   mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type);
86bf215546Sopenharmony_ci   enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat);
87bf215546Sopenharmony_ci   if (!pformat) {
88bf215546Sopenharmony_ci      GLint dst_components = _mesa_components_in_format(format);
89bf215546Sopenharmony_ci      bpp /= dst_components;
90bf215546Sopenharmony_ci      if (format == GL_BGR || format == GL_BGRA) {
91bf215546Sopenharmony_ci         pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0);
92bf215546Sopenharmony_ci         if (!pformat)
93bf215546Sopenharmony_ci            pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle);
94bf215546Sopenharmony_ci         assert(pformat);
95bf215546Sopenharmony_ci         *need_bgra_swizzle = true;
96bf215546Sopenharmony_ci      } else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) {
97bf215546Sopenharmony_ci         pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0);
98bf215546Sopenharmony_ci         if (!pformat)
99bf215546Sopenharmony_ci            pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle);
100bf215546Sopenharmony_ci         assert(pformat);
101bf215546Sopenharmony_ci         *need_bgra_swizzle = true;
102bf215546Sopenharmony_ci      } else {
103bf215546Sopenharmony_ci         /* [signed,unsigned][bpp] */
104bf215546Sopenharmony_ci         enum pipe_format rgb[5][2][5] = {
105bf215546Sopenharmony_ci            [1] = FORMAT(R8, R16, R32),
106bf215546Sopenharmony_ci            [2] = FORMAT(R8G8, R16G16, R32G32),
107bf215546Sopenharmony_ci            [3] = FORMAT(R8G8B8, R16G16B16, R32G32B32),
108bf215546Sopenharmony_ci            [4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32),
109bf215546Sopenharmony_ci         };
110bf215546Sopenharmony_ci         pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp];
111bf215546Sopenharmony_ci      }
112bf215546Sopenharmony_ci      assert(util_format_get_nr_components(pformat) == dst_components);
113bf215546Sopenharmony_ci   }
114bf215546Sopenharmony_ci   assert(pformat);
115bf215546Sopenharmony_ci   return pformat;
116bf215546Sopenharmony_ci}
117bf215546Sopenharmony_ci#undef BGR_FORMAT
118bf215546Sopenharmony_ci#undef FORMAT
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_cistruct pbo_shader_data {
122bf215546Sopenharmony_ci   nir_ssa_def *offset;
123bf215546Sopenharmony_ci   nir_ssa_def *range;
124bf215546Sopenharmony_ci   nir_ssa_def *invert;
125bf215546Sopenharmony_ci   nir_ssa_def *blocksize;
126bf215546Sopenharmony_ci   nir_ssa_def *alignment;
127bf215546Sopenharmony_ci   nir_ssa_def *dst_bit_size;
128bf215546Sopenharmony_ci   nir_ssa_def *channels;
129bf215546Sopenharmony_ci   nir_ssa_def *normalized;
130bf215546Sopenharmony_ci   nir_ssa_def *integer;
131bf215546Sopenharmony_ci   nir_ssa_def *clamp_uint;
132bf215546Sopenharmony_ci   nir_ssa_def *r11g11b10_or_sint;
133bf215546Sopenharmony_ci   nir_ssa_def *r9g9b9e5;
134bf215546Sopenharmony_ci   nir_ssa_def *bits1;
135bf215546Sopenharmony_ci   nir_ssa_def *bits2;
136bf215546Sopenharmony_ci   nir_ssa_def *bits3;
137bf215546Sopenharmony_ci   nir_ssa_def *bits4;
138bf215546Sopenharmony_ci   nir_ssa_def *swap;
139bf215546Sopenharmony_ci   nir_ssa_def *bits; //vec4
140bf215546Sopenharmony_ci};
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci/* must be under 16bytes / sizeof(vec4) / 128 bits) */
144bf215546Sopenharmony_cistruct pbo_data {
145bf215546Sopenharmony_ci   union {
146bf215546Sopenharmony_ci       struct {
147bf215546Sopenharmony_ci          struct {
148bf215546Sopenharmony_ci             uint16_t x, y;
149bf215546Sopenharmony_ci          };
150bf215546Sopenharmony_ci          struct {
151bf215546Sopenharmony_ci             uint16_t width, height;
152bf215546Sopenharmony_ci          };
153bf215546Sopenharmony_ci          struct {
154bf215546Sopenharmony_ci             uint16_t depth;
155bf215546Sopenharmony_ci             uint8_t invert : 1;
156bf215546Sopenharmony_ci             uint8_t blocksize : 7;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci             uint8_t clamp_uint : 1;
159bf215546Sopenharmony_ci             uint8_t r11g11b10_or_sint : 1;
160bf215546Sopenharmony_ci             uint8_t r9g9b9e5 : 1;
161bf215546Sopenharmony_ci             uint8_t swap : 1;
162bf215546Sopenharmony_ci             uint16_t alignment : 2;
163bf215546Sopenharmony_ci             uint8_t dst_bit_size : 2; //8, 16, 32, 64
164bf215546Sopenharmony_ci          };
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci          struct {
167bf215546Sopenharmony_ci             uint8_t channels : 2;
168bf215546Sopenharmony_ci             uint8_t bits1 : 6;
169bf215546Sopenharmony_ci             uint8_t normalized : 1;
170bf215546Sopenharmony_ci             uint8_t integer : 1;
171bf215546Sopenharmony_ci             uint8_t bits2 : 6;
172bf215546Sopenharmony_ci             uint8_t bits3 : 6;
173bf215546Sopenharmony_ci             uint8_t pad1 : 2;
174bf215546Sopenharmony_ci             uint8_t bits4 : 6;
175bf215546Sopenharmony_ci             uint8_t pad2 : 2;
176bf215546Sopenharmony_ci          };
177bf215546Sopenharmony_ci      };
178bf215546Sopenharmony_ci      float vec[4];
179bf215546Sopenharmony_ci   };
180bf215546Sopenharmony_ci};
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci#define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8)
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci#define STRUCT_BLOCK(offset, ...) \
186bf215546Sopenharmony_ci   do { \
187bf215546Sopenharmony_ci      assert(offset % 8 == 0); \
188bf215546Sopenharmony_ci      nir_ssa_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \
189bf215546Sopenharmony_ci      __VA_ARGS__ \
190bf215546Sopenharmony_ci   } while (0)
191bf215546Sopenharmony_ci#define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \
192bf215546Sopenharmony_ci   do { \
193bf215546Sopenharmony_ci      assert(offset + size <= 8); \
194bf215546Sopenharmony_ci      nir_ssa_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \
195bf215546Sopenharmony_ci      if (offset) \
196bf215546Sopenharmony_ci         val = nir_ushr_imm(b, val, offset); \
197bf215546Sopenharmony_ci      sd->name = op; \
198bf215546Sopenharmony_ci      if (clamp) \
199bf215546Sopenharmony_ci         sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \
200bf215546Sopenharmony_ci   } while (0)
201bf215546Sopenharmony_ci#define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \
202bf215546Sopenharmony_ci   STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp)
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci#define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \
205bf215546Sopenharmony_ci   STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0)
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci/* this function extracts the conversion data from pbo_data using the
208bf215546Sopenharmony_ci * size annotations for each grouping. data is compacted into bitfields,
209bf215546Sopenharmony_ci * so bitwise operations must be used to "unpact" everything
210bf215546Sopenharmony_ci */
211bf215546Sopenharmony_cistatic void
212bf215546Sopenharmony_ciinit_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd)
213bf215546Sopenharmony_ci{
214bf215546Sopenharmony_ci   nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset");
215bf215546Sopenharmony_ci   nir_ssa_def *ubo_load = nir_load_var(b, ubo);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   sd->offset = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16)), nir_imm_int(b, 65535));
218bf215546Sopenharmony_ci   sd->range = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16)), nir_imm_int(b, 65535));
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   STRUCT_BLOCK(80,
221bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(80, invert, 0);
222bf215546Sopenharmony_ci      STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128);
223bf215546Sopenharmony_ci   );
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   STRUCT_BLOCK(88,
226bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(88, clamp_uint, 0);
227bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1);
228bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2);
229bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(88, swap, 3);
230bf215546Sopenharmony_ci      STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8);
231bf215546Sopenharmony_ci      STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64);
232bf215546Sopenharmony_ci   );
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci   STRUCT_BLOCK(96,
235bf215546Sopenharmony_ci      STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4);
236bf215546Sopenharmony_ci      STRUCT_MEMBER(96, bits1, 2, 6, val, 32);
237bf215546Sopenharmony_ci   );
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci   STRUCT_BLOCK(104,
240bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(104, normalized, 0);
241bf215546Sopenharmony_ci      STRUCT_MEMBER_BOOL(104, integer, 1);
242bf215546Sopenharmony_ci      STRUCT_MEMBER(104, bits2, 2, 6, val, 32);
243bf215546Sopenharmony_ci   );
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   STRUCT_BLOCK(112,
247bf215546Sopenharmony_ci      STRUCT_MEMBER(112, bits3, 0, 6, val, 32);
248bf215546Sopenharmony_ci   );
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   STRUCT_BLOCK(120,
251bf215546Sopenharmony_ci      STRUCT_MEMBER(120, bits4, 0, 6, val, 32);
252bf215546Sopenharmony_ci   );
253bf215546Sopenharmony_ci   sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4);
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   /* clamp swap in the shader to enable better optimizing */
256bf215546Sopenharmony_ci   /* TODO?
257bf215546Sopenharmony_ci   sd->swap = nir_bcsel(b, nir_ior(b,
258bf215546Sopenharmony_ci                                   nir_ieq_imm(b, sd->blocksize, 8),
259bf215546Sopenharmony_ci                                   nir_bcsel(b,
260bf215546Sopenharmony_ci                                             nir_ieq_imm(b, sd->bits1, 8),
261bf215546Sopenharmony_ci                                             nir_bcsel(b,
262bf215546Sopenharmony_ci                                                       nir_uge(b, sd->channels, nir_imm_int(b, 2)),
263bf215546Sopenharmony_ci                                                       nir_bcsel(b,
264bf215546Sopenharmony_ci                                                                 nir_uge(b, sd->channels, nir_imm_int(b, 3)),
265bf215546Sopenharmony_ci                                                                 nir_bcsel(b,
266bf215546Sopenharmony_ci                                                                           nir_ieq(b, sd->channels, nir_imm_int(b, 4)),
267bf215546Sopenharmony_ci                                                                           nir_ball(b, nir_ieq(b, sd->bits, nir_imm_ivec4(b, 8, 8, 8, 8))),
268bf215546Sopenharmony_ci                                                                           nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 7), nir_imm_ivec3(b, 8, 8, 8)))),
269bf215546Sopenharmony_ci                                                                 nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 3), nir_imm_ivec2(b, 8, 8)))),
270bf215546Sopenharmony_ci                                                       nir_imm_bool(b, 0)),
271bf215546Sopenharmony_ci                                             nir_imm_bool(b, 0))),
272bf215546Sopenharmony_ci                           nir_imm_bool(b, 0),
273bf215546Sopenharmony_ci                           sd->swap);
274bf215546Sopenharmony_ci     */
275bf215546Sopenharmony_ci}
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_cistatic unsigned
278bf215546Sopenharmony_cifill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap)
279bf215546Sopenharmony_ci{
280bf215546Sopenharmony_ci   unsigned bits[4] = {0};
281bf215546Sopenharmony_ci   bool weird_packed = false;
282bf215546Sopenharmony_ci   const struct util_format_description *dst_desc = util_format_description(dst_format);
283bf215546Sopenharmony_ci   bool is_8bit = true;
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   for (unsigned c = 0; c < 4; c++) {
286bf215546Sopenharmony_ci      bits[c] = dst_desc->channel[c].size;
287bf215546Sopenharmony_ci      if (c < dst_desc->nr_channels) {
288bf215546Sopenharmony_ci         weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0;
289bf215546Sopenharmony_ci         if (bits[c] != 8)
290bf215546Sopenharmony_ci            is_8bit = false;
291bf215546Sopenharmony_ci      }
292bf215546Sopenharmony_ci   }
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci   if (is_8bit || dst_desc->block.bits == 8)
295bf215546Sopenharmony_ci      swap = false;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   unsigned dst_bit_size = 0;
298bf215546Sopenharmony_ci   if (weird_packed) {
299bf215546Sopenharmony_ci      dst_bit_size = dst_desc->block.bits;
300bf215546Sopenharmony_ci   } else {
301bf215546Sopenharmony_ci      dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels;
302bf215546Sopenharmony_ci   }
303bf215546Sopenharmony_ci   assert(dst_bit_size);
304bf215546Sopenharmony_ci   assert(dst_bit_size <= 64);
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci   pd->dst_bit_size = dst_bit_size >> 4;
307bf215546Sopenharmony_ci   pd->channels = dst_desc->nr_channels - 1;
308bf215546Sopenharmony_ci   pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm;
309bf215546Sopenharmony_ci   pd->clamp_uint = dst_desc->is_unorm ||
310bf215546Sopenharmony_ci                    (util_format_is_pure_sint(dst_format) &&
311bf215546Sopenharmony_ci                     !util_format_is_pure_sint(src_format) &&
312bf215546Sopenharmony_ci                     !util_format_is_snorm(src_format)) ||
313bf215546Sopenharmony_ci                    util_format_is_pure_uint(dst_format);
314bf215546Sopenharmony_ci   pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format);
315bf215546Sopenharmony_ci   pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format);
316bf215546Sopenharmony_ci   pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT;
317bf215546Sopenharmony_ci   pd->bits1 = bits[0];
318bf215546Sopenharmony_ci   pd->bits2 = bits[1];
319bf215546Sopenharmony_ci   pd->bits3 = bits[2];
320bf215546Sopenharmony_ci   pd->bits4 = bits[3];
321bf215546Sopenharmony_ci   pd->swap = swap;
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci   return weird_packed ? 1 : dst_desc->nr_channels;
324bf215546Sopenharmony_ci}
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_cistatic nir_ssa_def *
327bf215546Sopenharmony_ciget_buffer_offset(nir_builder *b, nir_ssa_def *coord, struct pbo_shader_data *sd)
328bf215546Sopenharmony_ci{
329bf215546Sopenharmony_ci/* from _mesa_image_offset():
330bf215546Sopenharmony_ci      offset = topOfImage
331bf215546Sopenharmony_ci               + (skippixels + column) * bytes_per_pixel
332bf215546Sopenharmony_ci               + (skiprows + row) * bytes_per_row
333bf215546Sopenharmony_ci               + (skipimages + img) * bytes_per_image;
334bf215546Sopenharmony_ci */
335bf215546Sopenharmony_ci   nir_ssa_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize);
336bf215546Sopenharmony_ci   bytes_per_row = nir_bcsel(b, nir_ult(b, sd->alignment, nir_imm_int(b, 2)),
337bf215546Sopenharmony_ci                             bytes_per_row,
338bf215546Sopenharmony_ci                             nir_iand(b,
339bf215546Sopenharmony_ci                                      nir_isub(b, nir_iadd(b, bytes_per_row, sd->alignment), nir_imm_int(b, 1)),
340bf215546Sopenharmony_ci                                      nir_inot(b, nir_isub(b, sd->alignment, nir_imm_int(b, 1)))));
341bf215546Sopenharmony_ci   nir_ssa_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1));
342bf215546Sopenharmony_ci   bytes_per_row = nir_bcsel(b, sd->invert,
343bf215546Sopenharmony_ci                             nir_isub(b, nir_imm_int(b, 0), bytes_per_row),
344bf215546Sopenharmony_ci                             bytes_per_row);
345bf215546Sopenharmony_ci   return nir_iadd(b,
346bf215546Sopenharmony_ci                   nir_imul(b, nir_channel(b, coord, 0), sd->blocksize),
347bf215546Sopenharmony_ci                   nir_iadd(b,
348bf215546Sopenharmony_ci                            nir_imul(b, nir_channel(b, coord, 1), bytes_per_row),
349bf215546Sopenharmony_ci                            nir_imul(b, nir_channel(b, coord, 2), bytes_per_image)));
350bf215546Sopenharmony_ci}
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_cistatic inline void
353bf215546Sopenharmony_ciwrite_ssbo(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset)
354bf215546Sopenharmony_ci{
355bf215546Sopenharmony_ci   nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset,
356bf215546Sopenharmony_ci                  .align_mul = pixel->bit_size / 8,
357bf215546Sopenharmony_ci                  .write_mask = (1 << pixel->num_components) - 1);
358bf215546Sopenharmony_ci}
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_cistatic void
361bf215546Sopenharmony_ciwrite_conversion(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd)
362bf215546Sopenharmony_ci{
363bf215546Sopenharmony_ci   nir_push_if(b, nir_ilt(b, sd->dst_bit_size, nir_imm_int(b, 32)));
364bf215546Sopenharmony_ci      nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16));
365bf215546Sopenharmony_ci         write_ssbo(b, nir_u2u16(b, pixel), buffer_offset);
366bf215546Sopenharmony_ci      nir_push_else(b, NULL);
367bf215546Sopenharmony_ci         write_ssbo(b, nir_u2u8(b, pixel), buffer_offset);
368bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
369bf215546Sopenharmony_ci   nir_push_else(b, NULL);
370bf215546Sopenharmony_ci      write_ssbo(b, pixel, buffer_offset);
371bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
372bf215546Sopenharmony_ci}
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_cistatic nir_ssa_def *
375bf215546Sopenharmony_ciswap2(nir_builder *b, nir_ssa_def *src)
376bf215546Sopenharmony_ci{
377bf215546Sopenharmony_ci   /* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */
378bf215546Sopenharmony_ci   return nir_ior(b,
379bf215546Sopenharmony_ci                  nir_ushr_imm(b, src, 8),
380bf215546Sopenharmony_ci                  nir_iand_imm(b, nir_ishl(b, src, nir_imm_int(b, 8)), 0xff00));
381bf215546Sopenharmony_ci}
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_cistatic nir_ssa_def *
384bf215546Sopenharmony_ciswap4(nir_builder *b, nir_ssa_def *src)
385bf215546Sopenharmony_ci{
386bf215546Sopenharmony_ci   /* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */
387bf215546Sopenharmony_ci   return nir_ior(b,
388bf215546Sopenharmony_ci                  /* (b >> 24) */
389bf215546Sopenharmony_ci                  nir_ushr_imm(b, src, 24),
390bf215546Sopenharmony_ci                  nir_ior(b,
391bf215546Sopenharmony_ci                          /* ((b >> 8) & 0xff00) */
392bf215546Sopenharmony_ci                          nir_iand(b, nir_ushr_imm(b, src, 8), nir_imm_int(b, 0xff00)),
393bf215546Sopenharmony_ci                          nir_ior(b,
394bf215546Sopenharmony_ci                                  /* ((b << 8) & 0xff0000) */
395bf215546Sopenharmony_ci                                  nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 8)), nir_imm_int(b, 0xff0000)),
396bf215546Sopenharmony_ci                                  /* ((b << 24) & 0xff000000) */
397bf215546Sopenharmony_ci                                  nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 24)), nir_imm_int(b, 0xff000000)))));
398bf215546Sopenharmony_ci}
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci/* explode the cf to handle channel counts in the shader */
401bf215546Sopenharmony_cistatic void
402bf215546Sopenharmony_cigrab_components(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   if (weird_packed) {
405bf215546Sopenharmony_ci      nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32));
406bf215546Sopenharmony_ci         write_conversion(b, nir_channels(b, pixel, 3), buffer_offset, sd);
407bf215546Sopenharmony_ci      nir_push_else(b, NULL);
408bf215546Sopenharmony_ci         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
409bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
410bf215546Sopenharmony_ci   } else {
411bf215546Sopenharmony_ci      nir_push_if(b, nir_ieq_imm(b, sd->channels, 1));
412bf215546Sopenharmony_ci         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
413bf215546Sopenharmony_ci      nir_push_else(b, NULL);
414bf215546Sopenharmony_ci         nir_push_if(b, nir_ieq_imm(b, sd->channels, 2));
415bf215546Sopenharmony_ci            write_conversion(b, nir_channels(b, pixel, (1 << 2) - 1), buffer_offset, sd);
416bf215546Sopenharmony_ci         nir_push_else(b, NULL);
417bf215546Sopenharmony_ci            nir_push_if(b, nir_ieq_imm(b, sd->channels, 3));
418bf215546Sopenharmony_ci               write_conversion(b, nir_channels(b, pixel, (1 << 3) - 1), buffer_offset, sd);
419bf215546Sopenharmony_ci            nir_push_else(b, NULL);
420bf215546Sopenharmony_ci               write_conversion(b, nir_channels(b, pixel, (1 << 4) - 1), buffer_offset, sd);
421bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
422bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
423bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
424bf215546Sopenharmony_ci   }
425bf215546Sopenharmony_ci}
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci/* if byteswap is enabled, handle that and then write the components */
428bf215546Sopenharmony_cistatic void
429bf215546Sopenharmony_cihandle_swap(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
430bf215546Sopenharmony_ci            struct pbo_shader_data *sd, unsigned num_components, bool weird_packed)
431bf215546Sopenharmony_ci{
432bf215546Sopenharmony_ci   nir_push_if(b, sd->swap); {
433bf215546Sopenharmony_ci      nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); {
434bf215546Sopenharmony_ci         /* this is a single high/low swap per component */
435bf215546Sopenharmony_ci         nir_ssa_def *components[4];
436bf215546Sopenharmony_ci         for (unsigned i = 0; i < 4; i++)
437bf215546Sopenharmony_ci            components[i] = swap2(b, nir_channel(b, pixel, i));
438bf215546Sopenharmony_ci         nir_ssa_def *v = nir_vec(b, components, 4);
439bf215546Sopenharmony_ci         grab_components(b, v, buffer_offset, sd, weird_packed);
440bf215546Sopenharmony_ci      } nir_push_else(b, NULL); {
441bf215546Sopenharmony_ci         /* this is a pair of high/low swaps for each half of the component */
442bf215546Sopenharmony_ci         nir_ssa_def *components[4];
443bf215546Sopenharmony_ci         for (unsigned i = 0; i < 4; i++)
444bf215546Sopenharmony_ci            components[i] = swap4(b, nir_channel(b, pixel, i));
445bf215546Sopenharmony_ci         nir_ssa_def *v = nir_vec(b, components, 4);
446bf215546Sopenharmony_ci         grab_components(b, v, buffer_offset, sd, weird_packed);
447bf215546Sopenharmony_ci      } nir_pop_if(b, NULL);
448bf215546Sopenharmony_ci   } nir_push_else(b, NULL); {
449bf215546Sopenharmony_ci      /* swap disabled */
450bf215546Sopenharmony_ci      grab_components(b, pixel, buffer_offset, sd, weird_packed);
451bf215546Sopenharmony_ci   } nir_pop_if(b, NULL);
452bf215546Sopenharmony_ci}
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_cistatic nir_ssa_def *
455bf215546Sopenharmony_cicheck_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component)
456bf215546Sopenharmony_ci{
457bf215546Sopenharmony_ci   nir_ssa_def *c = nir_channel(b, sd->bits, component - 1);
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   return nir_bcsel(b,
460bf215546Sopenharmony_ci                    nir_ige(b, sd->channels, nir_imm_int(b, component)),
461bf215546Sopenharmony_ci                    nir_ior(b,
462bf215546Sopenharmony_ci                            nir_ine(b, c, sd->bits1),
463bf215546Sopenharmony_ci                            nir_ine(b, nir_imod(b, c, nir_imm_int(b, 8)), nir_imm_int(b, 0))),
464bf215546Sopenharmony_ci                    nir_imm_bool(b, 0));
465bf215546Sopenharmony_ci}
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci/* convenience function for clamping signed integers */
468bf215546Sopenharmony_cistatic inline nir_ssa_def *
469bf215546Sopenharmony_cinir_imin_imax(nir_builder *build, nir_ssa_def *src, nir_ssa_def *clamp_to_min, nir_ssa_def *clamp_to_max)
470bf215546Sopenharmony_ci{
471bf215546Sopenharmony_ci   return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max);
472bf215546Sopenharmony_ci}
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_cistatic inline nir_ssa_def *
475bf215546Sopenharmony_cinir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
476bf215546Sopenharmony_ci{
477bf215546Sopenharmony_ci   /* Clamp to the range [0, 1] */
478bf215546Sopenharmony_ci   f = nir_fsat(b, f);
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci   return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
481bf215546Sopenharmony_ci}
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_cistatic inline nir_ssa_def *
484bf215546Sopenharmony_cinir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
485bf215546Sopenharmony_ci{
486bf215546Sopenharmony_ci   /* Clamp to the range [-1, 1] */
487bf215546Sopenharmony_ci   f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
490bf215546Sopenharmony_ci}
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_cistatic nir_ssa_def *
493bf215546Sopenharmony_ciclamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels)
494bf215546Sopenharmony_ci{
495bf215546Sopenharmony_ci   nir_ssa_def *one = nir_imm_ivec4(b, 1, 0, 0, 0);
496bf215546Sopenharmony_ci   nir_ssa_def *two = nir_imm_ivec4(b, 1, 1, 0, 0);
497bf215546Sopenharmony_ci   nir_ssa_def *three = nir_imm_ivec4(b, 1, 1, 1, 0);
498bf215546Sopenharmony_ci   nir_ssa_def *four = nir_imm_ivec4(b, 1, 1, 1, 1);
499bf215546Sopenharmony_ci   /* avoid underflow by clamping to channel count */
500bf215546Sopenharmony_ci   src = nir_bcsel(b,
501bf215546Sopenharmony_ci                   nir_ieq(b, channels, one),
502bf215546Sopenharmony_ci                   nir_isub(b, src, one),
503bf215546Sopenharmony_ci                   nir_bcsel(b,
504bf215546Sopenharmony_ci                             nir_ieq_imm(b, channels, 2),
505bf215546Sopenharmony_ci                             nir_isub(b, src, two),
506bf215546Sopenharmony_ci                             nir_bcsel(b,
507bf215546Sopenharmony_ci                                       nir_ieq_imm(b, channels, 3),
508bf215546Sopenharmony_ci                                       nir_isub(b, src, three),
509bf215546Sopenharmony_ci                                       nir_isub(b, src, four))));
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   return nir_mask(b, src, 32);
512bf215546Sopenharmony_ci}
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_cistatic void
515bf215546Sopenharmony_ciconvert_swap_write(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
516bf215546Sopenharmony_ci                   unsigned num_components,
517bf215546Sopenharmony_ci                   struct pbo_shader_data *sd)
518bf215546Sopenharmony_ci{
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci   nir_ssa_def *weird_packed = nir_ior(b,
521bf215546Sopenharmony_ci                                       nir_ior(b,
522bf215546Sopenharmony_ci                                               check_for_weird_packing(b, sd, 4),
523bf215546Sopenharmony_ci                                               check_for_weird_packing(b, sd, 3)),
524bf215546Sopenharmony_ci                                       check_for_weird_packing(b, sd, 2));
525bf215546Sopenharmony_ci   if (num_components == 1) {
526bf215546Sopenharmony_ci      nir_push_if(b, weird_packed);
527bf215546Sopenharmony_ci         nir_push_if(b, sd->r11g11b10_or_sint);
528bf215546Sopenharmony_ci            handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), buffer_offset, sd, 1, true);
529bf215546Sopenharmony_ci         nir_push_else(b, NULL);
530bf215546Sopenharmony_ci            nir_push_if(b, sd->r9g9b9e5);
531bf215546Sopenharmony_ci               handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true);
532bf215546Sopenharmony_ci            nir_push_else(b, NULL);
533bf215546Sopenharmony_ci               nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
534bf215546Sopenharmony_ci                  nir_ssa_def *pack[2];
535bf215546Sopenharmony_ci                  pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0));
536bf215546Sopenharmony_ci                  pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6));
537bf215546Sopenharmony_ci                  handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true);
538bf215546Sopenharmony_ci               } nir_push_else(b, NULL);
539bf215546Sopenharmony_ci                  handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true);
540bf215546Sopenharmony_ci               nir_pop_if(b, NULL);
541bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
542bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
543bf215546Sopenharmony_ci      nir_push_else(b, NULL);
544bf215546Sopenharmony_ci         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
545bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
546bf215546Sopenharmony_ci   } else {
547bf215546Sopenharmony_ci      nir_push_if(b, weird_packed);
548bf215546Sopenharmony_ci         handle_swap(b, pixel, buffer_offset, sd, num_components, true);
549bf215546Sopenharmony_ci      nir_push_else(b, NULL);
550bf215546Sopenharmony_ci         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
551bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
552bf215546Sopenharmony_ci   }
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_cistatic void
556bf215546Sopenharmony_cido_shader_conversion(nir_builder *b, nir_ssa_def *pixel,
557bf215546Sopenharmony_ci                     unsigned num_components,
558bf215546Sopenharmony_ci                     nir_ssa_def *coord, struct pbo_shader_data *sd)
559bf215546Sopenharmony_ci{
560bf215546Sopenharmony_ci   nir_ssa_def *buffer_offset = get_buffer_offset(b, coord, sd);
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   nir_ssa_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels);
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci#define CONVERT_SWAP_WRITE(PIXEL) \
565bf215546Sopenharmony_ci   convert_swap_write(b, PIXEL, buffer_offset, num_components, sd);
566bf215546Sopenharmony_ci   nir_push_if(b, sd->normalized);
567bf215546Sopenharmony_ci      nir_push_if(b, sd->clamp_uint); //unorm
568bf215546Sopenharmony_ci         CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32))));
569bf215546Sopenharmony_ci      nir_push_else(b, NULL);
570bf215546Sopenharmony_ci         CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask)));
571bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
572bf215546Sopenharmony_ci   nir_push_else(b, NULL);
573bf215546Sopenharmony_ci      nir_push_if(b, sd->integer);
574bf215546Sopenharmony_ci         nir_push_if(b, sd->r11g11b10_or_sint); //sint
575bf215546Sopenharmony_ci            nir_push_if(b, sd->clamp_uint); //uint -> sint
576bf215546Sopenharmony_ci               CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask));
577bf215546Sopenharmony_ci            nir_push_else(b, NULL);
578bf215546Sopenharmony_ci               CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_isub(b, nir_ineg(b, signed_bit_mask), nir_imm_int(b, 1))));
579bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
580bf215546Sopenharmony_ci         nir_push_else(b, NULL);
581bf215546Sopenharmony_ci            nir_push_if(b, sd->clamp_uint); //uint
582bf215546Sopenharmony_ci               /* nir_format_clamp_uint */
583bf215546Sopenharmony_ci               CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32)));
584bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
585bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
586bf215546Sopenharmony_ci      nir_push_else(b, NULL);
587bf215546Sopenharmony_ci         nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half
588bf215546Sopenharmony_ci            CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel));
589bf215546Sopenharmony_ci         nir_push_else(b, NULL);
590bf215546Sopenharmony_ci            CONVERT_SWAP_WRITE(pixel);
591bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
592bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
593bf215546Sopenharmony_ci}
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_cistatic void *
596bf215546Sopenharmony_cicreate_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components)
597bf215546Sopenharmony_ci{
598bf215546Sopenharmony_ci   const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE);
599bf215546Sopenharmony_ci   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert");
600bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64;
601bf215546Sopenharmony_ci   b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 8 : 1;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   b.shader->info.workgroup_size[2] = 1;
604bf215546Sopenharmony_ci   b.shader->info.textures_used[0] = 1;
605bf215546Sopenharmony_ci   b.shader->info.num_ssbos = 1;
606bf215546Sopenharmony_ci   b.shader->num_uniforms = 2;
607bf215546Sopenharmony_ci   nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo");
608bf215546Sopenharmony_ci   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler");
609bf215546Sopenharmony_ci   unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type);
610bf215546Sopenharmony_ci   sampler->data.explicit_binding = 1;
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   struct pbo_shader_data sd;
613bf215546Sopenharmony_ci   init_pbo_shader_data(&b, &sd);
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci   nir_ssa_def *bsize = nir_imm_ivec4(&b,
616bf215546Sopenharmony_ci                                      b.shader->info.workgroup_size[0],
617bf215546Sopenharmony_ci                                      b.shader->info.workgroup_size[1],
618bf215546Sopenharmony_ci                                      b.shader->info.workgroup_size[2],
619bf215546Sopenharmony_ci                                      0);
620bf215546Sopenharmony_ci   nir_ssa_def *wid = nir_load_workgroup_id(&b, 32);
621bf215546Sopenharmony_ci   nir_ssa_def *iid = nir_load_local_invocation_id(&b);
622bf215546Sopenharmony_ci   nir_ssa_def *tile = nir_imul(&b, wid, bsize);
623bf215546Sopenharmony_ci   nir_ssa_def *global_id = nir_iadd(&b, tile, iid);
624bf215546Sopenharmony_ci   nir_ssa_def *start = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), sd.offset);
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   nir_ssa_def *coord;
627bf215546Sopenharmony_ci   if (coord_components < 3)
628bf215546Sopenharmony_ci      coord = start;
629bf215546Sopenharmony_ci   else {
630bf215546Sopenharmony_ci      /* pad offset vec with global_id to get correct z offset */
631bf215546Sopenharmony_ci      assert(coord_components == 3);
632bf215546Sopenharmony_ci      coord = nir_vec3(&b, nir_channel(&b, start, 0),
633bf215546Sopenharmony_ci                           nir_channel(&b, start, 1),
634bf215546Sopenharmony_ci                           nir_channel(&b, global_id, 2));
635bf215546Sopenharmony_ci   }
636bf215546Sopenharmony_ci   coord = nir_trim_vector(&b, coord, coord_components);
637bf215546Sopenharmony_ci   nir_ssa_def *offset = coord_components > 2 ?
638bf215546Sopenharmony_ci                         nir_pad_vector_imm_int(&b, sd.offset, 0, 3) :
639bf215546Sopenharmony_ci                         nir_trim_vector(&b, sd.offset, coord_components);
640bf215546Sopenharmony_ci   nir_ssa_def *range = nir_trim_vector(&b, sd.range, coord_components);
641bf215546Sopenharmony_ci   nir_ssa_def *max = nir_iadd(&b, offset, range);
642bf215546Sopenharmony_ci   nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, max)));
643bf215546Sopenharmony_ci   nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3);
644bf215546Sopenharmony_ci   txf->is_array = glsl_sampler_type_is_array(sampler->type);
645bf215546Sopenharmony_ci   txf->op = nir_texop_txf;
646bf215546Sopenharmony_ci   txf->sampler_dim = glsl_get_sampler_dim(sampler->type);
647bf215546Sopenharmony_ci   txf->dest_type = nir_type_float32;
648bf215546Sopenharmony_ci   txf->coord_components = coord_components;
649bf215546Sopenharmony_ci   txf->texture_index = 0;
650bf215546Sopenharmony_ci   txf->sampler_index = 0;
651bf215546Sopenharmony_ci   txf->src[0].src_type = nir_tex_src_coord;
652bf215546Sopenharmony_ci   txf->src[0].src = nir_src_for_ssa(coord);
653bf215546Sopenharmony_ci   txf->src[1].src_type = nir_tex_src_lod;
654bf215546Sopenharmony_ci   txf->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
655bf215546Sopenharmony_ci   txf->src[2].src_type = nir_tex_src_texture_deref;
656bf215546Sopenharmony_ci   nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler);
657bf215546Sopenharmony_ci   txf->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci   nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
660bf215546Sopenharmony_ci   nir_builder_instr_insert(&b, &txf->instr);
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   /* pass the grid offset as the coord to get the zero-indexed buffer offset */
663bf215546Sopenharmony_ci   do_shader_conversion(&b, &txf->dest.ssa, num_components, global_id, &sd);
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci   nir_validate_shader(b.shader, NULL);
668bf215546Sopenharmony_ci   gl_nir_opts(b.shader);
669bf215546Sopenharmony_ci   return st_nir_finish_builtin_shader(st, b.shader);
670bf215546Sopenharmony_ci}
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_cistatic void
673bf215546Sopenharmony_ciinvert_swizzle(uint8_t *out, const uint8_t *in)
674bf215546Sopenharmony_ci{
675bf215546Sopenharmony_ci   /* First, default to all zeroes to prevent uninitialized junk */
676bf215546Sopenharmony_ci   for (unsigned c = 0; c < 4; ++c)
677bf215546Sopenharmony_ci      out[c] = PIPE_SWIZZLE_0;
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   /* Now "do" what the swizzle says */
680bf215546Sopenharmony_ci   for (unsigned c = 0; c < 4; ++c) {
681bf215546Sopenharmony_ci      unsigned char i = in[c];
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci      /* Who cares? */
684bf215546Sopenharmony_ci      assert(PIPE_SWIZZLE_X == 0);
685bf215546Sopenharmony_ci      if (i > PIPE_SWIZZLE_W)
686bf215546Sopenharmony_ci         continue;
687bf215546Sopenharmony_ci      /* Invert */
688bf215546Sopenharmony_ci      unsigned idx = i - PIPE_SWIZZLE_X;
689bf215546Sopenharmony_ci      out[idx] = PIPE_SWIZZLE_X + c;
690bf215546Sopenharmony_ci   }
691bf215546Sopenharmony_ci}
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_cistatic uint32_t
694bf215546Sopenharmony_cicompute_shader_key(enum pipe_texture_target target, unsigned num_components)
695bf215546Sopenharmony_ci{
696bf215546Sopenharmony_ci   uint8_t key_target[] = {
697bf215546Sopenharmony_ci      [PIPE_BUFFER] = UINT8_MAX,
698bf215546Sopenharmony_ci      [PIPE_TEXTURE_1D] = 1,
699bf215546Sopenharmony_ci      [PIPE_TEXTURE_2D] = 2,
700bf215546Sopenharmony_ci      [PIPE_TEXTURE_3D] = 3,
701bf215546Sopenharmony_ci      [PIPE_TEXTURE_CUBE] = 4,
702bf215546Sopenharmony_ci      [PIPE_TEXTURE_RECT] = UINT8_MAX,
703bf215546Sopenharmony_ci      [PIPE_TEXTURE_1D_ARRAY] = 5,
704bf215546Sopenharmony_ci      [PIPE_TEXTURE_2D_ARRAY] = 6,
705bf215546Sopenharmony_ci      [PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX,
706bf215546Sopenharmony_ci   };
707bf215546Sopenharmony_ci   assert(target < ARRAY_SIZE(key_target));
708bf215546Sopenharmony_ci   assert(key_target[target] != UINT8_MAX);
709bf215546Sopenharmony_ci   return key_target[target] | (num_components << 3);
710bf215546Sopenharmony_ci}
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_cistatic unsigned
713bf215546Sopenharmony_ciget_dim_from_target(enum pipe_texture_target target)
714bf215546Sopenharmony_ci{
715bf215546Sopenharmony_ci   switch (target) {
716bf215546Sopenharmony_ci   case PIPE_TEXTURE_1D:
717bf215546Sopenharmony_ci      return 1;
718bf215546Sopenharmony_ci   case PIPE_TEXTURE_2D_ARRAY:
719bf215546Sopenharmony_ci   case PIPE_TEXTURE_3D:
720bf215546Sopenharmony_ci      return 3;
721bf215546Sopenharmony_ci   default:
722bf215546Sopenharmony_ci      return 2;
723bf215546Sopenharmony_ci   }
724bf215546Sopenharmony_ci}
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_cistatic enum pipe_texture_target
727bf215546Sopenharmony_ciget_target_from_texture(struct pipe_resource *src)
728bf215546Sopenharmony_ci{
729bf215546Sopenharmony_ci   enum pipe_texture_target view_target;
730bf215546Sopenharmony_ci   switch (src->target) {
731bf215546Sopenharmony_ci   case PIPE_TEXTURE_RECT:
732bf215546Sopenharmony_ci      view_target = PIPE_TEXTURE_2D;
733bf215546Sopenharmony_ci      break;
734bf215546Sopenharmony_ci   case PIPE_TEXTURE_CUBE:
735bf215546Sopenharmony_ci   case PIPE_TEXTURE_CUBE_ARRAY:
736bf215546Sopenharmony_ci      view_target = PIPE_TEXTURE_2D_ARRAY;
737bf215546Sopenharmony_ci      break;
738bf215546Sopenharmony_ci   default:
739bf215546Sopenharmony_ci      view_target = src->target;
740bf215546Sopenharmony_ci      break;
741bf215546Sopenharmony_ci   }
742bf215546Sopenharmony_ci   return view_target;
743bf215546Sopenharmony_ci}
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci/* force swizzling behavior for sampling */
746bf215546Sopenharmony_cienum swizzle_clamp {
747bf215546Sopenharmony_ci   /* force component selection for named format */
748bf215546Sopenharmony_ci   SWIZZLE_CLAMP_LUMINANCE = 1,
749bf215546Sopenharmony_ci   SWIZZLE_CLAMP_ALPHA = 2,
750bf215546Sopenharmony_ci   SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3,
751bf215546Sopenharmony_ci   SWIZZLE_CLAMP_INTENSITY = 4,
752bf215546Sopenharmony_ci   SWIZZLE_CLAMP_RGBX = 5,
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   /* select only 1 component */
755bf215546Sopenharmony_ci   SWIZZLE_CLAMP_GREEN = 8,
756bf215546Sopenharmony_ci   SWIZZLE_CLAMP_BLUE = 16,
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   /* reverse ordering for format emulation */
759bf215546Sopenharmony_ci   SWIZZLE_CLAMP_BGRA = 32,
760bf215546Sopenharmony_ci};
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_cistatic bool
763bf215546Sopenharmony_cican_copy_direct(const struct gl_pixelstore_attrib *pack)
764bf215546Sopenharmony_ci{
765bf215546Sopenharmony_ci   return !(pack->RowLength ||
766bf215546Sopenharmony_ci            pack->SkipPixels ||
767bf215546Sopenharmony_ci            pack->SkipRows ||
768bf215546Sopenharmony_ci            pack->ImageHeight ||
769bf215546Sopenharmony_ci            pack->SkipImages);
770bf215546Sopenharmony_ci}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_cistatic struct pipe_resource *
773bf215546Sopenharmony_cidownload_texture_compute(struct st_context *st,
774bf215546Sopenharmony_ci                         const struct gl_pixelstore_attrib *pack,
775bf215546Sopenharmony_ci                         GLint xoffset, GLint yoffset, GLint zoffset,
776bf215546Sopenharmony_ci                         GLsizei width, GLsizei height, GLint depth,
777bf215546Sopenharmony_ci                         unsigned level, unsigned layer,
778bf215546Sopenharmony_ci                         GLenum format, GLenum type,
779bf215546Sopenharmony_ci                         enum pipe_format src_format,
780bf215546Sopenharmony_ci                         enum pipe_texture_target view_target,
781bf215546Sopenharmony_ci                         struct pipe_resource *src,
782bf215546Sopenharmony_ci                         enum pipe_format dst_format,
783bf215546Sopenharmony_ci                         enum swizzle_clamp swizzle_clamp)
784bf215546Sopenharmony_ci{
785bf215546Sopenharmony_ci   struct pipe_context *pipe = st->pipe;
786bf215546Sopenharmony_ci   struct pipe_screen *screen = st->screen;
787bf215546Sopenharmony_ci   struct pipe_resource *dst = NULL;
788bf215546Sopenharmony_ci   unsigned dim = get_dim_from_target(view_target);
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci   /* clamp 3d offsets based on slice */
791bf215546Sopenharmony_ci   if (view_target == PIPE_TEXTURE_3D)
792bf215546Sopenharmony_ci      zoffset += layer;
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   unsigned num_components = 0;
795bf215546Sopenharmony_ci   /* Upload constants */
796bf215546Sopenharmony_ci   {
797bf215546Sopenharmony_ci      struct pipe_constant_buffer cb;
798bf215546Sopenharmony_ci      assert(view_target != PIPE_TEXTURE_1D_ARRAY || !zoffset);
799bf215546Sopenharmony_ci      struct pbo_data pd = {
800bf215546Sopenharmony_ci         .x = xoffset,
801bf215546Sopenharmony_ci         .y = view_target == PIPE_TEXTURE_1D_ARRAY ? 0 : yoffset,
802bf215546Sopenharmony_ci         .width = width, .height = height, .depth = depth,
803bf215546Sopenharmony_ci         .invert = pack->Invert,
804bf215546Sopenharmony_ci         .blocksize = util_format_get_blocksize(dst_format) - 1,
805bf215546Sopenharmony_ci         .alignment = ffs(MAX2(pack->Alignment, 1)) - 1,
806bf215546Sopenharmony_ci      };
807bf215546Sopenharmony_ci      num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1);
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci      cb.buffer = NULL;
810bf215546Sopenharmony_ci      cb.user_buffer = &pd;
811bf215546Sopenharmony_ci      cb.buffer_offset = 0;
812bf215546Sopenharmony_ci      cb.buffer_size = sizeof(pd);
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci      pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb);
815bf215546Sopenharmony_ci   }
816bf215546Sopenharmony_ci
817bf215546Sopenharmony_ci   uint32_t hash_key = compute_shader_key(view_target, num_components);
818bf215546Sopenharmony_ci   assert(hash_key != 0);
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci   struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key);
821bf215546Sopenharmony_ci   void *cs;
822bf215546Sopenharmony_ci   if (!he) {
823bf215546Sopenharmony_ci      cs = create_conversion_shader(st, view_target, num_components);
824bf215546Sopenharmony_ci      he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs);
825bf215546Sopenharmony_ci   }
826bf215546Sopenharmony_ci   cs = he->data;
827bf215546Sopenharmony_ci   assert(cs);
828bf215546Sopenharmony_ci   struct cso_context *cso = st->cso_context;
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci   cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS);
831bf215546Sopenharmony_ci   cso_set_compute_shader_handle(cso, cs);
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   /* Set up the sampler_view */
834bf215546Sopenharmony_ci   {
835bf215546Sopenharmony_ci      struct pipe_sampler_view templ;
836bf215546Sopenharmony_ci      struct pipe_sampler_view *sampler_view;
837bf215546Sopenharmony_ci      struct pipe_sampler_state sampler = {0};
838bf215546Sopenharmony_ci      sampler.normalized_coords = true;
839bf215546Sopenharmony_ci      const struct pipe_sampler_state *samplers[1] = {&sampler};
840bf215546Sopenharmony_ci      const struct util_format_description *desc = util_format_description(dst_format);
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      u_sampler_view_default_template(&templ, src, src_format);
843bf215546Sopenharmony_ci      if (util_format_is_depth_or_stencil(dst_format)) {
844bf215546Sopenharmony_ci         templ.swizzle_r = PIPE_SWIZZLE_X;
845bf215546Sopenharmony_ci         templ.swizzle_g = PIPE_SWIZZLE_X;
846bf215546Sopenharmony_ci         templ.swizzle_b = PIPE_SWIZZLE_X;
847bf215546Sopenharmony_ci         templ.swizzle_a = PIPE_SWIZZLE_X;
848bf215546Sopenharmony_ci      } else {
849bf215546Sopenharmony_ci         uint8_t invswizzle[4];
850bf215546Sopenharmony_ci         const uint8_t *swizzle;
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci         /* these swizzle output bits require explicit component selection/ordering */
853bf215546Sopenharmony_ci         if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) {
854bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++)
855bf215546Sopenharmony_ci               invswizzle[i] = PIPE_SWIZZLE_Y;
856bf215546Sopenharmony_ci         } else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) {
857bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++)
858bf215546Sopenharmony_ci               invswizzle[i] = PIPE_SWIZZLE_Z;
859bf215546Sopenharmony_ci         } else {
860bf215546Sopenharmony_ci            if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) {
861bf215546Sopenharmony_ci               if (util_format_get_nr_components(dst_format) == 3)
862bf215546Sopenharmony_ci                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle;
863bf215546Sopenharmony_ci               else
864bf215546Sopenharmony_ci                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle;
865bf215546Sopenharmony_ci            } else {
866bf215546Sopenharmony_ci               swizzle = desc->swizzle;
867bf215546Sopenharmony_ci            }
868bf215546Sopenharmony_ci            invert_swizzle(invswizzle, swizzle);
869bf215546Sopenharmony_ci         }
870bf215546Sopenharmony_ci         swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE);
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci         /* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */
873bf215546Sopenharmony_ci         switch (swizzle_clamp) {
874bf215546Sopenharmony_ci         case SWIZZLE_CLAMP_LUMINANCE:
875bf215546Sopenharmony_ci            if (util_format_is_luminance(dst_format))
876bf215546Sopenharmony_ci               break;
877bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
878bf215546Sopenharmony_ci               if (invswizzle[i] != PIPE_SWIZZLE_X)
879bf215546Sopenharmony_ci                  invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0;
880bf215546Sopenharmony_ci            }
881bf215546Sopenharmony_ci            break;
882bf215546Sopenharmony_ci         case SWIZZLE_CLAMP_ALPHA:
883bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
884bf215546Sopenharmony_ci               if (invswizzle[i] != PIPE_SWIZZLE_W)
885bf215546Sopenharmony_ci                  invswizzle[i] = PIPE_SWIZZLE_0;
886bf215546Sopenharmony_ci            }
887bf215546Sopenharmony_ci            break;
888bf215546Sopenharmony_ci         case SWIZZLE_CLAMP_LUMINANCE_ALPHA:
889bf215546Sopenharmony_ci            if (util_format_is_luminance_alpha(dst_format))
890bf215546Sopenharmony_ci               break;
891bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
892bf215546Sopenharmony_ci               if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W)
893bf215546Sopenharmony_ci                  invswizzle[i] = PIPE_SWIZZLE_0;
894bf215546Sopenharmony_ci            }
895bf215546Sopenharmony_ci            break;
896bf215546Sopenharmony_ci         case SWIZZLE_CLAMP_INTENSITY:
897bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
898bf215546Sopenharmony_ci               if (invswizzle[i] == PIPE_SWIZZLE_W)
899bf215546Sopenharmony_ci                  invswizzle[i] = PIPE_SWIZZLE_1;
900bf215546Sopenharmony_ci               else if (invswizzle[i] != PIPE_SWIZZLE_X)
901bf215546Sopenharmony_ci                  invswizzle[i] = PIPE_SWIZZLE_0;
902bf215546Sopenharmony_ci            }
903bf215546Sopenharmony_ci            break;
904bf215546Sopenharmony_ci         case SWIZZLE_CLAMP_RGBX:
905bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
906bf215546Sopenharmony_ci               if (invswizzle[i] == PIPE_SWIZZLE_W)
907bf215546Sopenharmony_ci                  invswizzle[i] = PIPE_SWIZZLE_1;
908bf215546Sopenharmony_ci            }
909bf215546Sopenharmony_ci            break;
910bf215546Sopenharmony_ci         default: break;
911bf215546Sopenharmony_ci         }
912bf215546Sopenharmony_ci         templ.swizzle_r = invswizzle[0];
913bf215546Sopenharmony_ci         templ.swizzle_g = invswizzle[1];
914bf215546Sopenharmony_ci         templ.swizzle_b = invswizzle[2];
915bf215546Sopenharmony_ci         templ.swizzle_a = invswizzle[3];
916bf215546Sopenharmony_ci      }
917bf215546Sopenharmony_ci      templ.target = view_target;
918bf215546Sopenharmony_ci      templ.u.tex.first_level = level;
919bf215546Sopenharmony_ci      templ.u.tex.last_level = level;
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci      /* array textures expect to have array index provided */
922bf215546Sopenharmony_ci      if (view_target != PIPE_TEXTURE_3D && src->array_size) {
923bf215546Sopenharmony_ci         templ.u.tex.first_layer = layer;
924bf215546Sopenharmony_ci         if (view_target == PIPE_TEXTURE_1D_ARRAY) {
925bf215546Sopenharmony_ci            templ.u.tex.first_layer += yoffset;
926bf215546Sopenharmony_ci            templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1;
927bf215546Sopenharmony_ci         } else {
928bf215546Sopenharmony_ci            templ.u.tex.first_layer += zoffset;
929bf215546Sopenharmony_ci            templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1;
930bf215546Sopenharmony_ci         }
931bf215546Sopenharmony_ci      }
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci      sampler_view = pipe->create_sampler_view(pipe, src, &templ);
934bf215546Sopenharmony_ci      if (sampler_view == NULL)
935bf215546Sopenharmony_ci         goto fail;
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci      pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false,
938bf215546Sopenharmony_ci                              &sampler_view);
939bf215546Sopenharmony_ci      st->state.num_sampler_views[PIPE_SHADER_COMPUTE] =
940bf215546Sopenharmony_ci         MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1);
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci      pipe_sampler_view_reference(&sampler_view, NULL);
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci      cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers);
945bf215546Sopenharmony_ci   }
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci   /* Set up destination buffer */
948bf215546Sopenharmony_ci   unsigned img_stride = src->target == PIPE_TEXTURE_3D ||
949bf215546Sopenharmony_ci                         src->target == PIPE_TEXTURE_2D_ARRAY ||
950bf215546Sopenharmony_ci                         src->target == PIPE_TEXTURE_CUBE_ARRAY ?
951bf215546Sopenharmony_ci                         /* only use image stride for 3d images to avoid pulling in IMAGE_HEIGHT pixelstore */
952bf215546Sopenharmony_ci                         _mesa_image_image_stride(pack, width, height, format, type) :
953bf215546Sopenharmony_ci                         _mesa_image_row_stride(pack, width, format, type) * height;
954bf215546Sopenharmony_ci   unsigned buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride;
955bf215546Sopenharmony_ci   {
956bf215546Sopenharmony_ci      struct pipe_shader_buffer buffer;
957bf215546Sopenharmony_ci      memset(&buffer, 0, sizeof(buffer));
958bf215546Sopenharmony_ci      if (can_copy_direct(pack) && pack->BufferObj) {
959bf215546Sopenharmony_ci         dst = pack->BufferObj->buffer;
960bf215546Sopenharmony_ci         assert(pack->BufferObj->Size >= buffer_size);
961bf215546Sopenharmony_ci      } else {
962bf215546Sopenharmony_ci         dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size);
963bf215546Sopenharmony_ci         if (!dst)
964bf215546Sopenharmony_ci            goto fail;
965bf215546Sopenharmony_ci      }
966bf215546Sopenharmony_ci      buffer.buffer = dst;
967bf215546Sopenharmony_ci      buffer.buffer_size = buffer_size;
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci      pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1);
970bf215546Sopenharmony_ci   }
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   struct pipe_grid_info info = { 0 };
973bf215546Sopenharmony_ci   info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64;
974bf215546Sopenharmony_ci   info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1;
975bf215546Sopenharmony_ci   info.last_block[0] = width % info.block[0];
976bf215546Sopenharmony_ci   info.last_block[1] = height % info.block[1];
977bf215546Sopenharmony_ci   info.block[2] = 1;
978bf215546Sopenharmony_ci   info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
979bf215546Sopenharmony_ci   info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
980bf215546Sopenharmony_ci   info.grid[2] = depth;
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci   pipe->launch_grid(pipe, &info);
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_cifail:
985bf215546Sopenharmony_ci   cso_restore_compute_state(cso);
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci   /* Unbind all because st/mesa won't do it if the current shader doesn't
988bf215546Sopenharmony_ci    * use them.
989bf215546Sopenharmony_ci    */
990bf215546Sopenharmony_ci   pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0,
991bf215546Sopenharmony_ci                           st->state.num_sampler_views[PIPE_SHADER_COMPUTE],
992bf215546Sopenharmony_ci                           false, NULL);
993bf215546Sopenharmony_ci   st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0;
994bf215546Sopenharmony_ci   pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0);
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   st->dirty |= ST_NEW_CS_CONSTANTS |
997bf215546Sopenharmony_ci                ST_NEW_CS_SSBOS |
998bf215546Sopenharmony_ci                ST_NEW_CS_SAMPLER_VIEWS;
999bf215546Sopenharmony_ci
1000bf215546Sopenharmony_ci   return dst;
1001bf215546Sopenharmony_ci}
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_cistatic void
1004bf215546Sopenharmony_cicopy_converted_buffer(struct gl_context * ctx,
1005bf215546Sopenharmony_ci                    struct gl_pixelstore_attrib *pack,
1006bf215546Sopenharmony_ci                    enum pipe_texture_target view_target,
1007bf215546Sopenharmony_ci                    struct pipe_resource *dst, enum pipe_format dst_format,
1008bf215546Sopenharmony_ci                    GLint xoffset, GLint yoffset, GLint zoffset,
1009bf215546Sopenharmony_ci                    GLsizei width, GLsizei height, GLint depth,
1010bf215546Sopenharmony_ci                    GLenum format, GLenum type, void *pixels)
1011bf215546Sopenharmony_ci{
1012bf215546Sopenharmony_ci   struct pipe_transfer *xfer;
1013bf215546Sopenharmony_ci   struct st_context *st = st_context(ctx);
1014bf215546Sopenharmony_ci   unsigned dim = get_dim_from_target(view_target);
1015bf215546Sopenharmony_ci   uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer);
1016bf215546Sopenharmony_ci   if (!map)
1017bf215546Sopenharmony_ci      return;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
1020bf215546Sopenharmony_ci   /* compute shader doesn't handle these to cut down on uniform size */
1021bf215546Sopenharmony_ci   if (!can_copy_direct(pack)) {
1022bf215546Sopenharmony_ci      if (view_target == PIPE_TEXTURE_1D_ARRAY) {
1023bf215546Sopenharmony_ci         depth = height;
1024bf215546Sopenharmony_ci         height = 1;
1025bf215546Sopenharmony_ci         zoffset = yoffset;
1026bf215546Sopenharmony_ci         yoffset = 0;
1027bf215546Sopenharmony_ci      }
1028bf215546Sopenharmony_ci      struct gl_pixelstore_attrib packing = *pack;
1029bf215546Sopenharmony_ci      memset(&packing.RowLength, 0, offsetof(struct gl_pixelstore_attrib, SwapBytes) - offsetof(struct gl_pixelstore_attrib, RowLength));
1030bf215546Sopenharmony_ci      for (unsigned z = 0; z < depth; z++) {
1031bf215546Sopenharmony_ci         for (unsigned y = 0; y < height; y++) {
1032bf215546Sopenharmony_ci            GLubyte *dst = _mesa_image_address(dim, pack, pixels,
1033bf215546Sopenharmony_ci                                       width, height, format, type,
1034bf215546Sopenharmony_ci                                       z, y, 0);
1035bf215546Sopenharmony_ci            GLubyte *srcpx = _mesa_image_address(dim, &packing, map,
1036bf215546Sopenharmony_ci                                                 width, height, format, type,
1037bf215546Sopenharmony_ci                                                 z, y, 0);
1038bf215546Sopenharmony_ci            util_streaming_load_memcpy(dst, srcpx, util_format_get_stride(dst_format, width));
1039bf215546Sopenharmony_ci         }
1040bf215546Sopenharmony_ci      }
1041bf215546Sopenharmony_ci   } else {
1042bf215546Sopenharmony_ci      /* direct copy for all other cases */
1043bf215546Sopenharmony_ci      util_streaming_load_memcpy(pixels, map, dst->width0);
1044bf215546Sopenharmony_ci   }
1045bf215546Sopenharmony_ci
1046bf215546Sopenharmony_ci   _mesa_unmap_pbo_dest(ctx, pack);
1047bf215546Sopenharmony_ci   pipe_buffer_unmap(st->pipe, xfer);
1048bf215546Sopenharmony_ci}
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_cibool
1051bf215546Sopenharmony_cist_GetTexSubImage_shader(struct gl_context * ctx,
1052bf215546Sopenharmony_ci                         GLint xoffset, GLint yoffset, GLint zoffset,
1053bf215546Sopenharmony_ci                         GLsizei width, GLsizei height, GLint depth,
1054bf215546Sopenharmony_ci                         GLenum format, GLenum type, void * pixels,
1055bf215546Sopenharmony_ci                         struct gl_texture_image *texImage)
1056bf215546Sopenharmony_ci{
1057bf215546Sopenharmony_ci   struct st_context *st = st_context(ctx);
1058bf215546Sopenharmony_ci   struct pipe_screen *screen = st->screen;
1059bf215546Sopenharmony_ci   struct gl_texture_object *stObj = texImage->TexObject;
1060bf215546Sopenharmony_ci   struct pipe_resource *src = texImage->pt;
1061bf215546Sopenharmony_ci   struct pipe_resource *dst = NULL;
1062bf215546Sopenharmony_ci   enum pipe_format dst_format, src_format;
1063bf215546Sopenharmony_ci   unsigned level = (texImage->pt != stObj->pt ? 0 : texImage->Level) + texImage->TexObject->Attrib.MinLevel;
1064bf215546Sopenharmony_ci   unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer;
1065bf215546Sopenharmony_ci   enum pipe_texture_target view_target;
1066bf215546Sopenharmony_ci
1067bf215546Sopenharmony_ci   assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
1068bf215546Sopenharmony_ci          !_mesa_is_format_astc_2d(texImage->TexFormat) &&
1069bf215546Sopenharmony_ci          texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   /* See if the texture format already matches the format and type,
1072bf215546Sopenharmony_ci    * in which case the memcpy-based fast path will be used. */
1073bf215546Sopenharmony_ci   if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
1074bf215546Sopenharmony_ci                                            type, ctx->Pack.SwapBytes, NULL)) {
1075bf215546Sopenharmony_ci      return false;
1076bf215546Sopenharmony_ci   }
1077bf215546Sopenharmony_ci   enum swizzle_clamp swizzle_clamp = 0;
1078bf215546Sopenharmony_ci   src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src);
1079bf215546Sopenharmony_ci   if (src_format == PIPE_FORMAT_NONE)
1080bf215546Sopenharmony_ci      return false;
1081bf215546Sopenharmony_ci
1082bf215546Sopenharmony_ci   if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) {
1083bf215546Sopenharmony_ci      /* special handling for drivers that don't support these formats natively */
1084bf215546Sopenharmony_ci      if (texImage->_BaseFormat == GL_LUMINANCE)
1085bf215546Sopenharmony_ci         swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE;
1086bf215546Sopenharmony_ci      else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA)
1087bf215546Sopenharmony_ci         swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA;
1088bf215546Sopenharmony_ci      else if (texImage->_BaseFormat == GL_ALPHA)
1089bf215546Sopenharmony_ci         swizzle_clamp = SWIZZLE_CLAMP_ALPHA;
1090bf215546Sopenharmony_ci      else if (texImage->_BaseFormat == GL_INTENSITY)
1091bf215546Sopenharmony_ci         swizzle_clamp = SWIZZLE_CLAMP_INTENSITY;
1092bf215546Sopenharmony_ci      else if (texImage->_BaseFormat == GL_RGB)
1093bf215546Sopenharmony_ci         swizzle_clamp = SWIZZLE_CLAMP_RGBX;
1094bf215546Sopenharmony_ci   }
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci   dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0);
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci   if (dst_format == PIPE_FORMAT_NONE) {
1099bf215546Sopenharmony_ci      bool need_bgra_swizzle = false;
1100bf215546Sopenharmony_ci      dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle);
1101bf215546Sopenharmony_ci      if (dst_format == PIPE_FORMAT_NONE)
1102bf215546Sopenharmony_ci         return false;
1103bf215546Sopenharmony_ci      /* special swizzling for component selection */
1104bf215546Sopenharmony_ci      if (need_bgra_swizzle)
1105bf215546Sopenharmony_ci         swizzle_clamp |= SWIZZLE_CLAMP_BGRA;
1106bf215546Sopenharmony_ci      else if (format == GL_GREEN_INTEGER)
1107bf215546Sopenharmony_ci         swizzle_clamp |= SWIZZLE_CLAMP_GREEN;
1108bf215546Sopenharmony_ci      else if (format == GL_BLUE_INTEGER)
1109bf215546Sopenharmony_ci         swizzle_clamp |= SWIZZLE_CLAMP_BLUE;
1110bf215546Sopenharmony_ci   }
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci   /* check with the driver to see if memcpy is likely to be faster */
1113bf215546Sopenharmony_ci   if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true))
1114bf215546Sopenharmony_ci      return false;
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_ci   view_target = get_target_from_texture(src);
1117bf215546Sopenharmony_ci   /* I don't know why this works
1118bf215546Sopenharmony_ci    * only for the texture rects
1119bf215546Sopenharmony_ci    * but that's how it is
1120bf215546Sopenharmony_ci    */
1121bf215546Sopenharmony_ci   if ((src->target != PIPE_TEXTURE_RECT &&
1122bf215546Sopenharmony_ci       /* this would need multiple samplerviews */
1123bf215546Sopenharmony_ci       ((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) ||
1124bf215546Sopenharmony_ci       /* these format just doesn't work and science can't explain why */
1125bf215546Sopenharmony_ci       dst_format == PIPE_FORMAT_Z32_FLOAT)) ||
1126bf215546Sopenharmony_ci       /* L8 -> L32_FLOAT is another thinker */
1127bf215546Sopenharmony_ci       (!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT))
1128bf215546Sopenharmony_ci      return false;
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci   dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth,
1131bf215546Sopenharmony_ci                                  level, layer, format, type, src_format, view_target, src, dst_format,
1132bf215546Sopenharmony_ci                                  swizzle_clamp);
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci   if (!can_copy_direct(&ctx->Pack) || !ctx->Pack.BufferObj) {
1135bf215546Sopenharmony_ci      copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset,
1136bf215546Sopenharmony_ci                          width, height, depth, format, type, pixels);
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci      pipe_resource_reference(&dst, NULL);
1139bf215546Sopenharmony_ci   }
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci   return true;
1142bf215546Sopenharmony_ci}
1143bf215546Sopenharmony_ci
1144