1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
25  */
26 
27 #include <stdbool.h>
28 #include "main/image.h"
29 #include "main/pbo.h"
30 
31 #include "state_tracker/st_nir.h"
32 #include "state_tracker/st_format.h"
33 #include "state_tracker/st_pbo.h"
34 #include "state_tracker/st_texture.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "compiler/nir/nir_format_convert.h"
37 #include "compiler/glsl/gl_nir.h"
38 #include "compiler/glsl/gl_nir_linker.h"
39 #include "util/u_sampler.h"
40 #include "util/streaming-load-memcpy.h"
41 
/* Table initializer for BGR-ordered formats, indexed as
 * [signedness][variant]: outer index 0 = signed (SNORM/SINT),
 * 1 = unsigned (UNORM/UINT); inner index 0 = normalized, 1 = pure integer.
 * NOTE(review): not referenced in the code visible here before the
 * #undef below — presumably consumed elsewhere; verify before removing.
 */
#define BGR_FORMAT(NAME) \
    {{ \
     [0] = PIPE_FORMAT_##NAME##_SNORM, \
     [1] = PIPE_FORMAT_##NAME##_SINT, \
    }, \
    { \
     [0] = PIPE_FORMAT_##NAME##_UNORM, \
     [1] = PIPE_FORMAT_##NAME##_UINT, \
    }}
51 
/* Table initializer for RGB-ordered formats, indexed as
 * [signedness][bytes-per-channel]: outer index 0 = signed (SNORM),
 * 1 = unsigned (UNORM); inner index 1/2/4 = 8-/16-/32-bit channels.
 * Used by get_convert_format()'s rgb[] lookup table below.
 */
#define FORMAT(NAME, NAME16, NAME32) \
   {{ \
    [1] = PIPE_FORMAT_##NAME##_SNORM, \
    [2] = PIPE_FORMAT_##NAME16##_SNORM, \
    [4] = PIPE_FORMAT_##NAME32##_SNORM, \
   }, \
   { \
    [1] = PIPE_FORMAT_##NAME##_UNORM, \
    [2] = PIPE_FORMAT_##NAME16##_UNORM, \
    [4] = PIPE_FORMAT_##NAME32##_UNORM, \
   }}
63 
64 /* don't try these at home */
65 static enum pipe_format
get_convert_format(struct gl_context *ctx, enum pipe_format src_format, GLenum format, GLenum type, bool *need_bgra_swizzle)66 get_convert_format(struct gl_context *ctx,
67                 enum pipe_format src_format,
68                 GLenum format, GLenum type,
69                 bool *need_bgra_swizzle)
70 {
71    struct st_context *st = st_context(ctx);
72    GLint bpp = _mesa_bytes_per_pixel(format, type);
73    if (_mesa_is_depth_format(format) ||
74        format == GL_GREEN_INTEGER ||
75        format == GL_BLUE_INTEGER) {
76       switch (bpp) {
77       case 1:
78          return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT;
79       case 2:
80          return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT;
81       case 4:
82          return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT;
83       }
84    }
85    mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type);
86    enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat);
87    if (!pformat) {
88       GLint dst_components = _mesa_components_in_format(format);
89       bpp /= dst_components;
90       if (format == GL_BGR || format == GL_BGRA) {
91          pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0);
92          if (!pformat)
93             pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle);
94          assert(pformat);
95          *need_bgra_swizzle = true;
96       } else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) {
97          pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0);
98          if (!pformat)
99             pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle);
100          assert(pformat);
101          *need_bgra_swizzle = true;
102       } else {
103          /* [signed,unsigned][bpp] */
104          enum pipe_format rgb[5][2][5] = {
105             [1] = FORMAT(R8, R16, R32),
106             [2] = FORMAT(R8G8, R16G16, R32G32),
107             [3] = FORMAT(R8G8B8, R16G16B16, R32G32B32),
108             [4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32),
109          };
110          pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp];
111       }
112       assert(util_format_get_nr_components(pformat) == dst_components);
113    }
114    assert(pformat);
115    return pformat;
116 }
117 #undef BGR_FORMAT
118 #undef FORMAT
119 
120 
/* shader-side (nir_ssa_def) view of the conversion parameters; filled in by
 * init_pbo_shader_data() from the packed pbo_data uniform below
 */
struct pbo_shader_data {
   nir_ssa_def *offset;            /* x/y start of the region */
   nir_ssa_def *range;             /* width/height/depth of the region */
   nir_ssa_def *invert;            /* bool: write rows bottom-up */
   nir_ssa_def *blocksize;         /* bytes per pixel block */
   nir_ssa_def *alignment;         /* row alignment in bytes (power of two) */
   nir_ssa_def *dst_bit_size;      /* bits per output unit: 8/16/32/64 */
   nir_ssa_def *channels;          /* number of dest channels (1-4) */
   nir_ssa_def *normalized;        /* bool: dest is unorm/snorm */
   nir_ssa_def *integer;           /* bool: dest is pure int */
   nir_ssa_def *clamp_uint;        /* bool: clamp to unsigned range */
   nir_ssa_def *r11g11b10_or_sint; /* bool: r11g11b10 float or pure sint dest */
   nir_ssa_def *r9g9b9e5;          /* bool: shared-exponent dest */
   nir_ssa_def *bits1;             /* bit size of channel 0 */
   nir_ssa_def *bits2;             /* bit size of channel 1 */
   nir_ssa_def *bits3;             /* bit size of channel 2 */
   nir_ssa_def *bits4;             /* bit size of channel 3 */
   nir_ssa_def *swap;              /* bool: byteswap output */
   nir_ssa_def *bits; //vec4       /* bits1..4 gathered into one vec4 */
};
141 
142 
/* must be under 16bytes / sizeof(vec4) / 128 bits) */
/* CPU-side packed conversion parameters, uploaded as a single uvec4
 * uniform; init_pbo_shader_data() unpacks these fields at fixed bit
 * offsets (bytes 10..15 are decoded via STRUCT_BLOCK(80..120)), so the
 * bit layout here must match those offsets exactly.
 */
struct pbo_data {
   union {
       struct {
          /* bytes 0-3: x/y start of the region */
          struct {
             uint16_t x, y;
          };
          /* bytes 4-7: extents of the region */
          struct {
             uint16_t width, height;
          };
          struct {
             uint16_t depth;
             uint8_t invert : 1;
             uint8_t blocksize : 7;   /* stored minus 1: 1..128 bytes */

             uint8_t clamp_uint : 1;
             uint8_t r11g11b10_or_sint : 1;
             uint8_t r9g9b9e5 : 1;
             uint8_t swap : 1;
             /* must be uint8_t like its neighbors: changing the bit-field
              * base type can start a new allocation unit on some ABIs
              * (e.g. MSVC), which would push alignment/dst_bit_size out of
              * the byte the shader unpacks at bit offset 88 and overflow
              * the 16-byte budget
              */
             uint8_t alignment : 2;   /* log2: decoded as 1 << n */
             uint8_t dst_bit_size : 2; //8, 16, 32, 64
          };

          struct {
             uint8_t channels : 2;    /* stored minus 1: 1..4 */
             uint8_t bits1 : 6;       /* channel 0 bit size (<= 32) */
             uint8_t normalized : 1;
             uint8_t integer : 1;
             uint8_t bits2 : 6;       /* channel 1 bit size */
             uint8_t bits3 : 6;       /* channel 2 bit size */
             uint8_t pad1 : 2;
             uint8_t bits4 : 6;       /* channel 3 bit size */
             uint8_t pad2 : 2;
          };
      };
      float vec[4];
   };
};
181 
182 
/* bit offset of a pbo_data member, for nir_extract_bits() */
#define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8)

/* load the byte at bit 'offset' of the uniform as a 32-bit def named
 * block<offset>, then run the STRUCT_MEMBER* extractions in the varargs
 */
#define STRUCT_BLOCK(offset, ...) \
   do { \
      assert(offset % 8 == 0); \
      nir_ssa_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \
      __VA_ARGS__ \
   } while (0)
/* extract 'size' bits at bit 'offset' within block<blockoffset> into 'val',
 * store 'op' (an expression over val) into sd->name, and clamp the result
 * to 'clamp' when clamp != 0
 */
#define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \
   do { \
      assert(offset + size <= 8); \
      nir_ssa_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \
      if (offset) \
         val = nir_ushr_imm(b, val, offset); \
      sd->name = op; \
      if (clamp) \
         sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \
   } while (0)
/* decode a 2-bit exponent-coded field: result = shift << val */
#define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \
   STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp)

/* extract a 1-bit field as a boolean def */
#define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \
   STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0)
206 
/* this function extracts the conversion data from pbo_data using the
 * size annotations for each grouping. data is compacted into bitfields,
 * so bitwise operations must be used to "unpack" everything
 */
static void
init_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd)
{
   /* the whole 16-byte pbo_data struct arrives as one uvec4 uniform */
   nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset");
   nir_ssa_def *ubo_load = nir_load_var(b, ubo);

   /* x/y and width/height/depth are plain 16-bit members; clamp to 16-bit max */
   sd->offset = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16)), nir_imm_int(b, 65535));
   sd->range = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16)), nir_imm_int(b, 65535));

   /* byte 10: invert flag and blocksize (stored minus one, so re-add 1) */
   STRUCT_BLOCK(80,
      STRUCT_MEMBER_BOOL(80, invert, 0);
      STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128);
   );

   /* byte 11: conversion flags plus exponent-coded alignment/dst_bit_size */
   STRUCT_BLOCK(88,
      STRUCT_MEMBER_BOOL(88, clamp_uint, 0);
      STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1);
      STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2);
      STRUCT_MEMBER_BOOL(88, swap, 3);
      STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8);
      STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64);
   );

   /* byte 12: channel count (stored minus one) and channel 0 bit size */
   STRUCT_BLOCK(96,
      STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4);
      STRUCT_MEMBER(96, bits1, 2, 6, val, 32);
   );

   /* byte 13: normalized/integer flags and channel 1 bit size */
   STRUCT_BLOCK(104,
      STRUCT_MEMBER_BOOL(104, normalized, 0);
      STRUCT_MEMBER_BOOL(104, integer, 1);
      STRUCT_MEMBER(104, bits2, 2, 6, val, 32);
   );


   /* bytes 14/15: channel 2 and 3 bit sizes */
   STRUCT_BLOCK(112,
      STRUCT_MEMBER(112, bits3, 0, 6, val, 32);
   );

   STRUCT_BLOCK(120,
      STRUCT_MEMBER(120, bits4, 0, 6, val, 32);
   );
   /* gather the per-channel sizes into one vec4 for whole-pixel masking */
   sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4);

   /* clamp swap in the shader to enable better optimizing */
   /* TODO?
   sd->swap = nir_bcsel(b, nir_ior(b,
                                   nir_ieq_imm(b, sd->blocksize, 8),
                                   nir_bcsel(b,
                                             nir_ieq_imm(b, sd->bits1, 8),
                                             nir_bcsel(b,
                                                       nir_uge(b, sd->channels, nir_imm_int(b, 2)),
                                                       nir_bcsel(b,
                                                                 nir_uge(b, sd->channels, nir_imm_int(b, 3)),
                                                                 nir_bcsel(b,
                                                                           nir_ieq(b, sd->channels, nir_imm_int(b, 4)),
                                                                           nir_ball(b, nir_ieq(b, sd->bits, nir_imm_ivec4(b, 8, 8, 8, 8))),
                                                                           nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 7), nir_imm_ivec3(b, 8, 8, 8)))),
                                                                 nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 3), nir_imm_ivec2(b, 8, 8)))),
                                                       nir_imm_bool(b, 0)),
                                             nir_imm_bool(b, 0))),
                           nir_imm_bool(b, 0),
                           sd->swap);
     */
}
276 
/* fill the packed pbo_data that init_pbo_shader_data() unpacks in the
 * shader; returns the number of components the conversion shader must
 * write — 1 when the format is "weird packed" (channels of unequal or
 * non-byte-aligned size packed into a single block), otherwise the
 * destination's channel count
 */
static unsigned
fill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap)
{
   unsigned bits[4] = {0};
   bool weird_packed = false;
   const struct util_format_description *dst_desc = util_format_description(dst_format);
   bool is_8bit = true;

   for (unsigned c = 0; c < 4; c++) {
      bits[c] = dst_desc->channel[c].size;
      if (c < dst_desc->nr_channels) {
         /* packed = any channel differing from channel 0 or not byte-aligned */
         weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0;
         if (bits[c] != 8)
            is_8bit = false;
      }
   }

   /* byteswapping single-byte values is a no-op */
   if (is_8bit || dst_desc->block.bits == 8)
      swap = false;

   unsigned dst_bit_size = 0;
   if (weird_packed) {
      /* the whole block is written as one unit */
      dst_bit_size = dst_desc->block.bits;
   } else {
      dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels;
   }
   assert(dst_bit_size);
   assert(dst_bit_size <= 64);

   /* encode 8/16/32 as 0/1/2 for the 2-bit field (decoded as 8 << n in the
    * shader).  NOTE(review): 64 >> 4 == 4 truncates to 0 in the 2-bit field
    * and would decode as 8 — confirm 64-bit blocks only occur on the
    * specially-handled packed paths
    */
   pd->dst_bit_size = dst_bit_size >> 4;
   pd->channels = dst_desc->nr_channels - 1; /* stored minus 1 (2-bit field) */
   pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm;
   /* clamp_uint doubles as "source is unsigned" on the sint path */
   pd->clamp_uint = dst_desc->is_unorm ||
                    (util_format_is_pure_sint(dst_format) &&
                     !util_format_is_pure_sint(src_format) &&
                     !util_format_is_snorm(src_format)) ||
                    util_format_is_pure_uint(dst_format);
   pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format);
   pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format);
   pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT;
   pd->bits1 = bits[0];
   pd->bits2 = bits[1];
   pd->bits3 = bits[2];
   pd->bits4 = bits[3];
   pd->swap = swap;

   return weird_packed ? 1 : dst_desc->nr_channels;
}
325 
/* compute the byte offset into the readback buffer for the pixel at
 * 'coord', honoring row alignment and y-inversion
 */
static nir_ssa_def *
get_buffer_offset(nir_builder *b, nir_ssa_def *coord, struct pbo_shader_data *sd)
{
/* from _mesa_image_offset():
      offset = topOfImage
               + (skippixels + column) * bytes_per_pixel
               + (skiprows + row) * bytes_per_row
               + (skipimages + img) * bytes_per_image;
 */
   /* unaligned row stride in bytes */
   nir_ssa_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize);
   /* for alignment > 1, round the stride up to the alignment:
    * (bytes_per_row + alignment - 1) & ~(alignment - 1)
    */
   bytes_per_row = nir_bcsel(b, nir_ult(b, sd->alignment, nir_imm_int(b, 2)),
                             bytes_per_row,
                             nir_iand(b,
                                      nir_isub(b, nir_iadd(b, bytes_per_row, sd->alignment), nir_imm_int(b, 1)),
                                      nir_inot(b, nir_isub(b, sd->alignment, nir_imm_int(b, 1)))));
   nir_ssa_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1));
   /* a negative row stride walks the image bottom-up when inverted */
   bytes_per_row = nir_bcsel(b, sd->invert,
                             nir_isub(b, nir_imm_int(b, 0), bytes_per_row),
                             bytes_per_row);
   return nir_iadd(b,
                   nir_imul(b, nir_channel(b, coord, 0), sd->blocksize),
                   nir_iadd(b,
                            nir_imul(b, nir_channel(b, coord, 1), bytes_per_row),
                            nir_imul(b, nir_channel(b, coord, 2), bytes_per_image)));
}
351 
/* store 'pixel' into ssbo 0 at byte offset 'buffer_offset'; alignment
 * follows the component bit size and all components are written
 */
static inline void
write_ssbo(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset)
{
   nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset,
                  .align_mul = pixel->bit_size / 8,
                  .write_mask = (1 << pixel->num_components) - 1);
}
359 
/* narrow 'pixel' to the destination bit size before storing; dst_bit_size
 * is a shader-side value, so the narrowing needs runtime control flow
 * (< 32 selects 16- or 8-bit, otherwise the value is stored as-is)
 */
static void
write_conversion(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd)
{
   nir_push_if(b, nir_ilt(b, sd->dst_bit_size, nir_imm_int(b, 32)));
      nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16));
         write_ssbo(b, nir_u2u16(b, pixel), buffer_offset);
      nir_push_else(b, NULL);
         write_ssbo(b, nir_u2u8(b, pixel), buffer_offset);
      nir_pop_if(b, NULL);
   nir_push_else(b, NULL);
      write_ssbo(b, pixel, buffer_offset);
   nir_pop_if(b, NULL);
}
373 
374 static nir_ssa_def *
swap2(nir_builder *b, nir_ssa_def *src)375 swap2(nir_builder *b, nir_ssa_def *src)
376 {
377    /* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */
378    return nir_ior(b,
379                   nir_ushr_imm(b, src, 8),
380                   nir_iand_imm(b, nir_ishl(b, src, nir_imm_int(b, 8)), 0xff00));
381 }
382 
383 static nir_ssa_def *
swap4(nir_builder *b, nir_ssa_def *src)384 swap4(nir_builder *b, nir_ssa_def *src)
385 {
386    /* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */
387    return nir_ior(b,
388                   /* (b >> 24) */
389                   nir_ushr_imm(b, src, 24),
390                   nir_ior(b,
391                           /* ((b >> 8) & 0xff00) */
392                           nir_iand(b, nir_ushr_imm(b, src, 8), nir_imm_int(b, 0xff00)),
393                           nir_ior(b,
394                                   /* ((b << 8) & 0xff0000) */
395                                   nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 8)), nir_imm_int(b, 0xff0000)),
396                                   /* ((b << 24) & 0xff000000) */
397                                   nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 24)), nir_imm_int(b, 0xff000000)))));
398 }
399 
/* explode the cf to handle channel counts in the shader */
/* sd->channels is a runtime value, so emit one write per possible channel
 * count and let the branches pick the right one.  For "weird packed"
 * formats the pixel was pre-packed into 1 or 2 words: bits1 == 32 marks
 * the two-word (Z32_FLOAT_S8X24-style) layout, anything else is one word.
 */
static void
grab_components(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed)
{
   if (weird_packed) {
      nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32));
         write_conversion(b, nir_channels(b, pixel, 3), buffer_offset, sd);
      nir_push_else(b, NULL);
         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
      nir_pop_if(b, NULL);
   } else {
      nir_push_if(b, nir_ieq_imm(b, sd->channels, 1));
         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
      nir_push_else(b, NULL);
         nir_push_if(b, nir_ieq_imm(b, sd->channels, 2));
            write_conversion(b, nir_channels(b, pixel, (1 << 2) - 1), buffer_offset, sd);
         nir_push_else(b, NULL);
            nir_push_if(b, nir_ieq_imm(b, sd->channels, 3));
               write_conversion(b, nir_channels(b, pixel, (1 << 3) - 1), buffer_offset, sd);
            nir_push_else(b, NULL);
               write_conversion(b, nir_channels(b, pixel, (1 << 4) - 1), buffer_offset, sd);
            nir_pop_if(b, NULL);
         nir_pop_if(b, NULL);
      nir_pop_if(b, NULL);
   }
}
426 
/* if byteswap is enabled, handle that and then write the components */
static void
handle_swap(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
            struct pbo_shader_data *sd, unsigned num_components, bool weird_packed)
{
   nir_push_if(b, sd->swap); {
      /* bytes-per-component == 2 selects the 16-bit swap, else the 32-bit one */
      nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); {
         /* this is a single high/low swap per component */
         nir_ssa_def *components[4];
         for (unsigned i = 0; i < 4; i++)
            components[i] = swap2(b, nir_channel(b, pixel, i));
         nir_ssa_def *v = nir_vec(b, components, 4);
         grab_components(b, v, buffer_offset, sd, weird_packed);
      } nir_push_else(b, NULL); {
         /* this is a pair of high/low swaps for each half of the component */
         nir_ssa_def *components[4];
         for (unsigned i = 0; i < 4; i++)
            components[i] = swap4(b, nir_channel(b, pixel, i));
         nir_ssa_def *v = nir_vec(b, components, 4);
         grab_components(b, v, buffer_offset, sd, weird_packed);
      } nir_pop_if(b, NULL);
   } nir_push_else(b, NULL); {
      /* swap disabled */
      grab_components(b, pixel, buffer_offset, sd, weird_packed);
   } nir_pop_if(b, NULL);
}
453 
/* shader-side analogue of the weird_packed computation in fill_pbo_data():
 * true when 1-based channel 'component' is active (channels >= component)
 * and its bit size either differs from channel 0's or isn't byte-aligned
 */
static nir_ssa_def *
check_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component)
{
   nir_ssa_def *c = nir_channel(b, sd->bits, component - 1);

   return nir_bcsel(b,
                    nir_ige(b, sd->channels, nir_imm_int(b, component)),
                    nir_ior(b,
                            nir_ine(b, c, sd->bits1),
                            nir_ine(b, nir_imod(b, c, nir_imm_int(b, 8)), nir_imm_int(b, 0))),
                    nir_imm_bool(b, 0));
}
466 
/* convenience function for clamping signed integers */
/* returns src clamped to be <= clamp_to_min and >= clamp_to_max.
 * NOTE(review): the parameter names reflect which op consumes each bound
 * (imin/imax), not the bound's role in the range — callers pass the upper
 * bound second and the lower bound third.
 */
static inline nir_ssa_def *
nir_imin_imax(nir_builder *build, nir_ssa_def *src, nir_ssa_def *clamp_to_min, nir_ssa_def *clamp_to_max)
{
   return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max);
}
473 
474 static inline nir_ssa_def *
nir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)475 nir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
476 {
477    /* Clamp to the range [0, 1] */
478    f = nir_fsat(b, f);
479 
480    return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
481 }
482 
483 static inline nir_ssa_def *
nir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)484 nir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
485 {
486    /* Clamp to the range [-1, 1] */
487    f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));
488 
489    return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
490 }
491 
492 static nir_ssa_def *
clamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels)493 clamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels)
494 {
495    nir_ssa_def *one = nir_imm_ivec4(b, 1, 0, 0, 0);
496    nir_ssa_def *two = nir_imm_ivec4(b, 1, 1, 0, 0);
497    nir_ssa_def *three = nir_imm_ivec4(b, 1, 1, 1, 0);
498    nir_ssa_def *four = nir_imm_ivec4(b, 1, 1, 1, 1);
499    /* avoid underflow by clamping to channel count */
500    src = nir_bcsel(b,
501                    nir_ieq(b, channels, one),
502                    nir_isub(b, src, one),
503                    nir_bcsel(b,
504                              nir_ieq_imm(b, channels, 2),
505                              nir_isub(b, src, two),
506                              nir_bcsel(b,
507                                        nir_ieq_imm(b, channels, 3),
508                                        nir_isub(b, src, three),
509                                        nir_isub(b, src, four))));
510 
511    return nir_mask(b, src, 32);
512 }
513 
/* pack (if needed), byteswap and write the pixel.  num_components == 1
 * means a "weird packed" destination may require packing the whole pixel
 * into one block first: r11g11b10, r9g9b9e5, the two-word 32-bit case
 * (e.g. Z32_FLOAT_S8X24), or generic bitfield packing; otherwise the
 * channels are written through directly
 */
static void
convert_swap_write(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
                   unsigned num_components,
                   struct pbo_shader_data *sd)
{

   /* runtime re-check of weird packing across channels 2..4 */
   nir_ssa_def *weird_packed = nir_ior(b,
                                       nir_ior(b,
                                               check_for_weird_packing(b, sd, 4),
                                               check_for_weird_packing(b, sd, 3)),
                                       check_for_weird_packing(b, sd, 2));
   if (num_components == 1) {
      nir_push_if(b, weird_packed);
         nir_push_if(b, sd->r11g11b10_or_sint);
            handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), buffer_offset, sd, 1, true);
         nir_push_else(b, NULL);
            nir_push_if(b, sd->r9g9b9e5);
               handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true);
            nir_push_else(b, NULL);
               nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
                  /* channel 0 and channels 1-2 are packed as two separate words */
                  nir_ssa_def *pack[2];
                  pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0));
                  pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6));
                  handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true);
               } nir_push_else(b, NULL);
                  /* generic bitfield packing into a single word */
                  handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true);
               nir_pop_if(b, NULL);
            nir_pop_if(b, NULL);
         nir_pop_if(b, NULL);
      nir_push_else(b, NULL);
         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
      nir_pop_if(b, NULL);
   } else {
      nir_push_if(b, weird_packed);
         handle_swap(b, pixel, buffer_offset, sd, num_components, true);
      nir_push_else(b, NULL);
         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
      nir_pop_if(b, NULL);
   }
}
554 
555 static void
do_shader_conversion(nir_builder *b, nir_ssa_def *pixel, unsigned num_components, nir_ssa_def *coord, struct pbo_shader_data *sd)556 do_shader_conversion(nir_builder *b, nir_ssa_def *pixel,
557                      unsigned num_components,
558                      nir_ssa_def *coord, struct pbo_shader_data *sd)
559 {
560    nir_ssa_def *buffer_offset = get_buffer_offset(b, coord, sd);
561 
562    nir_ssa_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels);
563 
564 #define CONVERT_SWAP_WRITE(PIXEL) \
565    convert_swap_write(b, PIXEL, buffer_offset, num_components, sd);
566    nir_push_if(b, sd->normalized);
567       nir_push_if(b, sd->clamp_uint); //unorm
568          CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32))));
569       nir_push_else(b, NULL);
570          CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask)));
571       nir_pop_if(b, NULL);
572    nir_push_else(b, NULL);
573       nir_push_if(b, sd->integer);
574          nir_push_if(b, sd->r11g11b10_or_sint); //sint
575             nir_push_if(b, sd->clamp_uint); //uint -> sint
576                CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask));
577             nir_push_else(b, NULL);
578                CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_isub(b, nir_ineg(b, signed_bit_mask), nir_imm_int(b, 1))));
579             nir_pop_if(b, NULL);
580          nir_push_else(b, NULL);
581             nir_push_if(b, sd->clamp_uint); //uint
582                /* nir_format_clamp_uint */
583                CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32)));
584             nir_pop_if(b, NULL);
585          nir_pop_if(b, NULL);
586       nir_push_else(b, NULL);
587          nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half
588             CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel));
589          nir_push_else(b, NULL);
590             CONVERT_SWAP_WRITE(pixel);
591          nir_pop_if(b, NULL);
592    nir_pop_if(b, NULL);
593 }
594 
/* build the compute shader that reads texels and writes converted pixels
 * into the readback ssbo; one invocation per texel, 8x8 workgroups
 * (64x1 for 1D), with out-of-range invocations skipped
 */
static void *
create_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components)
{
   const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE);
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert");
   b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64;
   b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 8 : 1;

   b.shader->info.workgroup_size[2] = 1;
   b.shader->info.textures_used[0] = 1;
   b.shader->info.num_ssbos = 1;
   b.shader->num_uniforms = 2;
   /* output buffer plus the source sampler */
   nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo");
   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler");
   unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type);
   sampler->data.explicit_binding = 1;

   /* unpack the conversion parameters from the packed uniform */
   struct pbo_shader_data sd;
   init_pbo_shader_data(&b, &sd);

   /* global invocation id = workgroup id * workgroup size + local id */
   nir_ssa_def *bsize = nir_imm_ivec4(&b,
                                      b.shader->info.workgroup_size[0],
                                      b.shader->info.workgroup_size[1],
                                      b.shader->info.workgroup_size[2],
                                      0);
   nir_ssa_def *wid = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *iid = nir_load_local_invocation_id(&b);
   nir_ssa_def *tile = nir_imul(&b, wid, bsize);
   nir_ssa_def *global_id = nir_iadd(&b, tile, iid);
   /* texel coordinate = invocation id offset by the region start */
   nir_ssa_def *start = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), sd.offset);

   nir_ssa_def *coord;
   if (coord_components < 3)
      coord = start;
   else {
      /* pad offset vec with global_id to get correct z offset */
      assert(coord_components == 3);
      coord = nir_vec3(&b, nir_channel(&b, start, 0),
                           nir_channel(&b, start, 1),
                           nir_channel(&b, global_id, 2));
   }
   coord = nir_trim_vector(&b, coord, coord_components);
   nir_ssa_def *offset = coord_components > 2 ?
                         nir_pad_vector_imm_int(&b, sd.offset, 0, 3) :
                         nir_trim_vector(&b, sd.offset, coord_components);
   nir_ssa_def *range = nir_trim_vector(&b, sd.range, coord_components);
   nir_ssa_def *max = nir_iadd(&b, offset, range);
   /* skip invocations outside the requested region */
   nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, max)));
   /* fetch the texel with an explicit-lod txf at lod 0 */
   nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3);
   txf->is_array = glsl_sampler_type_is_array(sampler->type);
   txf->op = nir_texop_txf;
   txf->sampler_dim = glsl_get_sampler_dim(sampler->type);
   txf->dest_type = nir_type_float32;
   txf->coord_components = coord_components;
   txf->texture_index = 0;
   txf->sampler_index = 0;
   txf->src[0].src_type = nir_tex_src_coord;
   txf->src[0].src = nir_src_for_ssa(coord);
   txf->src[1].src_type = nir_tex_src_lod;
   txf->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   txf->src[2].src_type = nir_tex_src_texture_deref;
   nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler);
   txf->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);

   nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
   nir_builder_instr_insert(&b, &txf->instr);

   /* pass the grid offset as the coord to get the zero-indexed buffer offset */
   do_shader_conversion(&b, &txf->dest.ssa, num_components, global_id, &sd);

   nir_pop_if(&b, NULL);

   nir_validate_shader(b.shader, NULL);
   gl_nir_opts(b.shader);
   return st_nir_finish_builtin_shader(st, b.shader);
}
671 
672 static void
invert_swizzle(uint8_t *out, const uint8_t *in)673 invert_swizzle(uint8_t *out, const uint8_t *in)
674 {
675    /* First, default to all zeroes to prevent uninitialized junk */
676    for (unsigned c = 0; c < 4; ++c)
677       out[c] = PIPE_SWIZZLE_0;
678 
679    /* Now "do" what the swizzle says */
680    for (unsigned c = 0; c < 4; ++c) {
681       unsigned char i = in[c];
682 
683       /* Who cares? */
684       assert(PIPE_SWIZZLE_X == 0);
685       if (i > PIPE_SWIZZLE_W)
686          continue;
687       /* Invert */
688       unsigned idx = i - PIPE_SWIZZLE_X;
689       out[idx] = PIPE_SWIZZLE_X + c;
690    }
691 }
692 
693 static uint32_t
compute_shader_key(enum pipe_texture_target target, unsigned num_components)694 compute_shader_key(enum pipe_texture_target target, unsigned num_components)
695 {
696    uint8_t key_target[] = {
697       [PIPE_BUFFER] = UINT8_MAX,
698       [PIPE_TEXTURE_1D] = 1,
699       [PIPE_TEXTURE_2D] = 2,
700       [PIPE_TEXTURE_3D] = 3,
701       [PIPE_TEXTURE_CUBE] = 4,
702       [PIPE_TEXTURE_RECT] = UINT8_MAX,
703       [PIPE_TEXTURE_1D_ARRAY] = 5,
704       [PIPE_TEXTURE_2D_ARRAY] = 6,
705       [PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX,
706    };
707    assert(target < ARRAY_SIZE(key_target));
708    assert(key_target[target] != UINT8_MAX);
709    return key_target[target] | (num_components << 3);
710 }
711 
712 static unsigned
get_dim_from_target(enum pipe_texture_target target)713 get_dim_from_target(enum pipe_texture_target target)
714 {
715    switch (target) {
716    case PIPE_TEXTURE_1D:
717       return 1;
718    case PIPE_TEXTURE_2D_ARRAY:
719    case PIPE_TEXTURE_3D:
720       return 3;
721    default:
722       return 2;
723    }
724 }
725 
726 static enum pipe_texture_target
get_target_from_texture(struct pipe_resource *src)727 get_target_from_texture(struct pipe_resource *src)
728 {
729    enum pipe_texture_target view_target;
730    switch (src->target) {
731    case PIPE_TEXTURE_RECT:
732       view_target = PIPE_TEXTURE_2D;
733       break;
734    case PIPE_TEXTURE_CUBE:
735    case PIPE_TEXTURE_CUBE_ARRAY:
736       view_target = PIPE_TEXTURE_2D_ARRAY;
737       break;
738    default:
739       view_target = src->target;
740       break;
741    }
742    return view_target;
743 }
744 
/* force swizzling behavior for sampling
 *
 * These flags steer how the sampler view's swizzle is rewritten in
 * download_texture_compute().  The low values (1-5) are mutually exclusive
 * "clamp" modes that zero unused components (and sometimes force alpha to 1)
 * for GL base formats drivers don't support natively; the high bits are
 * OR-able modifiers for single-component selection and BGRA reordering.
 */
enum swizzle_clamp {
   /* force component selection for named format */
   SWIZZLE_CLAMP_LUMINANCE = 1,
   SWIZZLE_CLAMP_ALPHA = 2,
   SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3,
   SWIZZLE_CLAMP_INTENSITY = 4,
   SWIZZLE_CLAMP_RGBX = 5,

   /* select only 1 component (GL_GREEN_INTEGER / GL_BLUE_INTEGER readback) */
   SWIZZLE_CLAMP_GREEN = 8,
   SWIZZLE_CLAMP_BLUE = 16,

   /* reverse ordering for format emulation */
   SWIZZLE_CLAMP_BGRA = 32,
};
761 
762 static bool
can_copy_direct(const struct gl_pixelstore_attrib *pack)763 can_copy_direct(const struct gl_pixelstore_attrib *pack)
764 {
765    return !(pack->RowLength ||
766             pack->SkipPixels ||
767             pack->SkipRows ||
768             pack->ImageHeight ||
769             pack->SkipImages);
770 }
771 
/* Run the format-conversion compute shader to download a (sub)region of
 * 'src' into a buffer laid out for glGetTexImage/glGetTexSubImage.
 *
 * Returns the destination buffer: the caller's PBO buffer when the packing
 * allows writing into it directly, otherwise a newly-created staging buffer
 * that the caller owns (and must unref after copying out).  Returns NULL on
 * failure (sampler view or staging buffer creation failed).
 */
static struct pipe_resource *
download_texture_compute(struct st_context *st,
                         const struct gl_pixelstore_attrib *pack,
                         GLint xoffset, GLint yoffset, GLint zoffset,
                         GLsizei width, GLsizei height, GLint depth,
                         unsigned level, unsigned layer,
                         GLenum format, GLenum type,
                         enum pipe_format src_format,
                         enum pipe_texture_target view_target,
                         struct pipe_resource *src,
                         enum pipe_format dst_format,
                         enum swizzle_clamp swizzle_clamp)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = st->screen;
   struct pipe_resource *dst = NULL;
   unsigned dim = get_dim_from_target(view_target);

   /* clamp 3d offsets based on slice */
   if (view_target == PIPE_TEXTURE_3D)
      zoffset += layer;

   unsigned num_components = 0;
   /* Upload constants */
   {
      struct pipe_constant_buffer cb;
      /* 1D arrays carry the layer in yoffset handling below, never zoffset */
      assert(view_target != PIPE_TEXTURE_1D_ARRAY || !zoffset);
      struct pbo_data pd = {
         .x = xoffset,
         .y = view_target == PIPE_TEXTURE_1D_ARRAY ? 0 : yoffset,
         .width = width, .height = height, .depth = depth,
         .invert = pack->Invert,
         /* stored as size-1 and log2(alignment) to shrink the uniform */
         .blocksize = util_format_get_blocksize(dst_format) - 1,
         .alignment = ffs(MAX2(pack->Alignment, 1)) - 1,
      };
      num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1);

      cb.buffer = NULL;
      cb.user_buffer = &pd;
      cb.buffer_offset = 0;
      cb.buffer_size = sizeof(pd);

      pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb);
   }

   /* Look up (or lazily create and cache) the conversion shader for this
    * target/component-count combination.
    */
   uint32_t hash_key = compute_shader_key(view_target, num_components);
   assert(hash_key != 0);

   struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key);
   void *cs;
   if (!he) {
      cs = create_conversion_shader(st, view_target, num_components);
      he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs);
   }
   cs = he->data;
   assert(cs);
   struct cso_context *cso = st->cso_context;

   cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS);
   cso_set_compute_shader_handle(cso, cs);

   /* Set up the sampler_view */
   {
      struct pipe_sampler_view templ;
      struct pipe_sampler_view *sampler_view;
      struct pipe_sampler_state sampler = {0};
      sampler.normalized_coords = true;
      const struct pipe_sampler_state *samplers[1] = {&sampler};
      const struct util_format_description *desc = util_format_description(dst_format);

      u_sampler_view_default_template(&templ, src, src_format);
      if (util_format_is_depth_or_stencil(dst_format)) {
         /* depth/stencil reads replicate the X component everywhere */
         templ.swizzle_r = PIPE_SWIZZLE_X;
         templ.swizzle_g = PIPE_SWIZZLE_X;
         templ.swizzle_b = PIPE_SWIZZLE_X;
         templ.swizzle_a = PIPE_SWIZZLE_X;
      } else {
         uint8_t invswizzle[4];
         const uint8_t *swizzle;

         /* these swizzle output bits require explicit component selection/ordering */
         if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) {
            for (unsigned i = 0; i < 4; i++)
               invswizzle[i] = PIPE_SWIZZLE_Y;
         } else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) {
            for (unsigned i = 0; i < 4; i++)
               invswizzle[i] = PIPE_SWIZZLE_Z;
         } else {
            if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) {
               /* emulate BGR(A) ordering by borrowing the swizzle of a real
                * BGR(A) format with the same component count */
               if (util_format_get_nr_components(dst_format) == 3)
                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle;
               else
                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle;
            } else {
               swizzle = desc->swizzle;
            }
            invert_swizzle(invswizzle, swizzle);
         }
         swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE);

         /* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */
         switch (swizzle_clamp) {
         case SWIZZLE_CLAMP_LUMINANCE:
            if (util_format_is_luminance(dst_format))
               break;
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_X)
                  invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_ALPHA:
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_LUMINANCE_ALPHA:
            if (util_format_is_luminance_alpha(dst_format))
               break;
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_INTENSITY:
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] == PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_1;
               else if (invswizzle[i] != PIPE_SWIZZLE_X)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_RGBX:
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] == PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_1;
            }
            break;
         default: break;
         }
         templ.swizzle_r = invswizzle[0];
         templ.swizzle_g = invswizzle[1];
         templ.swizzle_b = invswizzle[2];
         templ.swizzle_a = invswizzle[3];
      }
      templ.target = view_target;
      templ.u.tex.first_level = level;
      templ.u.tex.last_level = level;

      /* array textures expect to have array index provided */
      if (view_target != PIPE_TEXTURE_3D && src->array_size) {
         templ.u.tex.first_layer = layer;
         if (view_target == PIPE_TEXTURE_1D_ARRAY) {
            /* for 1D arrays the Y offset/extent select the layer range */
            templ.u.tex.first_layer += yoffset;
            templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1;
         } else {
            templ.u.tex.first_layer += zoffset;
            templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1;
         }
      }

      sampler_view = pipe->create_sampler_view(pipe, src, &templ);
      if (sampler_view == NULL)
         goto fail;

      pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false,
                              &sampler_view);
      st->state.num_sampler_views[PIPE_SHADER_COMPUTE] =
         MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1);

      pipe_sampler_view_reference(&sampler_view, NULL);

      cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers);
   }

   /* Set up destination buffer */
   unsigned img_stride = src->target == PIPE_TEXTURE_3D ||
                         src->target == PIPE_TEXTURE_2D_ARRAY ||
                         src->target == PIPE_TEXTURE_CUBE_ARRAY ?
                         /* only use image stride for 3d images to avoid pulling in IMAGE_HEIGHT pixelstore */
                         _mesa_image_image_stride(pack, width, height, format, type) :
                         _mesa_image_row_stride(pack, width, format, type) * height;
   unsigned buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride;
   {
      struct pipe_shader_buffer buffer;
      memset(&buffer, 0, sizeof(buffer));
      if (can_copy_direct(pack) && pack->BufferObj) {
         /* write straight into the bound PBO; no intermediate copy needed */
         dst = pack->BufferObj->buffer;
         assert(pack->BufferObj->Size >= buffer_size);
      } else {
         dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size);
         if (!dst)
            goto fail;
      }
      buffer.buffer = dst;
      buffer.buffer_size = buffer_size;

      pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1);
   }

   /* 8x8 threadgroups for 2D/3D, 64x1 for 1D; last_block trims the edges */
   struct pipe_grid_info info = { 0 };
   info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64;
   info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1;
   info.last_block[0] = width % info.block[0];
   info.last_block[1] = height % info.block[1];
   info.block[2] = 1;
   info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
   info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
   info.grid[2] = depth;

   pipe->launch_grid(pipe, &info);

fail:
   cso_restore_compute_state(cso);

   /* Unbind all because st/mesa won't do it if the current shader doesn't
    * use them.
    */
   pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0,
                           st->state.num_sampler_views[PIPE_SHADER_COMPUTE],
                           false, NULL);
   st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0;
   pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0);

   st->dirty |= ST_NEW_CS_CONSTANTS |
                ST_NEW_CS_SSBOS |
                ST_NEW_CS_SAMPLER_VIEWS;

   /* NULL when the fail: path was taken before dst was assigned */
   return dst;
}
1002 
/* Map the shader-converted staging buffer and copy it into the client's
 * destination (or PBO via _mesa_map_pbo_dest), applying the pixelstore
 * packing parameters the compute shader deliberately ignored.
 */
static void
copy_converted_buffer(struct gl_context * ctx,
                    struct gl_pixelstore_attrib *pack,
                    enum pipe_texture_target view_target,
                    struct pipe_resource *dst, enum pipe_format dst_format,
                    GLint xoffset, GLint yoffset, GLint zoffset,
                    GLsizei width, GLsizei height, GLint depth,
                    GLenum format, GLenum type, void *pixels)
{
   struct pipe_transfer *xfer;
   struct st_context *st = st_context(ctx);
   unsigned dim = get_dim_from_target(view_target);
   uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer);
   if (!map)
      return;

   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
   /* compute shader doesn't handle these to cut down on uniform size */
   if (!can_copy_direct(pack)) {
      /* 1D arrays: treat layers as depth slices for row-by-row addressing */
      if (view_target == PIPE_TEXTURE_1D_ARRAY) {
         depth = height;
         height = 1;
         zoffset = yoffset;
         yoffset = 0;
      }
      /* source addressing uses a packing with RowLength..SkipImages zeroed,
       * since the shader wrote tightly within those parameters; NOTE: this
       * memset relies on those fields being contiguous in
       * gl_pixelstore_attrib between RowLength and SwapBytes
       */
      struct gl_pixelstore_attrib packing = *pack;
      memset(&packing.RowLength, 0, offsetof(struct gl_pixelstore_attrib, SwapBytes) - offsetof(struct gl_pixelstore_attrib, RowLength));
      /* copy one row at a time: dst address honors the full pixelstore
       * state, src address only the shader-applied subset
       */
      for (unsigned z = 0; z < depth; z++) {
         for (unsigned y = 0; y < height; y++) {
            GLubyte *dst = _mesa_image_address(dim, pack, pixels,
                                       width, height, format, type,
                                       z, y, 0);
            GLubyte *srcpx = _mesa_image_address(dim, &packing, map,
                                                 width, height, format, type,
                                                 z, y, 0);
            util_streaming_load_memcpy(dst, srcpx, util_format_get_stride(dst_format, width));
         }
      }
   } else {
      /* direct copy for all other cases */
      util_streaming_load_memcpy(pixels, map, dst->width0);
   }

   _mesa_unmap_pbo_dest(ctx, pack);
   pipe_buffer_unmap(st->pipe, xfer);
}
1049 
1050 bool
st_GetTexSubImage_shader(struct gl_context * ctx, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, void * pixels, struct gl_texture_image *texImage)1051 st_GetTexSubImage_shader(struct gl_context * ctx,
1052                          GLint xoffset, GLint yoffset, GLint zoffset,
1053                          GLsizei width, GLsizei height, GLint depth,
1054                          GLenum format, GLenum type, void * pixels,
1055                          struct gl_texture_image *texImage)
1056 {
1057    struct st_context *st = st_context(ctx);
1058    struct pipe_screen *screen = st->screen;
1059    struct gl_texture_object *stObj = texImage->TexObject;
1060    struct pipe_resource *src = texImage->pt;
1061    struct pipe_resource *dst = NULL;
1062    enum pipe_format dst_format, src_format;
1063    unsigned level = (texImage->pt != stObj->pt ? 0 : texImage->Level) + texImage->TexObject->Attrib.MinLevel;
1064    unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer;
1065    enum pipe_texture_target view_target;
1066 
1067    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
1068           !_mesa_is_format_astc_2d(texImage->TexFormat) &&
1069           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
1070 
1071    /* See if the texture format already matches the format and type,
1072     * in which case the memcpy-based fast path will be used. */
1073    if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
1074                                             type, ctx->Pack.SwapBytes, NULL)) {
1075       return false;
1076    }
1077    enum swizzle_clamp swizzle_clamp = 0;
1078    src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src);
1079    if (src_format == PIPE_FORMAT_NONE)
1080       return false;
1081 
1082    if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) {
1083       /* special handling for drivers that don't support these formats natively */
1084       if (texImage->_BaseFormat == GL_LUMINANCE)
1085          swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE;
1086       else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA)
1087          swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA;
1088       else if (texImage->_BaseFormat == GL_ALPHA)
1089          swizzle_clamp = SWIZZLE_CLAMP_ALPHA;
1090       else if (texImage->_BaseFormat == GL_INTENSITY)
1091          swizzle_clamp = SWIZZLE_CLAMP_INTENSITY;
1092       else if (texImage->_BaseFormat == GL_RGB)
1093          swizzle_clamp = SWIZZLE_CLAMP_RGBX;
1094    }
1095 
1096    dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0);
1097 
1098    if (dst_format == PIPE_FORMAT_NONE) {
1099       bool need_bgra_swizzle = false;
1100       dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle);
1101       if (dst_format == PIPE_FORMAT_NONE)
1102          return false;
1103       /* special swizzling for component selection */
1104       if (need_bgra_swizzle)
1105          swizzle_clamp |= SWIZZLE_CLAMP_BGRA;
1106       else if (format == GL_GREEN_INTEGER)
1107          swizzle_clamp |= SWIZZLE_CLAMP_GREEN;
1108       else if (format == GL_BLUE_INTEGER)
1109          swizzle_clamp |= SWIZZLE_CLAMP_BLUE;
1110    }
1111 
1112    /* check with the driver to see if memcpy is likely to be faster */
1113    if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true))
1114       return false;
1115 
1116    view_target = get_target_from_texture(src);
1117    /* I don't know why this works
1118     * only for the texture rects
1119     * but that's how it is
1120     */
1121    if ((src->target != PIPE_TEXTURE_RECT &&
1122        /* this would need multiple samplerviews */
1123        ((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) ||
1124        /* these format just doesn't work and science can't explain why */
1125        dst_format == PIPE_FORMAT_Z32_FLOAT)) ||
1126        /* L8 -> L32_FLOAT is another thinker */
1127        (!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT))
1128       return false;
1129 
1130    dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth,
1131                                   level, layer, format, type, src_format, view_target, src, dst_format,
1132                                   swizzle_clamp);
1133 
1134    if (!can_copy_direct(&ctx->Pack) || !ctx->Pack.BufferObj) {
1135       copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset,
1136                           width, height, depth, format, type, pixels);
1137 
1138       pipe_resource_reference(&dst, NULL);
1139    }
1140 
1141    return true;
1142 }
1143 
1144