1/*
2 * Copyright 2016 Red Hat.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "util/u_inlines.h"
24#include "util/u_math.h"
25#include "util/u_memory.h"
26#include "pipe/p_shader_tokens.h"
27#include "draw/draw_context.h"
28#include "draw/draw_vertex.h"
29#include "sp_context.h"
30#include "sp_screen.h"
31#include "sp_state.h"
32#include "sp_texture.h"
33#include "sp_tex_sample.h"
34#include "sp_tex_tile_cache.h"
35#include "tgsi/tgsi_parse.h"
36
37static void
38cs_prepare(const struct sp_compute_shader *cs,
39           struct tgsi_exec_machine *machine,
40           int local_x, int local_y, int local_z,
41           int g_w, int g_h, int g_d,
42           int b_w, int b_h, int b_d,
43           struct tgsi_sampler *sampler,
44           struct tgsi_image *image,
45           struct tgsi_buffer *buffer )
46{
47   int j;
48   /*
49    * Bind tokens/shader to the interpreter's machine state.
50    */
51   tgsi_exec_machine_bind_shader(machine,
52                                 cs->tokens,
53                                 sampler, image, buffer);
54
55   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
56      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
57      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
58         machine->SystemValue[i].xyzw[0].i[j] = local_x + j;
59         machine->SystemValue[i].xyzw[1].i[j] = local_y;
60         machine->SystemValue[i].xyzw[2].i[j] = local_z;
61      }
62   }
63
64   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
65      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
66      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
67         machine->SystemValue[i].xyzw[0].i[j] = g_w;
68         machine->SystemValue[i].xyzw[1].i[j] = g_h;
69         machine->SystemValue[i].xyzw[2].i[j] = g_d;
70      }
71   }
72
73   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
74      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
75      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
76         machine->SystemValue[i].xyzw[0].i[j] = b_w;
77         machine->SystemValue[i].xyzw[1].i[j] = b_h;
78         machine->SystemValue[i].xyzw[2].i[j] = b_d;
79      }
80   }
81}
82
83static bool
84cs_run(const struct sp_compute_shader *cs,
85       int g_w, int g_h, int g_d,
86       struct tgsi_exec_machine *machine, bool restart)
87{
88   if (!restart) {
89      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
90         unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
91         int j;
92         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
93            machine->SystemValue[i].xyzw[0].i[j] = g_w;
94            machine->SystemValue[i].xyzw[1].i[j] = g_h;
95            machine->SystemValue[i].xyzw[2].i[j] = g_d;
96         }
97      }
98   }
99
100   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
101
102   if (machine->pc != -1)
103      return true;
104   return false;
105}
106
/*
 * Execute one workgroup, identified by (g_w, g_h, g_d), on all of its
 * machines.
 *
 * Each pass runs every machine once.  The first pass starts the machines
 * from the beginning; if any machine reports (via cs_run() returning true)
 * that it still has work pending, another pass resumes all machines from
 * their saved pc.  This repeats until a pass ends with no machine pending.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;

   for (;;) {
      bool any_pending = false;

      /* Every machine is run on every pass, pending or not. */
      for (int t = 0; t < num_threads; t++) {
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
127
128static void
129cs_delete(const struct sp_compute_shader *cs,
130          struct tgsi_exec_machine *machine)
131{
132   if (machine->Tokens == cs->tokens) {
133      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
134   }
135}
136
137static void
138fill_grid_size(struct pipe_context *context,
139               const struct pipe_grid_info *info,
140               uint32_t grid_size[3])
141{
142   struct pipe_transfer *transfer;
143   uint32_t *params;
144   if (!info->indirect) {
145      grid_size[0] = info->grid[0];
146      grid_size[1] = info->grid[1];
147      grid_size[2] = info->grid[2];
148      return;
149   }
150   params = pipe_buffer_map_range(context, info->indirect,
151                                  info->indirect_offset,
152                                  3 * sizeof(uint32_t),
153                                  PIPE_MAP_READ,
154                                  &transfer);
155
156   if (!transfer)
157      return;
158
159   grid_size[0] = params[0];
160   grid_size[1] = params[1];
161   grid_size[2] = params[2];
162   pipe_buffer_unmap(context, transfer);
163}
164
165void
166softpipe_launch_grid(struct pipe_context *context,
167                     const struct pipe_grid_info *info)
168{
169   struct softpipe_context *softpipe = softpipe_context(context);
170   struct sp_compute_shader *cs = softpipe->cs;
171   int num_threads_in_group;
172   struct tgsi_exec_machine **machines;
173   int bwidth, bheight, bdepth;
174   int local_x, local_y, local_z, i;
175   int g_w, g_h, g_d;
176   uint32_t grid_size[3] = {0};
177   void *local_mem = NULL;
178
179   softpipe_update_compute_samplers(softpipe);
180   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
181   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
182   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
183   num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth;
184
185   fill_grid_size(context, info, grid_size);
186
187   if (cs->shader.req_local_mem) {
188      local_mem = CALLOC(1, cs->shader.req_local_mem);
189   }
190
191   machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
192   if (!machines) {
193      FREE(local_mem);
194      return;
195   }
196
197   /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
198   int idx = 0;
199   for (local_z = 0; local_z < bdepth; local_z++) {
200      for (local_y = 0; local_y < bheight; local_y++) {
201         for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) {
202            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
203
204            machines[idx]->LocalMem = local_mem;
205            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
206            machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1;
207            cs_prepare(cs, machines[idx],
208                       local_x, local_y, local_z,
209                       grid_size[0], grid_size[1], grid_size[2],
210                       bwidth, bheight, bdepth,
211                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
212                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
213                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
214            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
215                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
216                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
217            idx++;
218         }
219      }
220   }
221
222   for (g_d = 0; g_d < grid_size[2]; g_d++) {
223      for (g_h = 0; g_h < grid_size[1]; g_h++) {
224         for (g_w = 0; g_w < grid_size[0]; g_w++) {
225            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
226         }
227      }
228   }
229
230   if (softpipe->active_statistics_queries) {
231      softpipe->pipeline_statistics.cs_invocations +=
232          grid_size[0] * grid_size[1] * grid_size[2];
233   }
234
235   for (i = 0; i < num_threads_in_group; i++) {
236      cs_delete(cs, machines[i]);
237      tgsi_exec_machine_destroy(machines[i]);
238   }
239
240   FREE(local_mem);
241   FREE(machines);
242}
243