1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2016 Red Hat.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci#include "util/u_inlines.h"
24bf215546Sopenharmony_ci#include "util/u_math.h"
25bf215546Sopenharmony_ci#include "util/u_memory.h"
26bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h"
27bf215546Sopenharmony_ci#include "draw/draw_context.h"
28bf215546Sopenharmony_ci#include "draw/draw_vertex.h"
29bf215546Sopenharmony_ci#include "sp_context.h"
30bf215546Sopenharmony_ci#include "sp_screen.h"
31bf215546Sopenharmony_ci#include "sp_state.h"
32bf215546Sopenharmony_ci#include "sp_texture.h"
33bf215546Sopenharmony_ci#include "sp_tex_sample.h"
34bf215546Sopenharmony_ci#include "sp_tex_tile_cache.h"
35bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_cistatic void
38bf215546Sopenharmony_cics_prepare(const struct sp_compute_shader *cs,
39bf215546Sopenharmony_ci           struct tgsi_exec_machine *machine,
40bf215546Sopenharmony_ci           int local_x, int local_y, int local_z,
41bf215546Sopenharmony_ci           int g_w, int g_h, int g_d,
42bf215546Sopenharmony_ci           int b_w, int b_h, int b_d,
43bf215546Sopenharmony_ci           struct tgsi_sampler *sampler,
44bf215546Sopenharmony_ci           struct tgsi_image *image,
45bf215546Sopenharmony_ci           struct tgsi_buffer *buffer )
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   int j;
48bf215546Sopenharmony_ci   /*
49bf215546Sopenharmony_ci    * Bind tokens/shader to the interpreter's machine state.
50bf215546Sopenharmony_ci    */
51bf215546Sopenharmony_ci   tgsi_exec_machine_bind_shader(machine,
52bf215546Sopenharmony_ci                                 cs->tokens,
53bf215546Sopenharmony_ci                                 sampler, image, buffer);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
56bf215546Sopenharmony_ci      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
57bf215546Sopenharmony_ci      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
58bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[0].i[j] = local_x + j;
59bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[1].i[j] = local_y;
60bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[2].i[j] = local_z;
61bf215546Sopenharmony_ci      }
62bf215546Sopenharmony_ci   }
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
65bf215546Sopenharmony_ci      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
66bf215546Sopenharmony_ci      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
67bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[0].i[j] = g_w;
68bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[1].i[j] = g_h;
69bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[2].i[j] = g_d;
70bf215546Sopenharmony_ci      }
71bf215546Sopenharmony_ci   }
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
74bf215546Sopenharmony_ci      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
75bf215546Sopenharmony_ci      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
76bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[0].i[j] = b_w;
77bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[1].i[j] = b_h;
78bf215546Sopenharmony_ci         machine->SystemValue[i].xyzw[2].i[j] = b_d;
79bf215546Sopenharmony_ci      }
80bf215546Sopenharmony_ci   }
81bf215546Sopenharmony_ci}
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_cistatic bool
84bf215546Sopenharmony_cics_run(const struct sp_compute_shader *cs,
85bf215546Sopenharmony_ci       int g_w, int g_h, int g_d,
86bf215546Sopenharmony_ci       struct tgsi_exec_machine *machine, bool restart)
87bf215546Sopenharmony_ci{
88bf215546Sopenharmony_ci   if (!restart) {
89bf215546Sopenharmony_ci      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
90bf215546Sopenharmony_ci         unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
91bf215546Sopenharmony_ci         int j;
92bf215546Sopenharmony_ci         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
93bf215546Sopenharmony_ci            machine->SystemValue[i].xyzw[0].i[j] = g_w;
94bf215546Sopenharmony_ci            machine->SystemValue[i].xyzw[1].i[j] = g_h;
95bf215546Sopenharmony_ci            machine->SystemValue[i].xyzw[2].i[j] = g_d;
96bf215546Sopenharmony_ci         }
97bf215546Sopenharmony_ci      }
98bf215546Sopenharmony_ci   }
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   if (machine->pc != -1)
103bf215546Sopenharmony_ci      return true;
104bf215546Sopenharmony_ci   return false;
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
/**
 * Execute one workgroup by running every machine in \p machines.
 *
 * All machines are run once from the start.  While at least one of them
 * reports (via cs_run() returning true) that it has not finished, every
 * machine is run again in restart mode.  The loop ends after the first
 * pass in which no machine reports that.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;

   for (;;) {
      bool any_pending = false;

      /* Every machine is run on every pass, even once one is pending. */
      for (int t = 0; t < num_threads; t++) {
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_cistatic void
129bf215546Sopenharmony_cics_delete(const struct sp_compute_shader *cs,
130bf215546Sopenharmony_ci          struct tgsi_exec_machine *machine)
131bf215546Sopenharmony_ci{
132bf215546Sopenharmony_ci   if (machine->Tokens == cs->tokens) {
133bf215546Sopenharmony_ci      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
134bf215546Sopenharmony_ci   }
135bf215546Sopenharmony_ci}
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_cistatic void
138bf215546Sopenharmony_cifill_grid_size(struct pipe_context *context,
139bf215546Sopenharmony_ci               const struct pipe_grid_info *info,
140bf215546Sopenharmony_ci               uint32_t grid_size[3])
141bf215546Sopenharmony_ci{
142bf215546Sopenharmony_ci   struct pipe_transfer *transfer;
143bf215546Sopenharmony_ci   uint32_t *params;
144bf215546Sopenharmony_ci   if (!info->indirect) {
145bf215546Sopenharmony_ci      grid_size[0] = info->grid[0];
146bf215546Sopenharmony_ci      grid_size[1] = info->grid[1];
147bf215546Sopenharmony_ci      grid_size[2] = info->grid[2];
148bf215546Sopenharmony_ci      return;
149bf215546Sopenharmony_ci   }
150bf215546Sopenharmony_ci   params = pipe_buffer_map_range(context, info->indirect,
151bf215546Sopenharmony_ci                                  info->indirect_offset,
152bf215546Sopenharmony_ci                                  3 * sizeof(uint32_t),
153bf215546Sopenharmony_ci                                  PIPE_MAP_READ,
154bf215546Sopenharmony_ci                                  &transfer);
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   if (!transfer)
157bf215546Sopenharmony_ci      return;
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci   grid_size[0] = params[0];
160bf215546Sopenharmony_ci   grid_size[1] = params[1];
161bf215546Sopenharmony_ci   grid_size[2] = params[2];
162bf215546Sopenharmony_ci   pipe_buffer_unmap(context, transfer);
163bf215546Sopenharmony_ci}
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_civoid
166bf215546Sopenharmony_cisoftpipe_launch_grid(struct pipe_context *context,
167bf215546Sopenharmony_ci                     const struct pipe_grid_info *info)
168bf215546Sopenharmony_ci{
169bf215546Sopenharmony_ci   struct softpipe_context *softpipe = softpipe_context(context);
170bf215546Sopenharmony_ci   struct sp_compute_shader *cs = softpipe->cs;
171bf215546Sopenharmony_ci   int num_threads_in_group;
172bf215546Sopenharmony_ci   struct tgsi_exec_machine **machines;
173bf215546Sopenharmony_ci   int bwidth, bheight, bdepth;
174bf215546Sopenharmony_ci   int local_x, local_y, local_z, i;
175bf215546Sopenharmony_ci   int g_w, g_h, g_d;
176bf215546Sopenharmony_ci   uint32_t grid_size[3] = {0};
177bf215546Sopenharmony_ci   void *local_mem = NULL;
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   softpipe_update_compute_samplers(softpipe);
180bf215546Sopenharmony_ci   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
181bf215546Sopenharmony_ci   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
182bf215546Sopenharmony_ci   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
183bf215546Sopenharmony_ci   num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth;
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   fill_grid_size(context, info, grid_size);
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   if (cs->shader.req_local_mem) {
188bf215546Sopenharmony_ci      local_mem = CALLOC(1, cs->shader.req_local_mem);
189bf215546Sopenharmony_ci   }
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
192bf215546Sopenharmony_ci   if (!machines) {
193bf215546Sopenharmony_ci      FREE(local_mem);
194bf215546Sopenharmony_ci      return;
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
198bf215546Sopenharmony_ci   int idx = 0;
199bf215546Sopenharmony_ci   for (local_z = 0; local_z < bdepth; local_z++) {
200bf215546Sopenharmony_ci      for (local_y = 0; local_y < bheight; local_y++) {
201bf215546Sopenharmony_ci         for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) {
202bf215546Sopenharmony_ci            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci            machines[idx]->LocalMem = local_mem;
205bf215546Sopenharmony_ci            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
206bf215546Sopenharmony_ci            machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1;
207bf215546Sopenharmony_ci            cs_prepare(cs, machines[idx],
208bf215546Sopenharmony_ci                       local_x, local_y, local_z,
209bf215546Sopenharmony_ci                       grid_size[0], grid_size[1], grid_size[2],
210bf215546Sopenharmony_ci                       bwidth, bheight, bdepth,
211bf215546Sopenharmony_ci                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
212bf215546Sopenharmony_ci                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
213bf215546Sopenharmony_ci                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
214bf215546Sopenharmony_ci            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
215bf215546Sopenharmony_ci                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
216bf215546Sopenharmony_ci                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
217bf215546Sopenharmony_ci            idx++;
218bf215546Sopenharmony_ci         }
219bf215546Sopenharmony_ci      }
220bf215546Sopenharmony_ci   }
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   for (g_d = 0; g_d < grid_size[2]; g_d++) {
223bf215546Sopenharmony_ci      for (g_h = 0; g_h < grid_size[1]; g_h++) {
224bf215546Sopenharmony_ci         for (g_w = 0; g_w < grid_size[0]; g_w++) {
225bf215546Sopenharmony_ci            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
226bf215546Sopenharmony_ci         }
227bf215546Sopenharmony_ci      }
228bf215546Sopenharmony_ci   }
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   if (softpipe->active_statistics_queries) {
231bf215546Sopenharmony_ci      softpipe->pipeline_statistics.cs_invocations +=
232bf215546Sopenharmony_ci          grid_size[0] * grid_size[1] * grid_size[2];
233bf215546Sopenharmony_ci   }
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci   for (i = 0; i < num_threads_in_group; i++) {
236bf215546Sopenharmony_ci      cs_delete(cs, machines[i]);
237bf215546Sopenharmony_ci      tgsi_exec_machine_destroy(machines[i]);
238bf215546Sopenharmony_ci   }
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   FREE(local_mem);
241bf215546Sopenharmony_ci   FREE(machines);
242bf215546Sopenharmony_ci}
243