1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "brw_private.h"
25bf215546Sopenharmony_ci#include "compiler/shader_info.h"
26bf215546Sopenharmony_ci#include "intel/dev/intel_debug.h"
27bf215546Sopenharmony_ci#include "intel/dev/intel_device_info.h"
28bf215546Sopenharmony_ci#include "util/ralloc.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ciunsigned
31bf215546Sopenharmony_cibrw_required_dispatch_width(const struct shader_info *info)
32bf215546Sopenharmony_ci{
33bf215546Sopenharmony_ci   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
34bf215546Sopenharmony_ci      assert(gl_shader_stage_uses_workgroup(info->stage));
35bf215546Sopenharmony_ci      /* These enum values are expressly chosen to be equal to the subgroup
36bf215546Sopenharmony_ci       * size that they require.
37bf215546Sopenharmony_ci       */
38bf215546Sopenharmony_ci      return (unsigned)info->subgroup_size;
39bf215546Sopenharmony_ci   } else {
40bf215546Sopenharmony_ci      return 0;
41bf215546Sopenharmony_ci   }
42bf215546Sopenharmony_ci}
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_cistatic inline bool
45bf215546Sopenharmony_citest_bit(unsigned mask, unsigned bit) {
46bf215546Sopenharmony_ci   return mask & (1u << bit);
47bf215546Sopenharmony_ci}
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_cibool
50bf215546Sopenharmony_cibrw_simd_should_compile(void *mem_ctx,
51bf215546Sopenharmony_ci                        unsigned simd,
52bf215546Sopenharmony_ci                        const struct intel_device_info *devinfo,
53bf215546Sopenharmony_ci                        struct brw_cs_prog_data *prog_data,
54bf215546Sopenharmony_ci                        unsigned required,
55bf215546Sopenharmony_ci                        const char **error)
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci{
58bf215546Sopenharmony_ci   assert(!test_bit(prog_data->prog_mask, simd));
59bf215546Sopenharmony_ci   assert(error);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   const unsigned width = 8u << simd;
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci   /* For shaders with variable size workgroup, we will always compile all the
64bf215546Sopenharmony_ci    * variants, since the choice will happen only at dispatch time.
65bf215546Sopenharmony_ci    */
66bf215546Sopenharmony_ci   const bool workgroup_size_variable = prog_data->local_size[0] == 0;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   if (!workgroup_size_variable) {
69bf215546Sopenharmony_ci      if (test_bit(prog_data->prog_spilled, simd)) {
70bf215546Sopenharmony_ci         *error = ralloc_asprintf(
71bf215546Sopenharmony_ci            mem_ctx, "SIMD%u skipped because would spill", width);
72bf215546Sopenharmony_ci         return false;
73bf215546Sopenharmony_ci      }
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci      const unsigned workgroup_size = prog_data->local_size[0] *
76bf215546Sopenharmony_ci                                      prog_data->local_size[1] *
77bf215546Sopenharmony_ci                                      prog_data->local_size[2];
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci      unsigned max_threads = devinfo->max_cs_workgroup_threads;
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci      if (required && required != width) {
82bf215546Sopenharmony_ci         *error = ralloc_asprintf(
83bf215546Sopenharmony_ci            mem_ctx, "SIMD%u skipped because required dispatch width is %u",
84bf215546Sopenharmony_ci            width, required);
85bf215546Sopenharmony_ci         return false;
86bf215546Sopenharmony_ci      }
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci      if (simd > 0 && test_bit(prog_data->prog_mask, simd - 1) &&
89bf215546Sopenharmony_ci          workgroup_size <= (width / 2)) {
90bf215546Sopenharmony_ci         *error = ralloc_asprintf(
91bf215546Sopenharmony_ci            mem_ctx, "SIMD%u skipped because workgroup size %u already fits in SIMD%u",
92bf215546Sopenharmony_ci            width, workgroup_size, width / 2);
93bf215546Sopenharmony_ci         return false;
94bf215546Sopenharmony_ci      }
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci      if (DIV_ROUND_UP(workgroup_size, width) > max_threads) {
97bf215546Sopenharmony_ci         *error = ralloc_asprintf(
98bf215546Sopenharmony_ci            mem_ctx, "SIMD%u can't fit all %u invocations in %u threads",
99bf215546Sopenharmony_ci            width, workgroup_size, max_threads);
100bf215546Sopenharmony_ci         return false;
101bf215546Sopenharmony_ci      }
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci      /* The SIMD32 is only enabled for cases it is needed unless forced.
104bf215546Sopenharmony_ci       *
105bf215546Sopenharmony_ci       * TODO: Use performance_analysis and drop this rule.
106bf215546Sopenharmony_ci       */
107bf215546Sopenharmony_ci      if (width == 32) {
108bf215546Sopenharmony_ci         if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) {
109bf215546Sopenharmony_ci            *error = ralloc_strdup(
110bf215546Sopenharmony_ci               mem_ctx, "SIMD32 skipped because not required");
111bf215546Sopenharmony_ci            return false;
112bf215546Sopenharmony_ci         }
113bf215546Sopenharmony_ci      }
114bf215546Sopenharmony_ci   }
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   const bool env_skip[3] = {
117bf215546Sopenharmony_ci      INTEL_DEBUG(DEBUG_NO8),
118bf215546Sopenharmony_ci      INTEL_DEBUG(DEBUG_NO16),
119bf215546Sopenharmony_ci      INTEL_DEBUG(DEBUG_NO32),
120bf215546Sopenharmony_ci   };
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   if (unlikely(env_skip[simd])) {
123bf215546Sopenharmony_ci      *error = ralloc_asprintf(
124bf215546Sopenharmony_ci         mem_ctx, "SIMD%u skipped because INTEL_DEBUG=no%u",
125bf215546Sopenharmony_ci         width, width);
126bf215546Sopenharmony_ci      return false;
127bf215546Sopenharmony_ci   }
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   return true;
130bf215546Sopenharmony_ci}
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_civoid
133bf215546Sopenharmony_cibrw_simd_mark_compiled(unsigned simd, struct brw_cs_prog_data *prog_data, bool spilled)
134bf215546Sopenharmony_ci{
135bf215546Sopenharmony_ci   assert(!test_bit(prog_data->prog_mask, simd));
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   prog_data->prog_mask |= 1u << simd;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   /* If a SIMD spilled, all the larger ones would spill too. */
140bf215546Sopenharmony_ci   if (spilled) {
141bf215546Sopenharmony_ci      for (unsigned i = simd; i < 3; i++)
142bf215546Sopenharmony_ci         prog_data->prog_spilled |= 1u << i;
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci}
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ciint
147bf215546Sopenharmony_cibrw_simd_select(const struct brw_cs_prog_data *prog_data)
148bf215546Sopenharmony_ci{
149bf215546Sopenharmony_ci   assert((prog_data->prog_mask & ~0x7u) == 0);
150bf215546Sopenharmony_ci   const unsigned not_spilled_mask =
151bf215546Sopenharmony_ci      prog_data->prog_mask & ~prog_data->prog_spilled;
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   /* Util functions index bits from 1 instead of 0, adjust before return. */
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   if (not_spilled_mask)
156bf215546Sopenharmony_ci      return util_last_bit(not_spilled_mask) - 1;
157bf215546Sopenharmony_ci   else if (prog_data->prog_mask)
158bf215546Sopenharmony_ci      return ffs(prog_data->prog_mask) - 1;
159bf215546Sopenharmony_ci   else
160bf215546Sopenharmony_ci      return -1;
161bf215546Sopenharmony_ci}
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ciint
164bf215546Sopenharmony_cibrw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
165bf215546Sopenharmony_ci                                   const struct brw_cs_prog_data *prog_data,
166bf215546Sopenharmony_ci                                   const unsigned *sizes)
167bf215546Sopenharmony_ci{
168bf215546Sopenharmony_ci   assert(sizes);
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   if (prog_data->local_size[0] == sizes[0] &&
171bf215546Sopenharmony_ci       prog_data->local_size[1] == sizes[1] &&
172bf215546Sopenharmony_ci       prog_data->local_size[2] == sizes[2])
173bf215546Sopenharmony_ci      return brw_simd_select(prog_data);
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   void *mem_ctx = ralloc_context(NULL);
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci   struct brw_cs_prog_data cloned = *prog_data;
178bf215546Sopenharmony_ci   for (unsigned i = 0; i < 3; i++)
179bf215546Sopenharmony_ci      cloned.local_size[i] = sizes[i];
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   cloned.prog_mask = 0;
182bf215546Sopenharmony_ci   cloned.prog_spilled = 0;
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   const char *error[3] = {0};
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   for (unsigned simd = 0; simd < 3; simd++) {
187bf215546Sopenharmony_ci      /* We are not recompiling, so use original results of prog_mask and
188bf215546Sopenharmony_ci       * prog_spilled as they will already contain all possible compilations.
189bf215546Sopenharmony_ci       */
190bf215546Sopenharmony_ci      if (brw_simd_should_compile(mem_ctx, simd, devinfo, &cloned,
191bf215546Sopenharmony_ci                                  0 /* required_dispatch_width */, &error[simd]) &&
192bf215546Sopenharmony_ci          test_bit(prog_data->prog_mask, simd)) {
193bf215546Sopenharmony_ci         brw_simd_mark_compiled(simd, &cloned, test_bit(prog_data->prog_spilled, simd));
194bf215546Sopenharmony_ci      }
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   ralloc_free(mem_ctx);
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   return brw_simd_select(&cloned);
200bf215546Sopenharmony_ci}
201