1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file crocus_program_cache.c
25 *
26 * The in-memory program cache.  This is basically a hash table mapping
27 * API-specified shaders and a state key to a compiled variant.  It also
28 * takes care of uploading shader assembly into a BO for use on the GPU.
29 */
30
31#include <stdio.h>
32#include <errno.h>
33#include "pipe/p_defines.h"
34#include "pipe/p_state.h"
35#include "pipe/p_context.h"
36#include "pipe/p_screen.h"
37#include "util/u_atomic.h"
38#include "util/u_upload_mgr.h"
39#include "compiler/nir/nir.h"
40#include "compiler/nir/nir_builder.h"
41#include "intel/compiler/brw_compiler.h"
42#include "intel/compiler/brw_eu.h"
43#include "intel/compiler/brw_nir.h"
44#include "crocus_context.h"
45#include "crocus_resource.h"
46
47struct keybox {
48   uint16_t size;
49   enum crocus_program_cache_id cache_id;
50   uint8_t data[0];
51};
52
53static struct keybox *
54make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
55            const void *key, uint32_t key_size)
56{
57   struct keybox *keybox =
58      ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
59
60   keybox->cache_id = cache_id;
61   keybox->size = key_size;
62   memcpy(keybox->data, key, key_size);
63
64   return keybox;
65}
66
67static uint32_t
68keybox_hash(const void *void_key)
69{
70   const struct keybox *key = void_key;
71   return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
72}
73
74static bool
75keybox_equals(const void *void_a, const void *void_b)
76{
77   const struct keybox *a = void_a, *b = void_b;
78   if (a->size != b->size)
79      return false;
80
81   return memcmp(a->data, b->data, a->size) == 0;
82}
83
84struct crocus_compiled_shader *
85crocus_find_cached_shader(struct crocus_context *ice,
86                          enum crocus_program_cache_id cache_id,
87                          uint32_t key_size, const void *key)
88{
89   struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
90   struct hash_entry *entry =
91      _mesa_hash_table_search(ice->shaders.cache, keybox);
92
93   ralloc_free(keybox);
94
95   return entry ? entry->data : NULL;
96}
97
98const void *
99crocus_find_previous_compile(const struct crocus_context *ice,
100                             enum crocus_program_cache_id cache_id,
101                             unsigned program_string_id)
102{
103   hash_table_foreach(ice->shaders.cache, entry) {
104      const struct keybox *keybox = entry->key;
105      const struct brw_base_prog_key *key = (const void *)keybox->data;
106      if (keybox->cache_id == cache_id &&
107          key->program_string_id == program_string_id) {
108         return keybox->data;
109      }
110   }
111
112   return NULL;
113}
114
115/**
116 * Look for an existing entry in the cache that has identical assembly code.
117 *
118 * This is useful for programs generating shaders at runtime, where multiple
119 * distinct shaders (from an API perspective) may compile to the same assembly
120 * in our backend.  This saves space in the program cache buffer.
121 */
122static const struct crocus_compiled_shader *
123find_existing_assembly(struct hash_table *cache, void *map,
124                       const void *assembly, unsigned assembly_size)
125{
126   hash_table_foreach (cache, entry) {
127      const struct crocus_compiled_shader *existing = entry->data;
128
129      if (existing->map_size != assembly_size)
130         continue;
131
132      if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
133         return existing;
134   }
135   return NULL;
136}
137
138static void
139crocus_cache_new_bo(struct crocus_context *ice,
140                    uint32_t new_size)
141{
142   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
143   struct crocus_bo *new_bo;
144   new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);
145
146   void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
147                             MAP_ASYNC | MAP_PERSISTENT);
148
149   if (ice->shaders.cache_next_offset != 0) {
150      memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
151   }
152
153   crocus_bo_unmap(ice->shaders.cache_bo);
154   crocus_bo_unreference(ice->shaders.cache_bo);
155   ice->shaders.cache_bo = new_bo;
156   ice->shaders.cache_bo_map = map;
157
158   if (screen->devinfo.ver <= 5) {
159      /* reemit all shaders on GEN4 only. */
160      ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
161         CROCUS_DIRTY_WM;
162      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
163   }
164   ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
165   ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
166   /* unset state base address */
167}
168
169static uint32_t
170crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
171{
172   if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
173      uint32_t new_size = ice->shaders.cache_bo->size * 2;
174      while (ice->shaders.cache_next_offset + size > new_size)
175         new_size *= 2;
176
177      crocus_cache_new_bo(ice, new_size);
178   }
179   uint32_t offset = ice->shaders.cache_next_offset;
180
181   /* Programs are always 64-byte aligned, so set up the next one now */
182   ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
183   return offset;
184}
185
186struct crocus_compiled_shader *
187crocus_upload_shader(struct crocus_context *ice,
188                     enum crocus_program_cache_id cache_id, uint32_t key_size,
189                     const void *key, const void *assembly, uint32_t asm_size,
190                     struct brw_stage_prog_data *prog_data,
191                     uint32_t prog_data_size, uint32_t *streamout,
192                     enum brw_param_builtin *system_values,
193                     unsigned num_system_values, unsigned num_cbufs,
194                     const struct crocus_binding_table *bt)
195{
196   struct hash_table *cache = ice->shaders.cache;
197   struct crocus_compiled_shader *shader =
198      rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
199   const struct crocus_compiled_shader *existing = find_existing_assembly(
200      cache, ice->shaders.cache_bo_map, assembly, asm_size);
201
202   /* If we can find a matching prog in the cache already, then reuse the
203    * existing stuff without creating new copy into the underlying buffer
204    * object.  This is notably useful for programs generating shaders at
205    * runtime, where multiple shaders may compile to the same thing in our
206    * backend.
207    */
208   if (existing) {
209      shader->offset = existing->offset;
210      shader->map_size = existing->map_size;
211   } else {
212      shader->offset = crocus_alloc_item_data(ice, asm_size);
213      shader->map_size = asm_size;
214
215      memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
216   }
217
218   shader->prog_data = prog_data;
219   shader->prog_data_size = prog_data_size;
220   shader->streamout = streamout;
221   shader->system_values = system_values;
222   shader->num_system_values = num_system_values;
223   shader->num_cbufs = num_cbufs;
224   shader->bt = *bt;
225
226   ralloc_steal(shader, shader->prog_data);
227   if (prog_data_size > 16)
228      ralloc_steal(shader->prog_data, prog_data->param);
229   ralloc_steal(shader, shader->streamout);
230   ralloc_steal(shader, shader->system_values);
231
232   struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
233   _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
234
235   return shader;
236}
237
238bool
239crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
240                           uint32_t key_size, uint32_t *kernel_out,
241                           void *prog_data_out)
242{
243   struct blorp_context *blorp = blorp_batch->blorp;
244   struct crocus_context *ice = blorp->driver_ctx;
245   struct crocus_compiled_shader *shader =
246      crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
247
248   if (!shader)
249      return false;
250
251   *kernel_out = shader->offset;
252   *((void **)prog_data_out) = shader->prog_data;
253
254   return true;
255}
256
257bool
258crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
259                           const void *key, uint32_t key_size,
260                           const void *kernel, uint32_t kernel_size,
261                           const struct brw_stage_prog_data *prog_data_templ,
262                           uint32_t prog_data_size, uint32_t *kernel_out,
263                           void *prog_data_out)
264{
265   struct blorp_context *blorp = blorp_batch->blorp;
266   struct crocus_context *ice = blorp->driver_ctx;
267
268   struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
269   memcpy(prog_data, prog_data_templ, prog_data_size);
270
271   struct crocus_binding_table bt;
272   memset(&bt, 0, sizeof(bt));
273
274   struct crocus_compiled_shader *shader = crocus_upload_shader(
275      ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
276      prog_data_size, NULL, NULL, 0, 0, &bt);
277
278   *kernel_out = shader->offset;
279   *((void **)prog_data_out) = shader->prog_data;
280
281   return true;
282}
283
284void
285crocus_init_program_cache(struct crocus_context *ice)
286{
287   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
288   ice->shaders.cache =
289      _mesa_hash_table_create(ice, keybox_hash, keybox_equals);
290
291   ice->shaders.cache_bo =
292      crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
293   ice->shaders.cache_bo_map =
294      crocus_bo_map(NULL, ice->shaders.cache_bo,
295                    MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
296}
297
298void
299crocus_destroy_program_cache(struct crocus_context *ice)
300{
301   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
302      ice->shaders.prog[i] = NULL;
303   }
304
305   if (ice->shaders.cache_bo) {
306      crocus_bo_unmap(ice->shaders.cache_bo);
307      crocus_bo_unreference(ice->shaders.cache_bo);
308      ice->shaders.cache_bo_map = NULL;
309      ice->shaders.cache_bo = NULL;
310   }
311
312   ralloc_free(ice->shaders.cache);
313}
314
315static const char *
316cache_name(enum crocus_program_cache_id cache_id)
317{
318   if (cache_id == CROCUS_CACHE_BLORP)
319      return "BLORP";
320
321   if (cache_id == CROCUS_CACHE_SF)
322      return "SF";
323
324   if (cache_id == CROCUS_CACHE_CLIP)
325      return "CLIP";
326
327   if (cache_id == CROCUS_CACHE_FF_GS)
328      return "FF_GS";
329
330   return _mesa_shader_stage_to_string(cache_id);
331}
332
333void
334crocus_print_program_cache(struct crocus_context *ice)
335{
336   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
337   const struct brw_isa_info *isa = &screen->compiler->isa;
338
339   hash_table_foreach(ice->shaders.cache, entry) {
340      const struct keybox *keybox = entry->key;
341      struct crocus_compiled_shader *shader = entry->data;
342      fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
343      brw_disassemble(isa, ice->shaders.cache_bo_map + shader->offset, 0,
344                      shader->prog_data->program_size, NULL, stderr);
345   }
346}
347