1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file crocus_program_cache.c 25 * 26 * The in-memory program cache. This is basically a hash table mapping 27 * API-specified shaders and a state key to a compiled variant. It also 28 * takes care of uploading shader assembly into a BO for use on the GPU. 29 */ 30 31#include <stdio.h> 32#include <errno.h> 33#include "pipe/p_defines.h" 34#include "pipe/p_state.h" 35#include "pipe/p_context.h" 36#include "pipe/p_screen.h" 37#include "util/u_atomic.h" 38#include "util/u_upload_mgr.h" 39#include "compiler/nir/nir.h" 40#include "compiler/nir/nir_builder.h" 41#include "intel/compiler/brw_compiler.h" 42#include "intel/compiler/brw_eu.h" 43#include "intel/compiler/brw_nir.h" 44#include "crocus_context.h" 45#include "crocus_resource.h" 46 47struct keybox { 48 uint16_t size; 49 enum crocus_program_cache_id cache_id; 50 uint8_t data[0]; 51}; 52 53static struct keybox * 54make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id, 55 const void *key, uint32_t key_size) 56{ 57 struct keybox *keybox = 58 ralloc_size(mem_ctx, sizeof(struct keybox) + key_size); 59 60 keybox->cache_id = cache_id; 61 keybox->size = key_size; 62 memcpy(keybox->data, key, key_size); 63 64 return keybox; 65} 66 67static uint32_t 68keybox_hash(const void *void_key) 69{ 70 const struct keybox *key = void_key; 71 return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id)); 72} 73 74static bool 75keybox_equals(const void *void_a, const void *void_b) 76{ 77 const struct keybox *a = void_a, *b = void_b; 78 if (a->size != b->size) 79 return false; 80 81 return memcmp(a->data, b->data, a->size) == 0; 82} 83 84struct crocus_compiled_shader * 85crocus_find_cached_shader(struct crocus_context *ice, 86 enum crocus_program_cache_id cache_id, 87 uint32_t key_size, const void *key) 88{ 89 struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size); 90 struct hash_entry *entry = 91 _mesa_hash_table_search(ice->shaders.cache, keybox); 92 93 ralloc_free(keybox); 94 95 return entry ? entry->data : NULL; 96} 97 98const void * 99crocus_find_previous_compile(const struct crocus_context *ice, 100 enum crocus_program_cache_id cache_id, 101 unsigned program_string_id) 102{ 103 hash_table_foreach(ice->shaders.cache, entry) { 104 const struct keybox *keybox = entry->key; 105 const struct brw_base_prog_key *key = (const void *)keybox->data; 106 if (keybox->cache_id == cache_id && 107 key->program_string_id == program_string_id) { 108 return keybox->data; 109 } 110 } 111 112 return NULL; 113} 114 115/** 116 * Look for an existing entry in the cache that has identical assembly code. 117 * 118 * This is useful for programs generating shaders at runtime, where multiple 119 * distinct shaders (from an API perspective) may compile to the same assembly 120 * in our backend. This saves space in the program cache buffer. 121 */ 122static const struct crocus_compiled_shader * 123find_existing_assembly(struct hash_table *cache, void *map, 124 const void *assembly, unsigned assembly_size) 125{ 126 hash_table_foreach (cache, entry) { 127 const struct crocus_compiled_shader *existing = entry->data; 128 129 if (existing->map_size != assembly_size) 130 continue; 131 132 if (memcmp(map + existing->offset, assembly, assembly_size) == 0) 133 return existing; 134 } 135 return NULL; 136} 137 138static void 139crocus_cache_new_bo(struct crocus_context *ice, 140 uint32_t new_size) 141{ 142 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 143 struct crocus_bo *new_bo; 144 new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size); 145 146 void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE | 147 MAP_ASYNC | MAP_PERSISTENT); 148 149 if (ice->shaders.cache_next_offset != 0) { 150 memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset); 151 } 152 153 crocus_bo_unmap(ice->shaders.cache_bo); 154 crocus_bo_unreference(ice->shaders.cache_bo); 155 ice->shaders.cache_bo = new_bo; 156 ice->shaders.cache_bo_map = map; 157 158 if (screen->devinfo.ver <= 5) { 159 /* reemit all shaders on GEN4 only. */ 160 ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER | 161 CROCUS_DIRTY_WM; 162 ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS; 163 } 164 ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false; 165 ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false; 166 /* unset state base address */ 167} 168 169static uint32_t 170crocus_alloc_item_data(struct crocus_context *ice, uint32_t size) 171{ 172 if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) { 173 uint32_t new_size = ice->shaders.cache_bo->size * 2; 174 while (ice->shaders.cache_next_offset + size > new_size) 175 new_size *= 2; 176 177 crocus_cache_new_bo(ice, new_size); 178 } 179 uint32_t offset = ice->shaders.cache_next_offset; 180 181 /* Programs are always 64-byte aligned, so set up the next one now */ 182 ice->shaders.cache_next_offset = ALIGN(offset + size, 64); 183 return offset; 184} 185 186struct crocus_compiled_shader * 187crocus_upload_shader(struct crocus_context *ice, 188 enum crocus_program_cache_id cache_id, uint32_t key_size, 189 const void *key, const void *assembly, uint32_t asm_size, 190 struct brw_stage_prog_data *prog_data, 191 uint32_t prog_data_size, uint32_t *streamout, 192 enum brw_param_builtin *system_values, 193 unsigned num_system_values, unsigned num_cbufs, 194 const struct crocus_binding_table *bt) 195{ 196 struct hash_table *cache = ice->shaders.cache; 197 struct crocus_compiled_shader *shader = 198 rzalloc_size(cache, sizeof(struct crocus_compiled_shader)); 199 const struct crocus_compiled_shader *existing = find_existing_assembly( 200 cache, ice->shaders.cache_bo_map, assembly, asm_size); 201 202 /* If we can find a matching prog in the cache already, then reuse the 203 * existing stuff without creating new copy into the underlying buffer 204 * object. This is notably useful for programs generating shaders at 205 * runtime, where multiple shaders may compile to the same thing in our 206 * backend. 207 */ 208 if (existing) { 209 shader->offset = existing->offset; 210 shader->map_size = existing->map_size; 211 } else { 212 shader->offset = crocus_alloc_item_data(ice, asm_size); 213 shader->map_size = asm_size; 214 215 memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size); 216 } 217 218 shader->prog_data = prog_data; 219 shader->prog_data_size = prog_data_size; 220 shader->streamout = streamout; 221 shader->system_values = system_values; 222 shader->num_system_values = num_system_values; 223 shader->num_cbufs = num_cbufs; 224 shader->bt = *bt; 225 226 ralloc_steal(shader, shader->prog_data); 227 if (prog_data_size > 16) 228 ralloc_steal(shader->prog_data, prog_data->param); 229 ralloc_steal(shader, shader->streamout); 230 ralloc_steal(shader, shader->system_values); 231 232 struct keybox *keybox = make_keybox(shader, cache_id, key, key_size); 233 _mesa_hash_table_insert(ice->shaders.cache, keybox, shader); 234 235 return shader; 236} 237 238bool 239crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key, 240 uint32_t key_size, uint32_t *kernel_out, 241 void *prog_data_out) 242{ 243 struct blorp_context *blorp = blorp_batch->blorp; 244 struct crocus_context *ice = blorp->driver_ctx; 245 struct crocus_compiled_shader *shader = 246 crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key); 247 248 if (!shader) 249 return false; 250 251 *kernel_out = shader->offset; 252 *((void **)prog_data_out) = shader->prog_data; 253 254 return true; 255} 256 257bool 258crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage, 259 const void *key, uint32_t key_size, 260 const void *kernel, uint32_t kernel_size, 261 const struct brw_stage_prog_data *prog_data_templ, 262 uint32_t prog_data_size, uint32_t *kernel_out, 263 void *prog_data_out) 264{ 265 struct blorp_context *blorp = blorp_batch->blorp; 266 struct crocus_context *ice = blorp->driver_ctx; 267 268 struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size); 269 memcpy(prog_data, prog_data_templ, prog_data_size); 270 271 struct crocus_binding_table bt; 272 memset(&bt, 0, sizeof(bt)); 273 274 struct crocus_compiled_shader *shader = crocus_upload_shader( 275 ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data, 276 prog_data_size, NULL, NULL, 0, 0, &bt); 277 278 *kernel_out = shader->offset; 279 *((void **)prog_data_out) = shader->prog_data; 280 281 return true; 282} 283 284void 285crocus_init_program_cache(struct crocus_context *ice) 286{ 287 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 288 ice->shaders.cache = 289 _mesa_hash_table_create(ice, keybox_hash, keybox_equals); 290 291 ice->shaders.cache_bo = 292 crocus_bo_alloc(screen->bufmgr, "program_cache", 16384); 293 ice->shaders.cache_bo_map = 294 crocus_bo_map(NULL, ice->shaders.cache_bo, 295 MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT); 296} 297 298void 299crocus_destroy_program_cache(struct crocus_context *ice) 300{ 301 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 302 ice->shaders.prog[i] = NULL; 303 } 304 305 if (ice->shaders.cache_bo) { 306 crocus_bo_unmap(ice->shaders.cache_bo); 307 crocus_bo_unreference(ice->shaders.cache_bo); 308 ice->shaders.cache_bo_map = NULL; 309 ice->shaders.cache_bo = NULL; 310 } 311 312 ralloc_free(ice->shaders.cache); 313} 314 315static const char * 316cache_name(enum crocus_program_cache_id cache_id) 317{ 318 if (cache_id == CROCUS_CACHE_BLORP) 319 return "BLORP"; 320 321 if (cache_id == CROCUS_CACHE_SF) 322 return "SF"; 323 324 if (cache_id == CROCUS_CACHE_CLIP) 325 return "CLIP"; 326 327 if (cache_id == CROCUS_CACHE_FF_GS) 328 return "FF_GS"; 329 330 return _mesa_shader_stage_to_string(cache_id); 331} 332 333void 334crocus_print_program_cache(struct crocus_context *ice) 335{ 336 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 337 const struct brw_isa_info *isa = &screen->compiler->isa; 338 339 hash_table_foreach(ice->shaders.cache, entry) { 340 const struct keybox *keybox = entry->key; 341 struct crocus_compiled_shader *shader = entry->data; 342 fprintf(stderr, "%s:\n", cache_name(keybox->cache_id)); 343 brw_disassemble(isa, ice->shaders.cache_bo_map + shader->offset, 0, 344 shader->prog_data->program_size, NULL, stderr); 345 } 346} 347