1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included 12bf215546Sopenharmony_ci * in all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci/** 24bf215546Sopenharmony_ci * @file crocus_program_cache.c 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci * The in-memory program cache. This is basically a hash table mapping 27bf215546Sopenharmony_ci * API-specified shaders and a state key to a compiled variant. It also 28bf215546Sopenharmony_ci * takes care of uploading shader assembly into a BO for use on the GPU. 29bf215546Sopenharmony_ci */ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include <stdio.h> 32bf215546Sopenharmony_ci#include <errno.h> 33bf215546Sopenharmony_ci#include "pipe/p_defines.h" 34bf215546Sopenharmony_ci#include "pipe/p_state.h" 35bf215546Sopenharmony_ci#include "pipe/p_context.h" 36bf215546Sopenharmony_ci#include "pipe/p_screen.h" 37bf215546Sopenharmony_ci#include "util/u_atomic.h" 38bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 39bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 40bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 41bf215546Sopenharmony_ci#include "intel/compiler/brw_compiler.h" 42bf215546Sopenharmony_ci#include "intel/compiler/brw_eu.h" 43bf215546Sopenharmony_ci#include "intel/compiler/brw_nir.h" 44bf215546Sopenharmony_ci#include "crocus_context.h" 45bf215546Sopenharmony_ci#include "crocus_resource.h" 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_cistruct keybox { 48bf215546Sopenharmony_ci uint16_t size; 49bf215546Sopenharmony_ci enum crocus_program_cache_id cache_id; 50bf215546Sopenharmony_ci uint8_t data[0]; 51bf215546Sopenharmony_ci}; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic struct keybox * 54bf215546Sopenharmony_cimake_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id, 55bf215546Sopenharmony_ci const void *key, uint32_t key_size) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci struct keybox *keybox = 58bf215546Sopenharmony_ci ralloc_size(mem_ctx, sizeof(struct keybox) + key_size); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci keybox->cache_id = cache_id; 61bf215546Sopenharmony_ci keybox->size = key_size; 62bf215546Sopenharmony_ci memcpy(keybox->data, key, key_size); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci return keybox; 65bf215546Sopenharmony_ci} 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistatic uint32_t 68bf215546Sopenharmony_cikeybox_hash(const void *void_key) 69bf215546Sopenharmony_ci{ 70bf215546Sopenharmony_ci const struct keybox *key = void_key; 71bf215546Sopenharmony_ci return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id)); 72bf215546Sopenharmony_ci} 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_cistatic bool 75bf215546Sopenharmony_cikeybox_equals(const void *void_a, const void *void_b) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci const struct keybox *a = void_a, *b = void_b; 78bf215546Sopenharmony_ci if (a->size != b->size) 79bf215546Sopenharmony_ci return false; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci return memcmp(a->data, b->data, a->size) == 0; 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_cistruct crocus_compiled_shader * 85bf215546Sopenharmony_cicrocus_find_cached_shader(struct crocus_context *ice, 86bf215546Sopenharmony_ci enum crocus_program_cache_id cache_id, 87bf215546Sopenharmony_ci uint32_t key_size, const void *key) 88bf215546Sopenharmony_ci{ 89bf215546Sopenharmony_ci struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size); 90bf215546Sopenharmony_ci struct hash_entry *entry = 91bf215546Sopenharmony_ci _mesa_hash_table_search(ice->shaders.cache, keybox); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci ralloc_free(keybox); 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci return entry ? entry->data : NULL; 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ciconst void * 99bf215546Sopenharmony_cicrocus_find_previous_compile(const struct crocus_context *ice, 100bf215546Sopenharmony_ci enum crocus_program_cache_id cache_id, 101bf215546Sopenharmony_ci unsigned program_string_id) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci hash_table_foreach(ice->shaders.cache, entry) { 104bf215546Sopenharmony_ci const struct keybox *keybox = entry->key; 105bf215546Sopenharmony_ci const struct brw_base_prog_key *key = (const void *)keybox->data; 106bf215546Sopenharmony_ci if (keybox->cache_id == cache_id && 107bf215546Sopenharmony_ci key->program_string_id == program_string_id) { 108bf215546Sopenharmony_ci return keybox->data; 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci return NULL; 113bf215546Sopenharmony_ci} 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci/** 116bf215546Sopenharmony_ci * Look for an existing entry in the cache that has identical assembly code. 117bf215546Sopenharmony_ci * 118bf215546Sopenharmony_ci * This is useful for programs generating shaders at runtime, where multiple 119bf215546Sopenharmony_ci * distinct shaders (from an API perspective) may compile to the same assembly 120bf215546Sopenharmony_ci * in our backend. This saves space in the program cache buffer. 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_cistatic const struct crocus_compiled_shader * 123bf215546Sopenharmony_cifind_existing_assembly(struct hash_table *cache, void *map, 124bf215546Sopenharmony_ci const void *assembly, unsigned assembly_size) 125bf215546Sopenharmony_ci{ 126bf215546Sopenharmony_ci hash_table_foreach (cache, entry) { 127bf215546Sopenharmony_ci const struct crocus_compiled_shader *existing = entry->data; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci if (existing->map_size != assembly_size) 130bf215546Sopenharmony_ci continue; 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci if (memcmp(map + existing->offset, assembly, assembly_size) == 0) 133bf215546Sopenharmony_ci return existing; 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci return NULL; 136bf215546Sopenharmony_ci} 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_cistatic void 139bf215546Sopenharmony_cicrocus_cache_new_bo(struct crocus_context *ice, 140bf215546Sopenharmony_ci uint32_t new_size) 141bf215546Sopenharmony_ci{ 142bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 143bf215546Sopenharmony_ci struct crocus_bo *new_bo; 144bf215546Sopenharmony_ci new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE | 147bf215546Sopenharmony_ci MAP_ASYNC | MAP_PERSISTENT); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci if (ice->shaders.cache_next_offset != 0) { 150bf215546Sopenharmony_ci memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci crocus_bo_unmap(ice->shaders.cache_bo); 154bf215546Sopenharmony_ci crocus_bo_unreference(ice->shaders.cache_bo); 155bf215546Sopenharmony_ci ice->shaders.cache_bo = new_bo; 156bf215546Sopenharmony_ci ice->shaders.cache_bo_map = map; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci if (screen->devinfo.ver <= 5) { 159bf215546Sopenharmony_ci /* reemit all shaders on GEN4 only. */ 160bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER | 161bf215546Sopenharmony_ci CROCUS_DIRTY_WM; 162bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS; 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false; 165bf215546Sopenharmony_ci ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false; 166bf215546Sopenharmony_ci /* unset state base address */ 167bf215546Sopenharmony_ci} 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_cistatic uint32_t 170bf215546Sopenharmony_cicrocus_alloc_item_data(struct crocus_context *ice, uint32_t size) 171bf215546Sopenharmony_ci{ 172bf215546Sopenharmony_ci if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) { 173bf215546Sopenharmony_ci uint32_t new_size = ice->shaders.cache_bo->size * 2; 174bf215546Sopenharmony_ci while (ice->shaders.cache_next_offset + size > new_size) 175bf215546Sopenharmony_ci new_size *= 2; 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci crocus_cache_new_bo(ice, new_size); 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci uint32_t offset = ice->shaders.cache_next_offset; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci /* Programs are always 64-byte aligned, so set up the next one now */ 182bf215546Sopenharmony_ci ice->shaders.cache_next_offset = ALIGN(offset + size, 64); 183bf215546Sopenharmony_ci return offset; 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_cistruct crocus_compiled_shader * 187bf215546Sopenharmony_cicrocus_upload_shader(struct crocus_context *ice, 188bf215546Sopenharmony_ci enum crocus_program_cache_id cache_id, uint32_t key_size, 189bf215546Sopenharmony_ci const void *key, const void *assembly, uint32_t asm_size, 190bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data, 191bf215546Sopenharmony_ci uint32_t prog_data_size, uint32_t *streamout, 192bf215546Sopenharmony_ci enum brw_param_builtin *system_values, 193bf215546Sopenharmony_ci unsigned num_system_values, unsigned num_cbufs, 194bf215546Sopenharmony_ci const struct crocus_binding_table *bt) 195bf215546Sopenharmony_ci{ 196bf215546Sopenharmony_ci struct hash_table *cache = ice->shaders.cache; 197bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = 198bf215546Sopenharmony_ci rzalloc_size(cache, sizeof(struct crocus_compiled_shader)); 199bf215546Sopenharmony_ci const struct crocus_compiled_shader *existing = find_existing_assembly( 200bf215546Sopenharmony_ci cache, ice->shaders.cache_bo_map, assembly, asm_size); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci /* If we can find a matching prog in the cache already, then reuse the 203bf215546Sopenharmony_ci * existing stuff without creating new copy into the underlying buffer 204bf215546Sopenharmony_ci * object. This is notably useful for programs generating shaders at 205bf215546Sopenharmony_ci * runtime, where multiple shaders may compile to the same thing in our 206bf215546Sopenharmony_ci * backend. 207bf215546Sopenharmony_ci */ 208bf215546Sopenharmony_ci if (existing) { 209bf215546Sopenharmony_ci shader->offset = existing->offset; 210bf215546Sopenharmony_ci shader->map_size = existing->map_size; 211bf215546Sopenharmony_ci } else { 212bf215546Sopenharmony_ci shader->offset = crocus_alloc_item_data(ice, asm_size); 213bf215546Sopenharmony_ci shader->map_size = asm_size; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size); 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci shader->prog_data = prog_data; 219bf215546Sopenharmony_ci shader->prog_data_size = prog_data_size; 220bf215546Sopenharmony_ci shader->streamout = streamout; 221bf215546Sopenharmony_ci shader->system_values = system_values; 222bf215546Sopenharmony_ci shader->num_system_values = num_system_values; 223bf215546Sopenharmony_ci shader->num_cbufs = num_cbufs; 224bf215546Sopenharmony_ci shader->bt = *bt; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci ralloc_steal(shader, shader->prog_data); 227bf215546Sopenharmony_ci if (prog_data_size > 16) 228bf215546Sopenharmony_ci ralloc_steal(shader->prog_data, prog_data->param); 229bf215546Sopenharmony_ci ralloc_steal(shader, shader->streamout); 230bf215546Sopenharmony_ci ralloc_steal(shader, shader->system_values); 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci struct keybox *keybox = make_keybox(shader, cache_id, key, key_size); 233bf215546Sopenharmony_ci _mesa_hash_table_insert(ice->shaders.cache, keybox, shader); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci return shader; 236bf215546Sopenharmony_ci} 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_cibool 239bf215546Sopenharmony_cicrocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key, 240bf215546Sopenharmony_ci uint32_t key_size, uint32_t *kernel_out, 241bf215546Sopenharmony_ci void *prog_data_out) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci struct blorp_context *blorp = blorp_batch->blorp; 244bf215546Sopenharmony_ci struct crocus_context *ice = blorp->driver_ctx; 245bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = 246bf215546Sopenharmony_ci crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (!shader) 249bf215546Sopenharmony_ci return false; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci *kernel_out = shader->offset; 252bf215546Sopenharmony_ci *((void **)prog_data_out) = shader->prog_data; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci return true; 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_cibool 258bf215546Sopenharmony_cicrocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage, 259bf215546Sopenharmony_ci const void *key, uint32_t key_size, 260bf215546Sopenharmony_ci const void *kernel, uint32_t kernel_size, 261bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data_templ, 262bf215546Sopenharmony_ci uint32_t prog_data_size, uint32_t *kernel_out, 263bf215546Sopenharmony_ci void *prog_data_out) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci struct blorp_context *blorp = blorp_batch->blorp; 266bf215546Sopenharmony_ci struct crocus_context *ice = blorp->driver_ctx; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size); 269bf215546Sopenharmony_ci memcpy(prog_data, prog_data_templ, prog_data_size); 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci struct crocus_binding_table bt; 272bf215546Sopenharmony_ci memset(&bt, 0, sizeof(bt)); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = crocus_upload_shader( 275bf215546Sopenharmony_ci ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data, 276bf215546Sopenharmony_ci prog_data_size, NULL, NULL, 0, 0, &bt); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci *kernel_out = shader->offset; 279bf215546Sopenharmony_ci *((void **)prog_data_out) = shader->prog_data; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci return true; 282bf215546Sopenharmony_ci} 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_civoid 285bf215546Sopenharmony_cicrocus_init_program_cache(struct crocus_context *ice) 286bf215546Sopenharmony_ci{ 287bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 288bf215546Sopenharmony_ci ice->shaders.cache = 289bf215546Sopenharmony_ci _mesa_hash_table_create(ice, keybox_hash, keybox_equals); 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci ice->shaders.cache_bo = 292bf215546Sopenharmony_ci crocus_bo_alloc(screen->bufmgr, "program_cache", 16384); 293bf215546Sopenharmony_ci ice->shaders.cache_bo_map = 294bf215546Sopenharmony_ci crocus_bo_map(NULL, ice->shaders.cache_bo, 295bf215546Sopenharmony_ci MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT); 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_civoid 299bf215546Sopenharmony_cicrocus_destroy_program_cache(struct crocus_context *ice) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci for (int i = 0; i < MESA_SHADER_STAGES; i++) { 302bf215546Sopenharmony_ci ice->shaders.prog[i] = NULL; 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci if (ice->shaders.cache_bo) { 306bf215546Sopenharmony_ci crocus_bo_unmap(ice->shaders.cache_bo); 307bf215546Sopenharmony_ci crocus_bo_unreference(ice->shaders.cache_bo); 308bf215546Sopenharmony_ci ice->shaders.cache_bo_map = NULL; 309bf215546Sopenharmony_ci ice->shaders.cache_bo = NULL; 310bf215546Sopenharmony_ci } 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci ralloc_free(ice->shaders.cache); 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_cistatic const char * 316bf215546Sopenharmony_cicache_name(enum crocus_program_cache_id cache_id) 317bf215546Sopenharmony_ci{ 318bf215546Sopenharmony_ci if (cache_id == CROCUS_CACHE_BLORP) 319bf215546Sopenharmony_ci return "BLORP"; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (cache_id == CROCUS_CACHE_SF) 322bf215546Sopenharmony_ci return "SF"; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci if (cache_id == CROCUS_CACHE_CLIP) 325bf215546Sopenharmony_ci return "CLIP"; 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci if (cache_id == CROCUS_CACHE_FF_GS) 328bf215546Sopenharmony_ci return "FF_GS"; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci return _mesa_shader_stage_to_string(cache_id); 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_civoid 334bf215546Sopenharmony_cicrocus_print_program_cache(struct crocus_context *ice) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 337bf215546Sopenharmony_ci const struct brw_isa_info *isa = &screen->compiler->isa; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci hash_table_foreach(ice->shaders.cache, entry) { 340bf215546Sopenharmony_ci const struct keybox *keybox = entry->key; 341bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = entry->data; 342bf215546Sopenharmony_ci fprintf(stderr, "%s:\n", cache_name(keybox->cache_id)); 343bf215546Sopenharmony_ci brw_disassemble(isa, ice->shaders.cache_bo_map + shader->offset, 0, 344bf215546Sopenharmony_ci shader->prog_data->program_size, NULL, stderr); 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci} 347