/*
 * Copyright © 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
 * Copyright © 2009 Joakim Sindholt <opensource@zhasha.com>
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "amdgpu_cs.h"

#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_cpu_detect.h"
#include "util/u_hash_table.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "drm-uapi/amdgpu_drm.h"
#include <xf86drm.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "ac_llvm_util.h"
#include "sid.h"

static struct hash_table *dev_tab = NULL;
static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP;

#if DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
#endif

static void handle_env_var_force_family(struct amdgpu_winsys *ws)
{
   const char *family = debug_get_option("SI_FORCE_FAMILY", NULL);
   unsigned i;

   if (!family)
      return;

   for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
      if (!strcmp(family, ac_get_llvm_processor_name(i))) {
         /* Override family and gfx_level. */
         ws->info.family = i;
         ws->info.name = "NOOP";
         strcpy(ws->info.lowercase_name, "noop");

         if (i >= CHIP_GFX1100)
            ws->info.gfx_level = GFX11;
         else if (i >= CHIP_NAVI21)
            ws->info.gfx_level = GFX10_3;
         else if (i >= CHIP_NAVI10)
            ws->info.gfx_level = GFX10;
         else if (i >= CHIP_VEGA10)
            ws->info.gfx_level = GFX9;
         else if (i >= CHIP_TONGA)
            ws->info.gfx_level = GFX8;
         else if (i >= CHIP_BONAIRE)
            ws->info.gfx_level = GFX7;
         else
            ws->info.gfx_level = GFX6;

         /* Don't submit any IBs. */
         setenv("RADEON_NOOP", "1", 1);
         return;
      }
   }

   fprintf(stderr, "radeonsi: Unknown family: %s\n", family);
   exit(1);
}

/* Helper function to do the ioctls needed for setup and init. */
static bool do_winsys_init(struct amdgpu_winsys *ws,
                           const struct pipe_screen_config *config,
                           int fd)
{
   if (!ac_query_gpu_info(fd, ws->dev, &ws->info))
      goto fail;

   /* TODO: Enable this once the kernel handles it efficiently. */
   if (ws->info.has_dedicated_vram)
      ws->info.has_local_buffers = false;

   handle_env_var_force_family(ws);

   ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
   if (!ws->addrlib) {
      fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
      goto fail;
   }

   ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
                  strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
   ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);
#if DEBUG
   ws->debug_all_bos = debug_get_option_all_bos();
#endif
   ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL ||
                      strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL ||
                      strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
   ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
                              driQueryOptionb(config->options, "radeonsi_zerovram");

   return true;

fail:
   amdgpu_device_deinitialize(ws->dev);
   ws->dev = NULL;
   return false;
}

static void do_winsys_deinit(struct amdgpu_winsys *ws)
{
   if (ws->reserve_vmid)
      amdgpu_vm_unreserve_vmid(ws->dev, 0);

   if (util_queue_is_initialized(&ws->cs_queue))
      util_queue_destroy(&ws->cs_queue);

   simple_mtx_destroy(&ws->bo_fence_lock);
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (ws->bo_slabs[i].groups)
         pb_slabs_deinit(&ws->bo_slabs[i]);
   }
   pb_cache_deinit(&ws->bo_cache);
   _mesa_hash_table_destroy(ws->bo_export_table, NULL);
   simple_mtx_destroy(&ws->sws_list_lock);
#if DEBUG
   simple_mtx_destroy(&ws->global_bo_list_lock);
#endif
   simple_mtx_destroy(&ws->bo_export_table_lock);

   ac_addrlib_destroy(ws->addrlib);
   amdgpu_device_deinitialize(ws->dev);
   FREE(ws);
}

static void amdgpu_winsys_destroy_locked(struct radeon_winsys *rws, bool locked)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
   struct amdgpu_winsys *ws = sws->aws;
   bool destroy;

   /* When the reference counter drops to zero, remove the device pointer
    * from the table.
    * This must happen while the mutex is locked, so that
    * amdgpu_winsys_create in another thread doesn't get the winsys
    * from the table when the counter drops to 0.
    */
   if (!locked)
      simple_mtx_lock(&dev_tab_mutex);

   destroy = pipe_reference(&ws->reference, NULL);
   if (destroy && dev_tab) {
      _mesa_hash_table_remove_key(dev_tab, ws->dev);
      if (_mesa_hash_table_num_entries(dev_tab) == 0) {
         _mesa_hash_table_destroy(dev_tab, NULL);
         dev_tab = NULL;
      }
   }

   if (!locked)
      simple_mtx_unlock(&dev_tab_mutex);

   if (destroy)
      do_winsys_deinit(ws);

   close(sws->fd);
   FREE(rws);
}

static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
   amdgpu_winsys_destroy_locked(rws, false);
}

static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
                                     struct radeon_info *info,
                                     bool enable_smart_access_memory,
                                     bool disable_smart_access_memory)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   if (disable_smart_access_memory)
      ws->info.smart_access_memory = false;
   else if (enable_smart_access_memory && ws->info.all_vram_visible)
      ws->info.smart_access_memory = true;

   *info = ws->info;
}

static bool amdgpu_cs_request_feature(struct radeon_cmdbuf *rcs,
                                      enum radeon_feature_id fid,
                                      bool enable)
{
   return false;
}

static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
                                   enum radeon_value_id value)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_heap_info heap;
   uint64_t retval = 0;

   switch (value) {
   case RADEON_REQUESTED_VRAM_MEMORY:
      return ws->allocated_vram;
   case RADEON_REQUESTED_GTT_MEMORY:
      return ws->allocated_gtt;
   case RADEON_MAPPED_VRAM:
      return ws->mapped_vram;
   case RADEON_MAPPED_GTT:
      return ws->mapped_gtt;
   case RADEON_SLAB_WASTED_VRAM:
      return ws->slab_wasted_vram;
   case RADEON_SLAB_WASTED_GTT:
      return ws->slab_wasted_gtt;
   case RADEON_BUFFER_WAIT_TIME_NS:
      return ws->buffer_wait_time;
   case RADEON_NUM_MAPPED_BUFFERS:
      return ws->num_mapped_buffers;
   case RADEON_TIMESTAMP:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
      return retval;
   case RADEON_NUM_GFX_IBS:
      return ws->num_gfx_IBs;
   case RADEON_NUM_SDMA_IBS:
      return ws->num_sdma_IBs;
   case RADEON_GFX_BO_LIST_COUNTER:
      return ws->gfx_bo_list_counter;
   case RADEON_GFX_IB_SIZE_COUNTER:
      return ws->gfx_ib_size_counter;
   case RADEON_NUM_BYTES_MOVED:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
      return retval;
   case RADEON_NUM_EVICTIONS:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
      return retval;
   case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
      return retval;
   case RADEON_VRAM_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
      return heap.heap_usage;
   case RADEON_VRAM_VIS_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap);
      return heap.heap_usage;
   case RADEON_GTT_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
      return heap.heap_usage;
   case RADEON_GPU_TEMPERATURE:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
      return retval;
   case RADEON_CURRENT_SCLK:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
      return retval;
   case RADEON_CURRENT_MCLK:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
      return retval;
   case RADEON_CS_THREAD_TIME:
      return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
   }
   return 0;
}

static bool amdgpu_read_registers(struct radeon_winsys *rws,
                                  unsigned reg_offset,
                                  unsigned num_registers, uint32_t *out)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
                                   0xffffffff, 0, out) == 0;
}

static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
   struct amdgpu_winsys *aws = sws->aws;
   bool ret;

   simple_mtx_lock(&aws->sws_list_lock);

   ret = pipe_reference(&sws->reference, NULL);
   if (ret) {
      struct amdgpu_screen_winsys **sws_iter;
      struct amdgpu_winsys *aws = sws->aws;

      /* Remove this amdgpu_screen_winsys from amdgpu_winsys' list, so that
       * amdgpu_winsys_create can't re-use it anymore
       */
      for (sws_iter = &aws->sws_list; *sws_iter; sws_iter = &(*sws_iter)->next) {
         if (*sws_iter == sws) {
            *sws_iter = sws->next;
            break;
         }
      }
   }

   simple_mtx_unlock(&aws->sws_list_lock);

   if (ret && sws->kms_handles) {
      struct drm_gem_close args;

      hash_table_foreach(sws->kms_handles, entry) {
         args.handle = (uintptr_t)entry->data;
         drmIoctl(sws->fd, DRM_IOCTL_GEM_CLOSE, &args);
      }
      _mesa_hash_table_destroy(sws->kms_handles, NULL);
   }

   return ret;
}

static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
                                           unsigned cache)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   util_set_thread_affinity(ws->cs_queue.threads[0],
                            util_get_cpu_caps()->L3_affinity_mask[cache],
                            NULL, util_get_cpu_caps()->num_cpu_mask_bits);
}

static uint32_t kms_handle_hash(const void *key)
{
   const struct amdgpu_winsys_bo *bo = key;

   return bo->u.real.kms_handle;
}

static bool kms_handle_equals(const void *a, const void *b)
{
   return a == b;
}

static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   return cs->csc->secure;
}

PUBLIC struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
                     radeon_screen_create_t screen_create)
{
   struct amdgpu_screen_winsys *ws;
   struct amdgpu_winsys *aws;
   amdgpu_device_handle dev;
   uint32_t drm_major, drm_minor;
   int r;

   ws = CALLOC_STRUCT(amdgpu_screen_winsys);
   if (!ws)
      return NULL;

   pipe_reference_init(&ws->reference, 1);
   ws->fd = os_dupfd_cloexec(fd);

   /* Look up the winsys from the dev table. */
   simple_mtx_lock(&dev_tab_mutex);
   if (!dev_tab)
      dev_tab = util_hash_table_create_ptr_keys();

   /* Initialize the amdgpu device. This should always return the same pointer
    * for the same fd. */
   r = amdgpu_device_initialize(ws->fd, &drm_major, &drm_minor, &dev);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
      goto fail;
   }

   /* Lookup a winsys if we have already created one for this device. */
   aws = util_hash_table_get(dev_tab, dev);
   if (aws) {
      struct amdgpu_screen_winsys *sws_iter;

      /* Release the device handle, because we don't need it anymore.
       * This function is returning an existing winsys instance, which
       * has its own device handle.
       */
      amdgpu_device_deinitialize(dev);

      simple_mtx_lock(&aws->sws_list_lock);
      for (sws_iter = aws->sws_list; sws_iter; sws_iter = sws_iter->next) {
         r = os_same_file_description(sws_iter->fd, ws->fd);

         if (r == 0) {
            close(ws->fd);
            FREE(ws);
            ws = sws_iter;
            pipe_reference(NULL, &ws->reference);
            simple_mtx_unlock(&aws->sws_list_lock);
            goto unlock;
         } else if (r < 0) {
            static bool logged;

            if (!logged) {
               os_log_message("amdgpu: os_same_file_description couldn't "
                              "determine if two DRM fds reference the same "
                              "file description.\n"
                              "If they do, bad things may happen!\n");
               logged = true;
            }
         }
      }
      simple_mtx_unlock(&aws->sws_list_lock);

      ws->kms_handles = _mesa_hash_table_create(NULL, kms_handle_hash,
                                                kms_handle_equals);
      if (!ws->kms_handles)
         goto fail;

      pipe_reference(NULL, &aws->reference);
   } else {
      /* Create a new winsys. */
      aws = CALLOC_STRUCT(amdgpu_winsys);
      if (!aws)
         goto fail;

      aws->dev = dev;
      aws->fd = ws->fd;
      aws->info.drm_major = drm_major;
      aws->info.drm_minor = drm_minor;
      aws->dummy_ws.aws = aws; /* only the pointer is used */

      if (!do_winsys_init(aws, config, fd))
         goto fail_alloc;

      /* Create managers. */
      pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS,
                    500000, aws->check_vm ? 1.0f : 2.0f, 0,
                    ((uint64_t)aws->info.vram_size_kb + aws->info.gart_size_kb) * 1024 / 8, aws,
                    /* Cast to void* because one of the function parameters
                     * is a struct pointer instead of void*. */
                    (void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim);

      unsigned min_slab_order = 8;  /* 256 bytes */
      unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
      unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                               NUM_SLAB_ALLOCATORS;

      /* Divide the size order range among slab managers. */
      for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
         unsigned min_order = min_slab_order;
         unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                   max_slab_order);

         if (!pb_slabs_init(&aws->bo_slabs[i],
                            min_order, max_order,
                            RADEON_NUM_HEAPS, true,
                            aws,
                            amdgpu_bo_can_reclaim_slab,
                            amdgpu_bo_slab_alloc,
                            /* Cast to void* because one of the function parameters
                             * is a struct pointer instead of void*. */
                            (void*)amdgpu_bo_slab_free)) {
            amdgpu_winsys_destroy(&ws->base);
            simple_mtx_unlock(&dev_tab_mutex);
            return NULL;
         }

         min_slab_order = max_order + 1;
      }

      aws->info.min_alloc_size = 1 << aws->bo_slabs[0].min_order;

      /* init reference */
      pipe_reference_init(&aws->reference, 1);
#if DEBUG
      list_inithead(&aws->global_bo_list);
#endif
      aws->bo_export_table = util_hash_table_create_ptr_keys();

      (void) simple_mtx_init(&aws->sws_list_lock, mtx_plain);
#if DEBUG
      (void) simple_mtx_init(&aws->global_bo_list_lock, mtx_plain);
#endif
      (void) simple_mtx_init(&aws->bo_fence_lock, mtx_plain);
      (void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain);

      if (!util_queue_init(&aws->cs_queue, "cs", 8, 1,
                           UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) {
         amdgpu_winsys_destroy(&ws->base);
         simple_mtx_unlock(&dev_tab_mutex);
         return NULL;
      }

      _mesa_hash_table_insert(dev_tab, dev, aws);

      if (aws->reserve_vmid) {
         r = amdgpu_vm_reserve_vmid(dev, 0);
         if (r) {
            amdgpu_winsys_destroy(&ws->base);
            simple_mtx_unlock(&dev_tab_mutex);
            return NULL;
         }
      }
   }

   ws->aws = aws;

   /* Set functions. */
   ws->base.unref = amdgpu_winsys_unref;
   ws->base.destroy = amdgpu_winsys_destroy;
   ws->base.query_info = amdgpu_winsys_query_info;
   ws->base.cs_request_feature = amdgpu_cs_request_feature;
   ws->base.query_value = amdgpu_query_value;
   ws->base.read_registers = amdgpu_read_registers;
   ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
   ws->base.cs_is_secure = amdgpu_cs_is_secure;

   amdgpu_bo_init_functions(ws);
   amdgpu_cs_init_functions(ws);
   amdgpu_surface_init_functions(ws);

   simple_mtx_lock(&aws->sws_list_lock);
   ws->next = aws->sws_list;
   aws->sws_list = ws;
   simple_mtx_unlock(&aws->sws_list_lock);

   /* Create the screen at the end. The winsys must be initialized
    * completely.
    *
    * Alternatively, we could create the screen based on "ws->gen"
    * and link all drivers into one binary blob. */
   ws->base.screen = screen_create(&ws->base, config);
   if (!ws->base.screen) {
      amdgpu_winsys_destroy_locked(&ws->base, true);
      simple_mtx_unlock(&dev_tab_mutex);
      return NULL;
   }

unlock:
   /* We must unlock the mutex once the winsys is fully initialized, so that
    * other threads attempting to create the winsys from the same fd will
    * get a fully initialized winsys and not just half-way initialized. */
   simple_mtx_unlock(&dev_tab_mutex);

   return &ws->base;

fail_alloc:
   FREE(aws);
fail:
   if (ws->kms_handles)
      _mesa_hash_table_destroy(ws->kms_handles, NULL);
   close(ws->fd);
   FREE(ws);
   simple_mtx_unlock(&dev_tab_mutex);
   return NULL;
}