/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "ac_gpu_info.h"
#include "ac_perfcounter.h"

#include "util/u_memory.h"
#include "macros.h"

/*
 * Static per-block descriptions.
 *
 * Every hardware block below follows the same pattern:
 *   - <prefix>_<BLOCK>_select0[]: the PERFCOUNTERn_SELECT register names, one
 *     entry per counter (in every table here the array length equals
 *     .num_counters);
 *   - <prefix>_<BLOCK>_select1[] (optional): the PERFCOUNTERn_SELECT1 register
 *     names, for the subset of counters that have one;
 *   - either .counter0_lo (a single PERFCOUNTER0_LO register name) or
 *     .counters[] (an explicit list of per-counter _LO register names);
 *   - a struct ac_pc_block_base that ties the arrays together with the block
 *     id, its name string, flags and the SPM-related fields.
 *
 * NOTE(review): the exact meaning of .num_spm_counters, .num_spm_wires and
 * .spm_block_select is defined by ac_perfcounter.h and its users, not by this
 * file -- consult the header before changing those values.
 *
 * The "cik_" tables are referenced by the GFX7/GFX8/GFX9 group lists and, in
 * part, by the GFX10 list; the "gfx10_" tables are only referenced by the
 * GFX10 list.
 */

/* cik_CB */
static unsigned cik_CB_select0[] = {
   R_037004_CB_PERFCOUNTER0_SELECT,
   R_03700C_CB_PERFCOUNTER1_SELECT,
   R_037010_CB_PERFCOUNTER2_SELECT,
   R_037014_CB_PERFCOUNTER3_SELECT,
};
static unsigned cik_CB_select1[] = {
   R_037008_CB_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_CB = {
   .gpu_block = CB,
   .name = "CB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_CB_select0,
   .select1 = cik_CB_select1,
   .counter0_lo = R_035018_CB_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x0,
};

/* cik_CPC -- uses an explicit .counters[] list instead of .counter0_lo. */
static unsigned cik_CPC_select0[] = {
   R_036024_CPC_PERFCOUNTER0_SELECT,
   R_03600C_CPC_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPC_select1[] = {
   R_036010_CPC_PERFCOUNTER0_SELECT1,
};
static unsigned cik_CPC_counters[] = {
   R_034018_CPC_PERFCOUNTER0_LO,
   R_034010_CPC_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPC = {
   .gpu_block = CPC,
   .name = "CPC",
   .num_counters = 2,

   .select0 = cik_CPC_select0,
   .select1 = cik_CPC_select1,
   .counters = cik_CPC_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x1,
};

/* cik_CPF -- uses an explicit .counters[] list instead of .counter0_lo. */
static unsigned cik_CPF_select0[] = {
   R_03601C_CPF_PERFCOUNTER0_SELECT,
   R_036014_CPF_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPF_select1[] = {
   R_036018_CPF_PERFCOUNTER0_SELECT1,
};
static unsigned cik_CPF_counters[] = {
   R_034028_CPF_PERFCOUNTER0_LO,
   R_034020_CPF_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPF = {
   .gpu_block = CPF,
   .name = "CPF",
   .num_counters = 2,

   .select0 = cik_CPF_select0,
   .select1 = cik_CPF_select1,
   .counters = cik_CPF_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x2,
};

/* cik_CPG -- uses an explicit .counters[] list instead of .counter0_lo. */
static unsigned cik_CPG_select0[] = {
   R_036008_CPG_PERFCOUNTER0_SELECT,
   R_036000_CPG_PERFCOUNTER1_SELECT,
};
static unsigned cik_CPG_select1[] = {
   R_036004_CPG_PERFCOUNTER0_SELECT1
};
static unsigned cik_CPG_counters[] = {
   R_034008_CPG_PERFCOUNTER0_LO,
   R_034000_CPG_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_CPG = {
   .gpu_block = CPG,
   .name = "CPG",
   .num_counters = 2,

   .select0 = cik_CPG_select0,
   .select1 = cik_CPG_select1,
   .counters = cik_CPG_counters,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x0,
};

/* cik_DB -- the select arrays are also reused by gfx10_DB further down. */
static unsigned cik_DB_select0[] = {
   R_037100_DB_PERFCOUNTER0_SELECT,
   R_037108_DB_PERFCOUNTER1_SELECT,
   R_037110_DB_PERFCOUNTER2_SELECT,
   R_037118_DB_PERFCOUNTER3_SELECT,
};
static unsigned cik_DB_select1[] = {
   R_037104_DB_PERFCOUNTER0_SELECT1,
   R_03710C_DB_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_DB = {
   .gpu_block = DB,
   .name = "DB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_DB_select0,
   .select1 = cik_DB_select1,
   .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x1,
};

/* cik_GDS */
static unsigned cik_GDS_select0[] = {
   R_036A00_GDS_PERFCOUNTER0_SELECT,
   R_036A04_GDS_PERFCOUNTER1_SELECT,
   R_036A08_GDS_PERFCOUNTER2_SELECT,
   R_036A0C_GDS_PERFCOUNTER3_SELECT,
};
static unsigned cik_GDS_select1[] = {
   R_036A10_GDS_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_GDS = {
   .gpu_block = GDS,
   .name = "GDS",
   .num_counters = 4,

   .select0 = cik_GDS_select0,
   .select1 = cik_GDS_select1,
   .counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x3,
};

/* cik_GRBM -- explicit .counters[] list, no select1 and no SPM fields. */
static unsigned cik_GRBM_select0[] = {
   R_036100_GRBM_PERFCOUNTER0_SELECT,
   R_036104_GRBM_PERFCOUNTER1_SELECT,
};
static unsigned cik_GRBM_counters[] = {
   R_034100_GRBM_PERFCOUNTER0_LO,
   R_03410C_GRBM_PERFCOUNTER1_LO,
};
static struct ac_pc_block_base cik_GRBM = {
   .gpu_block = GRBM,
   .name = "GRBM",
   .num_counters = 2,

   .select0 = cik_GRBM_select0,
   .counters = cik_GRBM_counters,
};

/* cik_GRBMSE -- one select register per SE0..SE3; no select1, no SPM fields. */
static unsigned cik_GRBMSE_select0[] = {
   R_036108_GRBM_SE0_PERFCOUNTER_SELECT,
   R_03610C_GRBM_SE1_PERFCOUNTER_SELECT,
   R_036110_GRBM_SE2_PERFCOUNTER_SELECT,
   R_036114_GRBM_SE3_PERFCOUNTER_SELECT,
};
static struct ac_pc_block_base cik_GRBMSE = {
   .gpu_block = GRBMSE,
   .name = "GRBMSE",
   .num_counters = 4,

   .select0 = cik_GRBMSE_select0,
   .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,
};

/* cik_IA */
static unsigned cik_IA_select0[] = {
   R_036210_IA_PERFCOUNTER0_SELECT,
   R_036214_IA_PERFCOUNTER1_SELECT,
   R_036218_IA_PERFCOUNTER2_SELECT,
   R_03621C_IA_PERFCOUNTER3_SELECT,
};
static unsigned cik_IA_select1[] = {
   R_036220_IA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_IA = {
   .gpu_block = IA,
   .name = "IA",
   .num_counters = 4,

   .select0 = cik_IA_select0,
   .select1 = cik_IA_select1,
   .counter0_lo = R_034220_IA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x6,
};

/* cik_PA_SC */
static unsigned cik_PA_SC_select0[] = {
   R_036500_PA_SC_PERFCOUNTER0_SELECT,
   R_036508_PA_SC_PERFCOUNTER1_SELECT,
   R_03650C_PA_SC_PERFCOUNTER2_SELECT,
   R_036510_PA_SC_PERFCOUNTER3_SELECT,
   R_036514_PA_SC_PERFCOUNTER4_SELECT,
   R_036518_PA_SC_PERFCOUNTER5_SELECT,
   R_03651C_PA_SC_PERFCOUNTER6_SELECT,
   R_036520_PA_SC_PERFCOUNTER7_SELECT,
};
static unsigned cik_PA_SC_select1[] = {
   R_036504_PA_SC_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_PA_SC = {
   .gpu_block = PA_SC,
   .name = "PA_SC",
   .num_counters = 8,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_PA_SC_select0,
   .select1 = cik_PA_SC_select1,
   .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x4,
};

/* cik_PA_SU */
static unsigned cik_PA_SU_select0[] = {
   R_036400_PA_SU_PERFCOUNTER0_SELECT,
   R_036408_PA_SU_PERFCOUNTER1_SELECT,
   R_036410_PA_SU_PERFCOUNTER2_SELECT,
   R_036414_PA_SU_PERFCOUNTER3_SELECT,
};
static unsigned cik_PA_SU_select1[] = {
   R_036404_PA_SU_PERFCOUNTER0_SELECT1,
   R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
};
/* According to docs, PA_SU counters are only 48 bits wide. */
static struct ac_pc_block_base cik_PA_SU = {
   .gpu_block = PA_SU,
   .name = "PA_SU",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_PA_SU_select0,
   .select1 = cik_PA_SU_select1,
   .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x2,
};

/* cik_SPI */
static unsigned cik_SPI_select0[] = {
   R_036600_SPI_PERFCOUNTER0_SELECT,
   R_036604_SPI_PERFCOUNTER1_SELECT,
   R_036608_SPI_PERFCOUNTER2_SELECT,
   R_03660C_SPI_PERFCOUNTER3_SELECT,
   R_036620_SPI_PERFCOUNTER4_SELECT,
   R_036624_SPI_PERFCOUNTER5_SELECT,
};
static unsigned cik_SPI_select1[] = {
   R_036610_SPI_PERFCOUNTER0_SELECT1,
   R_036614_SPI_PERFCOUNTER1_SELECT1,
   R_036618_SPI_PERFCOUNTER2_SELECT1,
   R_03661C_SPI_PERFCOUNTER3_SELECT1
};
static struct ac_pc_block_base cik_SPI = {
   .gpu_block = SPI,
   .name = "SPI",
   .num_counters = 6,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_SPI_select0,
   .select1 = cik_SPI_select1,
   .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,

   .num_spm_counters = 4,
   .num_spm_wires = 8,
   .spm_block_select = 0x8,
};

/* cik_SQ -- the select0 array is also reused by gfx10_SQ further down. */
static unsigned cik_SQ_select0[] = {
   R_036700_SQ_PERFCOUNTER0_SELECT,
   R_036704_SQ_PERFCOUNTER1_SELECT,
   R_036708_SQ_PERFCOUNTER2_SELECT,
   R_03670C_SQ_PERFCOUNTER3_SELECT,
   R_036710_SQ_PERFCOUNTER4_SELECT,
   R_036714_SQ_PERFCOUNTER5_SELECT,
   R_036718_SQ_PERFCOUNTER6_SELECT,
   R_03671C_SQ_PERFCOUNTER7_SELECT,
   R_036720_SQ_PERFCOUNTER8_SELECT,
   R_036724_SQ_PERFCOUNTER9_SELECT,
   R_036728_SQ_PERFCOUNTER10_SELECT,
   R_03672C_SQ_PERFCOUNTER11_SELECT,
   R_036730_SQ_PERFCOUNTER12_SELECT,
   R_036734_SQ_PERFCOUNTER13_SELECT,
   R_036738_SQ_PERFCOUNTER14_SELECT,
   R_03673C_SQ_PERFCOUNTER15_SELECT,
};
static struct ac_pc_block_base cik_SQ = {
   .gpu_block = SQ,
   .name = "SQ",
   .num_counters = 16,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,

   .select0 = cik_SQ_select0,
   /* Presumably OR-ed into every select value written for this block
    * (all bank/client/SIMD mask bits set) -- TODO confirm against the users
    * of .select_or. */
   .select_or = S_036700_SQC_BANK_MASK(15) | S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15),
   .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,

   /* No select1 array and no .num_spm_counters for SQ. */
   .num_spm_wires = 8,
   .spm_block_select = 0x9,
};

/* cik_SX */
static unsigned cik_SX_select0[] = {
   R_036900_SX_PERFCOUNTER0_SELECT,
   R_036904_SX_PERFCOUNTER1_SELECT,
   R_036908_SX_PERFCOUNTER2_SELECT,
   R_03690C_SX_PERFCOUNTER3_SELECT,
};
static unsigned cik_SX_select1[] = {
   R_036910_SX_PERFCOUNTER0_SELECT1,
   R_036914_SX_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_SX = {
   .gpu_block = SX,
   .name = "SX",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_SX_select0,
   .select1 = cik_SX_select1,
   .counter0_lo = R_034900_SX_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x3,
};

/* cik_TA */
static unsigned cik_TA_select0[] = {
   R_036B00_TA_PERFCOUNTER0_SELECT,
   R_036B08_TA_PERFCOUNTER1_SELECT,
};
static unsigned cik_TA_select1[] = {
   R_036B04_TA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_TA = {
   .gpu_block = TA,
   .name = "TA",
   .num_counters = 2,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TA_select0,
   .select1 = cik_TA_select1,
   .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x5,
};

/* cik_TD */
static unsigned cik_TD_select0[] = {
   R_036C00_TD_PERFCOUNTER0_SELECT,
   R_036C08_TD_PERFCOUNTER1_SELECT,
};
static unsigned cik_TD_select1[] = {
   R_036C04_TD_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base cik_TD = {
   .gpu_block = TD,
   .name = "TD",
   .num_counters = 2,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TD_select0,
   .select1 = cik_TD_select1,
   .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x6,
};

/* cik_TCA */
static unsigned cik_TCA_select0[] = {
   R_036E40_TCA_PERFCOUNTER0_SELECT,
   R_036E48_TCA_PERFCOUNTER1_SELECT,
   R_036E50_TCA_PERFCOUNTER2_SELECT,
   R_036E54_TCA_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCA_select1[] = {
   R_036E44_TCA_PERFCOUNTER0_SELECT1,
   R_036E4C_TCA_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCA = {
   .gpu_block = TCA,
   .name = "TCA",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_TCA_select0,
   .select1 = cik_TCA_select1,
   .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x5,
};

/* cik_TCC */
static unsigned cik_TCC_select0[] = {
   R_036E00_TCC_PERFCOUNTER0_SELECT,
   R_036E08_TCC_PERFCOUNTER1_SELECT,
   R_036E10_TCC_PERFCOUNTER2_SELECT,
   R_036E14_TCC_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCC_select1[] = {
   R_036E04_TCC_PERFCOUNTER0_SELECT1,
   R_036E0C_TCC_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCC = {
   .gpu_block = TCC,
   .name = "TCC",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_TCC_select0,
   .select1 = cik_TCC_select1,
   .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x4,
};

/* cik_TCP -- the select arrays are also reused by gfx10_TCP further down. */
static unsigned cik_TCP_select0[] = {
   R_036D00_TCP_PERFCOUNTER0_SELECT,
   R_036D08_TCP_PERFCOUNTER1_SELECT,
   R_036D10_TCP_PERFCOUNTER2_SELECT,
   R_036D14_TCP_PERFCOUNTER3_SELECT,
};
static unsigned cik_TCP_select1[] = {
   R_036D04_TCP_PERFCOUNTER0_SELECT1,
   R_036D0C_TCP_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_TCP = {
   .gpu_block = TCP,
   .name = "TCP",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,

   .select0 = cik_TCP_select0,
   .select1 = cik_TCP_select1,
   .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0x7,
};

/* cik_VGT */
static unsigned cik_VGT_select0[] = {
   R_036230_VGT_PERFCOUNTER0_SELECT,
   R_036234_VGT_PERFCOUNTER1_SELECT,
   R_036238_VGT_PERFCOUNTER2_SELECT,
   R_03623C_VGT_PERFCOUNTER3_SELECT,
};
static unsigned cik_VGT_select1[] = {
   R_036240_VGT_PERFCOUNTER0_SELECT1,
   R_036244_VGT_PERFCOUNTER1_SELECT1,
};
static struct ac_pc_block_base cik_VGT = {
   .gpu_block = VGT,
   .name = "VGT",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE,

   .select0 = cik_VGT_select0,
   .select1 = cik_VGT_select1,
   .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 3,
   .spm_block_select = 0xa,
};

/* cik_WD -- no select1 array and no SPM fields. */
static unsigned cik_WD_select0[] = {
   R_036200_WD_PERFCOUNTER0_SELECT,
   R_036204_WD_PERFCOUNTER1_SELECT,
   R_036208_WD_PERFCOUNTER2_SELECT,
   R_03620C_WD_PERFCOUNTER3_SELECT,
};
static struct ac_pc_block_base cik_WD = {
   .gpu_block = WD,
   .name = "WD",
   .num_counters = 4,

   .select0 = cik_WD_select0,
   .counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
};

/* cik_MC -- described by id, name and counter count only; no register tables
 * are provided in this file. */
static struct ac_pc_block_base cik_MC = {
   .gpu_block = MC,
   .name = "MC",
   .num_counters = 4,
};

/* cik_SRBM -- described by id, name and counter count only; no register
 * tables are provided in this file. */
static struct ac_pc_block_base cik_SRBM = {
   .gpu_block = SRBM,
   .name = "SRBM",
   .num_counters = 2,
};

/* gfx10_CHA */
static unsigned gfx10_CHA_select0[] = {
   R_037780_CHA_PERFCOUNTER0_SELECT,
   R_037788_CHA_PERFCOUNTER1_SELECT,
   R_03778C_CHA_PERFCOUNTER2_SELECT,
   R_037790_CHA_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHA_select1[] = {
   R_037784_CHA_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHA = {
   .gpu_block = CHA,
   .name = "CHA",
   .num_counters = 4,

   .select0 = gfx10_CHA_select0,
   .select1 = gfx10_CHA_select1,
   .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xc,
};

/* gfx10_CHCG */
static unsigned gfx10_CHCG_select0[] = {
   R_036F18_CHCG_PERFCOUNTER0_SELECT,
   R_036F20_CHCG_PERFCOUNTER1_SELECT,
   R_036F24_CHCG_PERFCOUNTER2_SELECT,
   R_036F28_CHCG_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHCG_select1[] = {
   R_036F1C_CHCG_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHCG = {
   .gpu_block = CHCG,
   .name = "CHCG",
   .num_counters = 4,

   .select0 = gfx10_CHCG_select0,
   .select1 = gfx10_CHCG_select1,
   .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xe,
};

/* gfx10_CHC */
static unsigned gfx10_CHC_select0[] = {
   R_036F00_CHC_PERFCOUNTER0_SELECT,
   R_036F08_CHC_PERFCOUNTER1_SELECT,
   R_036F0C_CHC_PERFCOUNTER2_SELECT,
   R_036F10_CHC_PERFCOUNTER3_SELECT,
};
static unsigned gfx10_CHC_select1[] = {
   R_036F04_CHC_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_CHC = {
   .gpu_block = CHC,
   .name = "CHC",
   .num_counters = 4,

   .select0 = gfx10_CHC_select0,
   .select1 = gfx10_CHC_select1,
   .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0xd,
};

/* gfx10_DB -- shares the cik_DB select arrays and counter0_lo; the only
 * field that differs from cik_DB is .num_spm_wires (4 instead of 3). */
static struct ac_pc_block_base gfx10_DB = {
   .gpu_block = DB,
   .name = "DB",
   .num_counters = 4,
   .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,

   .select0 = cik_DB_select0,
   .select1 = cik_DB_select1,
   .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,

   .num_spm_counters = 2,
   .num_spm_wires = 4,
   .spm_block_select = 0x1,
};

/* gfx10_GCR */
static unsigned gfx10_GCR_select0[] = {
   R_037580_GCR_PERFCOUNTER0_SELECT,
   R_037588_GCR_PERFCOUNTER1_SELECT,
};
static unsigned gfx10_GCR_select1[] = {
   R_037584_GCR_PERFCOUNTER0_SELECT1,
};
static struct ac_pc_block_base gfx10_GCR = {
   .gpu_block = GCR,
   .name = "GCR",
   .num_counters = 2,

   .select0 = gfx10_GCR_select0,
   .select1 = gfx10_GCR_select1,
   .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,

   .num_spm_counters = 1,
   .num_spm_wires = 2,
   .spm_block_select = 0x4,
};

/* gfx10_GE -- select registers for the 12 GE counters; the matching select1
 * array and block descriptor follow immediately after this array. */
static unsigned gfx10_GE_select0[] = {
   R_036200_GE_PERFCOUNTER0_SELECT,
   R_036208_GE_PERFCOUNTER1_SELECT,
   R_036210_GE_PERFCOUNTER2_SELECT,
   R_036218_GE_PERFCOUNTER3_SELECT,
   R_036220_GE_PERFCOUNTER4_SELECT,
   R_036228_GE_PERFCOUNTER5_SELECT,
   R_036230_GE_PERFCOUNTER6_SELECT,
   R_036238_GE_PERFCOUNTER7_SELECT,
   R_036240_GE_PERFCOUNTER8_SELECT,
   R_036248_GE_PERFCOUNTER9_SELECT,
   R_036250_GE_PERFCOUNTER10_SELECT,
   R_036258_GE_PERFCOUNTER11_SELECT,
};
692static unsigned gfx10_GE_select1[] = { 693 R_036204_GE_PERFCOUNTER0_SELECT1, 694 R_03620C_GE_PERFCOUNTER1_SELECT1, 695 R_036214_GE_PERFCOUNTER2_SELECT1, 696 R_03621C_GE_PERFCOUNTER3_SELECT1, 697}; 698static struct ac_pc_block_base gfx10_GE = { 699 .gpu_block = GE, 700 .name = "GE", 701 .num_counters = 12, 702 703 .select0 = gfx10_GE_select0, 704 .select1 = gfx10_GE_select1, 705 .counter0_lo = R_034200_GE_PERFCOUNTER0_LO, 706 707 .num_spm_counters = 4, 708 .num_spm_wires = 8, 709 .spm_block_select = 0x6, 710}; 711 712/* gfx10_GL1A */ 713static unsigned gfx10_GL1A_select0[] = { 714 R_037700_GL1A_PERFCOUNTER0_SELECT, 715 R_037708_GL1A_PERFCOUNTER1_SELECT, 716 R_03770C_GL1A_PERFCOUNTER2_SELECT, 717 R_037710_GL1A_PERFCOUNTER3_SELECT, 718}; 719static unsigned gfx10_GL1A_select1[] = { 720 R_037704_GL1A_PERFCOUNTER0_SELECT1, 721}; 722static struct ac_pc_block_base gfx10_GL1A = { 723 .gpu_block = GL1A, 724 .name = "GL1A", 725 .num_counters = 4, 726 .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, 727 728 .select0 = gfx10_GL1A_select0, 729 .select1 = gfx10_GL1A_select1, 730 .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO, 731 732 .num_spm_counters = 1, 733 .num_spm_wires = 2, 734 .spm_block_select = 0xa, 735}; 736 737/* gfx10_GL1C */ 738static unsigned gfx10_GL1C_select0[] = { 739 R_036E80_GL1C_PERFCOUNTER0_SELECT, 740 R_036E88_GL1C_PERFCOUNTER1_SELECT, 741 R_036E8C_GL1C_PERFCOUNTER2_SELECT, 742 R_036E90_GL1C_PERFCOUNTER3_SELECT, 743}; 744static unsigned gfx10_GL1C_select1[] = { 745 R_036E84_GL1C_PERFCOUNTER0_SELECT1, 746}; 747static struct ac_pc_block_base gfx10_GL1C = { 748 .gpu_block = GL1C, 749 .name = "GL1C", 750 .num_counters = 4, 751 .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, 752 753 .select0 = gfx10_GL1C_select0, 754 .select1 = gfx10_GL1C_select1, 755 .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO, 756 757 .num_spm_counters = 1, 758 .num_spm_wires = 2, 759 .spm_block_select = 0xc 760}; 761 762/* gfx10_GL2A */ 763static unsigned gfx10_GL2A_select0[] = 
{ 764 R_036E40_GL2A_PERFCOUNTER0_SELECT, 765 R_036E48_GL2A_PERFCOUNTER1_SELECT, 766 R_036E50_GL2A_PERFCOUNTER2_SELECT, 767 R_036E54_GL2A_PERFCOUNTER3_SELECT, 768}; 769static unsigned gfx10_GL2A_select1[] = { 770 R_036E44_GL2A_PERFCOUNTER0_SELECT1, 771 R_036E4C_GL2A_PERFCOUNTER1_SELECT1, 772}; 773static struct ac_pc_block_base gfx10_GL2A = { 774 .gpu_block = GL2A, 775 .name = "GL2A", 776 .num_counters = 4, 777 778 .select0 = gfx10_GL2A_select0, 779 .select1 = gfx10_GL2A_select1, 780 .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO, 781 782 .num_spm_counters = 2, 783 .num_spm_wires = 4, 784 .spm_block_select = 0x7, 785}; 786 787/* gfx10_GL2C */ 788static unsigned gfx10_GL2C_select0[] = { 789 R_036E00_GL2C_PERFCOUNTER0_SELECT, 790 R_036E08_GL2C_PERFCOUNTER1_SELECT, 791 R_036E10_GL2C_PERFCOUNTER2_SELECT, 792 R_036E14_GL2C_PERFCOUNTER3_SELECT, 793}; 794static unsigned gfx10_GL2C_select1[] = { 795 R_036E04_GL2C_PERFCOUNTER0_SELECT1, 796 R_036E0C_GL2C_PERFCOUNTER1_SELECT1, 797}; 798static struct ac_pc_block_base gfx10_GL2C = { 799 .gpu_block = GL2C, 800 .name = "GL2C", 801 .num_counters = 4, 802 803 .select0 = gfx10_GL2C_select0, 804 .select1 = gfx10_GL2C_select1, 805 .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO, 806 807 .num_spm_counters = 2, 808 .num_spm_wires = 4, 809 .spm_block_select = 0x8, 810}; 811 812/* gfx10_PA_PH */ 813static unsigned gfx10_PA_PH_select0[] = { 814 R_037600_PA_PH_PERFCOUNTER0_SELECT, 815 R_037608_PA_PH_PERFCOUNTER1_SELECT, 816 R_03760C_PA_PH_PERFCOUNTER2_SELECT, 817 R_037610_PA_PH_PERFCOUNTER3_SELECT, 818 R_037614_PA_PH_PERFCOUNTER4_SELECT, 819 R_037618_PA_PH_PERFCOUNTER5_SELECT, 820 R_03761C_PA_PH_PERFCOUNTER6_SELECT, 821 R_037620_PA_PH_PERFCOUNTER7_SELECT, 822}; 823static unsigned gfx10_PA_PH_select1[] = { 824 R_037604_PA_PH_PERFCOUNTER0_SELECT1, 825 R_037640_PA_PH_PERFCOUNTER1_SELECT1, 826 R_037644_PA_PH_PERFCOUNTER2_SELECT1, 827 R_037648_PA_PH_PERFCOUNTER3_SELECT1, 828}; 829static struct ac_pc_block_base gfx10_PA_PH = { 830 .gpu_block = PA_PH, 
831 .name = "PA_PH", 832 .num_counters = 8, 833 .flags = AC_PC_BLOCK_SE, 834 835 .select0 = gfx10_PA_PH_select0, 836 .select1 = gfx10_PA_PH_select1, 837 .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO, 838 839 .num_spm_counters = 4, 840 .num_spm_wires = 8, 841 .spm_block_select = 0x5, 842}; 843 844/* gfx10_PA_SU */ 845static unsigned gfx10_PA_SU_select0[] = { 846 R_036400_PA_SU_PERFCOUNTER0_SELECT, 847 R_036408_PA_SU_PERFCOUNTER1_SELECT, 848 R_036410_PA_SU_PERFCOUNTER2_SELECT, 849 R_036418_PA_SU_PERFCOUNTER3_SELECT, 850}; 851static unsigned gfx10_PA_SU_select1[] = { 852 R_036404_PA_SU_PERFCOUNTER0_SELECT1, 853 R_03640C_PA_SU_PERFCOUNTER1_SELECT1, 854 R_036414_PA_SU_PERFCOUNTER2_SELECT1, 855 R_03641C_PA_SU_PERFCOUNTER3_SELECT1, 856}; 857static struct ac_pc_block_base gfx10_PA_SU = { 858 .gpu_block = PA_SU, 859 .name = "PA_SU", 860 .num_counters = 4, 861 .flags = AC_PC_BLOCK_SE, 862 863 .select0 = gfx10_PA_SU_select0, 864 .select1 = gfx10_PA_SU_select1, 865 .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO, 866 867 .num_spm_counters = 4, 868 .num_spm_wires = 8, 869 .spm_block_select = 0x2, 870}; 871 872/* gfx10_RLC */ 873static unsigned gfx10_RLC_select0[] = { 874 R_037304_RLC_PERFCOUNTER0_SELECT, 875 R_037308_RLC_PERFCOUNTER1_SELECT, 876}; 877static struct ac_pc_block_base gfx10_RLC = { 878 .gpu_block = RLC, 879 .name = "RLC", 880 .num_counters = 2, 881 882 .select0 = gfx10_RLC_select0, 883 .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO, 884 .num_spm_counters = 0, 885}; 886 887/* gfx10_RMI */ 888static unsigned gfx10_RMI_select0[] = { 889 R_037400_RMI_PERFCOUNTER0_SELECT, 890 R_037408_RMI_PERFCOUNTER1_SELECT, 891 R_03740C_RMI_PERFCOUNTER2_SELECT, 892 R_037414_RMI_PERFCOUNTER3_SELECT, 893}; 894static unsigned gfx10_RMI_select1[] = { 895 R_037404_RMI_PERFCOUNTER0_SELECT1, 896 R_037410_RMI_PERFCOUNTER2_SELECT1, 897}; 898static struct ac_pc_block_base gfx10_RMI = { 899 .gpu_block = RMI, 900 .name = "RMI", 901 .num_counters = 4, 902 .flags = AC_PC_BLOCK_SE | 
AC_PC_BLOCK_INSTANCE_GROUPS, 903 904 .select0 = gfx10_RMI_select0, 905 .select1 = gfx10_RMI_select1, 906 .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO, 907 908 .num_spm_counters = 2, 909 .num_spm_wires = 2, 910 .spm_block_select = 0xb, 911}; 912 913/* gfx10_SQ */ 914static struct ac_pc_block_base gfx10_SQ = { 915 .gpu_block = SQ, 916 .name = "SQ", 917 .num_counters = 16, 918 .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER, 919 920 .select0 = cik_SQ_select0, 921 .select_or = S_036700_SQC_BANK_MASK(15), 922 .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO, 923 924 .num_spm_wires = 16, 925 .spm_block_select = 0x9, 926}; 927 928/* gfx10_TCP */ 929static struct ac_pc_block_base gfx10_TCP = { 930 .gpu_block = TCP, 931 .name = "TCP", 932 .num_counters = 4, 933 .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED, 934 935 .select0 = cik_TCP_select0, 936 .select1 = cik_TCP_select1, 937 .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO, 938 939 .num_spm_counters = 2, 940 .num_spm_wires = 4, 941 .spm_block_select = 0x7, 942}; 943 944/* gfx10_UTCL1 */ 945static unsigned gfx10_UTCL1_select0[] = { 946 R_03758C_UTCL1_PERFCOUNTER0_SELECT, 947 R_037590_UTCL1_PERFCOUNTER1_SELECT, 948}; 949static struct ac_pc_block_base gfx10_UTCL1 = { 950 .gpu_block = UTCL1, 951 .name = "UTCL1", 952 .num_counters = 2, 953 .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, 954 955 .select0 = gfx10_UTCL1_select0, 956 .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO, 957 .num_spm_counters = 0, 958}; 959 960/* Both the number of instances and selectors varies between chips of the same 961 * class. We only differentiate by class here and simply expose the maximum 962 * number over all chips in a class. 963 * 964 * Unfortunately, GPUPerfStudio uses the order of performance counter groups 965 * blindly once it believes it has identified the hardware, so the order of 966 * blocks here matters. 
967 */ 968static struct ac_pc_block_gfxdescr groups_CIK[] = { 969 {&cik_CB, 226}, {&cik_CPF, 17}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15}, 970 {&cik_PA_SU, 153}, {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252}, {&cik_SX, 32}, 971 {&cik_TA, 111}, {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55}, {&cik_TCP, 154}, 972 {&cik_GDS, 121}, {&cik_VGT, 140}, {&cik_IA, 22}, {&cik_MC, 22}, {&cik_SRBM, 19}, 973 {&cik_WD, 22}, {&cik_CPG, 46}, {&cik_CPC, 22}, 974 975}; 976 977static struct ac_pc_block_gfxdescr groups_VI[] = { 978 {&cik_CB, 405}, {&cik_CPF, 19}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15}, 979 {&cik_PA_SU, 154}, {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273}, {&cik_SX, 34}, 980 {&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55}, {&cik_TCP, 180}, 981 {&cik_GDS, 121}, {&cik_VGT, 147}, {&cik_IA, 24}, {&cik_MC, 22}, {&cik_SRBM, 27}, 982 {&cik_WD, 37}, {&cik_CPG, 48}, {&cik_CPC, 24}, 983 984}; 985 986static struct ac_pc_block_gfxdescr groups_gfx9[] = { 987 {&cik_CB, 438}, {&cik_CPF, 32}, {&cik_DB, 328}, {&cik_GRBM, 38}, {&cik_GRBMSE, 16}, 988 {&cik_PA_SU, 292}, {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374}, {&cik_SX, 208}, 989 {&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57}, {&cik_TCP, 85}, 990 {&cik_GDS, 121}, {&cik_VGT, 148}, {&cik_IA, 32}, {&cik_WD, 58}, {&cik_CPG, 59}, 991 {&cik_CPC, 35}, 992}; 993 994static struct ac_pc_block_gfxdescr groups_gfx10[] = { 995 {&cik_CB, 461}, 996 {&gfx10_CHA, 45}, 997 {&gfx10_CHCG, 35}, 998 {&gfx10_CHC, 35}, 999 {&cik_CPC, 47}, 1000 {&cik_CPF, 40}, 1001 {&cik_CPG, 82}, 1002 {&gfx10_DB, 370}, 1003 {&gfx10_GCR, 94}, 1004 {&cik_GDS, 123}, 1005 {&gfx10_GE, 315}, 1006 {&gfx10_GL1A, 36}, 1007 {&gfx10_GL1C, 64}, 1008 {&gfx10_GL2A, 91}, 1009 {&gfx10_GL2C, 235}, 1010 {&cik_GRBM, 47}, 1011 {&cik_GRBMSE, 19}, 1012 {&gfx10_PA_PH, 960}, 1013 {&cik_PA_SC, 552}, 1014 {&gfx10_PA_SU, 266}, 1015 {&gfx10_RLC, 7}, 1016 {&gfx10_RMI, 258}, 1017 {&cik_SPI, 329}, 1018 {&gfx10_SQ, 509}, 1019 
{&cik_SX, 225}, 1020 {&cik_TA, 226}, 1021 {&gfx10_TCP, 77}, 1022 {&cik_TD, 61}, 1023 {&gfx10_UTCL1, 15}, 1024}; 1025 1026struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc, 1027 unsigned index, unsigned *base_gid, 1028 unsigned *sub_index) 1029{ 1030 struct ac_pc_block *block = pc->blocks; 1031 unsigned bid; 1032 1033 *base_gid = 0; 1034 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 1035 unsigned total = block->num_groups * block->b->selectors; 1036 1037 if (index < total) { 1038 *sub_index = index; 1039 return block; 1040 } 1041 1042 index -= total; 1043 *base_gid += block->num_groups; 1044 } 1045 1046 return NULL; 1047} 1048 1049struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc, 1050 unsigned *index) 1051{ 1052 unsigned bid; 1053 struct ac_pc_block *block = pc->blocks; 1054 1055 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 1056 if (*index < block->num_groups) 1057 return block; 1058 *index -= block->num_groups; 1059 } 1060 1061 return NULL; 1062} 1063 1064bool ac_init_block_names(const struct radeon_info *info, 1065 const struct ac_perfcounters *pc, 1066 struct ac_pc_block *block) 1067{ 1068 bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block); 1069 bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block); 1070 unsigned i, j, k; 1071 unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; 1072 unsigned namelen; 1073 char *groupname; 1074 char *p; 1075 1076 if (per_instance_groups) 1077 groups_instance = block->num_instances; 1078 if (per_se_groups) 1079 groups_se = info->max_se; 1080 if (block->b->b->flags & AC_PC_BLOCK_SHADER) 1081 groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits); 1082 1083 namelen = strlen(block->b->b->name); 1084 block->group_name_stride = namelen + 1; 1085 if (block->b->b->flags & AC_PC_BLOCK_SHADER) 1086 block->group_name_stride += 3; 1087 if (per_se_groups) { 1088 assert(groups_se <= 10); 1089 block->group_name_stride += 1; 1090 1091 if 
(per_instance_groups) 1092 block->group_name_stride += 1; 1093 } 1094 if (per_instance_groups) { 1095 assert(groups_instance <= 100); 1096 block->group_name_stride += 2; 1097 } 1098 1099 block->group_names = MALLOC(block->num_groups * block->group_name_stride); 1100 if (!block->group_names) 1101 return false; 1102 1103 groupname = block->group_names; 1104 for (i = 0; i < groups_shader; ++i) { 1105 const char *shader_suffix = ac_pc_shader_type_suffixes[i]; 1106 unsigned shaderlen = strlen(shader_suffix); 1107 for (j = 0; j < groups_se; ++j) { 1108 for (k = 0; k < groups_instance; ++k) { 1109 strcpy(groupname, block->b->b->name); 1110 p = groupname + namelen; 1111 1112 if (block->b->b->flags & AC_PC_BLOCK_SHADER) { 1113 strcpy(p, shader_suffix); 1114 p += shaderlen; 1115 } 1116 1117 if (per_se_groups) { 1118 p += sprintf(p, "%d", j); 1119 if (per_instance_groups) 1120 *p++ = '_'; 1121 } 1122 1123 if (per_instance_groups) 1124 p += sprintf(p, "%d", k); 1125 1126 groupname += block->group_name_stride; 1127 } 1128 } 1129 } 1130 1131 assert(block->b->selectors <= 1000); 1132 block->selector_name_stride = block->group_name_stride + 4; 1133 block->selector_names = 1134 MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride); 1135 if (!block->selector_names) 1136 return false; 1137 1138 groupname = block->group_names; 1139 p = block->selector_names; 1140 for (i = 0; i < block->num_groups; ++i) { 1141 for (j = 0; j < block->b->selectors; ++j) { 1142 sprintf(p, "%s_%03d", groupname, j); 1143 p += block->selector_name_stride; 1144 } 1145 groupname += block->group_name_stride; 1146 } 1147 1148 return true; 1149} 1150 1151bool ac_init_perfcounters(const struct radeon_info *info, 1152 bool separate_se, 1153 bool separate_instance, 1154 struct ac_perfcounters *pc) 1155{ 1156 const struct ac_pc_block_gfxdescr *blocks; 1157 unsigned num_blocks; 1158 1159 switch (info->gfx_level) { 1160 case GFX7: 1161 blocks = groups_CIK; 1162 num_blocks = 
ARRAY_SIZE(groups_CIK); 1163 break; 1164 case GFX8: 1165 blocks = groups_VI; 1166 num_blocks = ARRAY_SIZE(groups_VI); 1167 break; 1168 case GFX9: 1169 blocks = groups_gfx9; 1170 num_blocks = ARRAY_SIZE(groups_gfx9); 1171 break; 1172 case GFX10: 1173 case GFX10_3: 1174 blocks = groups_gfx10; 1175 num_blocks = ARRAY_SIZE(groups_gfx10); 1176 break; 1177 case GFX6: 1178 default: 1179 return false; /* not implemented */ 1180 } 1181 1182 pc->separate_se = separate_se; 1183 pc->separate_instance = separate_instance; 1184 1185 pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block)); 1186 if (!pc->blocks) 1187 return false; 1188 pc->num_blocks = num_blocks; 1189 1190 for (unsigned i = 0; i < num_blocks; i++) { 1191 struct ac_pc_block *block = &pc->blocks[i]; 1192 1193 block->b = &blocks[i]; 1194 block->num_instances = MAX2(1, block->b->instances); 1195 1196 if (!strcmp(block->b->b->name, "CB") || 1197 !strcmp(block->b->b->name, "DB") || 1198 !strcmp(block->b->b->name, "RMI")) 1199 block->num_instances = info->max_se; 1200 else if (!strcmp(block->b->b->name, "TCC")) 1201 block->num_instances = info->max_tcc_blocks; 1202 else if (!strcmp(block->b->b->name, "IA")) 1203 block->num_instances = MAX2(1, info->max_se / 2); 1204 else if (!strcmp(block->b->b->name, "TA") || 1205 !strcmp(block->b->b->name, "TCP") || 1206 !strcmp(block->b->b->name, "TD")) { 1207 block->num_instances = MAX2(1, info->max_good_cu_per_sa); 1208 } 1209 1210 if (ac_pc_block_has_per_instance_groups(pc, block)) { 1211 block->num_groups = block->num_instances; 1212 } else { 1213 block->num_groups = 1; 1214 } 1215 1216 if (ac_pc_block_has_per_se_groups(pc, block)) 1217 block->num_groups *= info->max_se; 1218 if (block->b->b->flags & AC_PC_BLOCK_SHADER) 1219 block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits); 1220 1221 pc->num_groups += block->num_groups; 1222 } 1223 1224 return true; 1225} 1226 1227void ac_destroy_perfcounters(struct ac_perfcounters *pc) 1228{ 1229 if (!pc) 1230 return; 1231 1232 for 
(unsigned i = 0; i < pc->num_blocks; ++i) { 1233 FREE(pc->blocks[i].group_names); 1234 FREE(pc->blocks[i].selector_names); 1235 } 1236 FREE(pc->blocks); 1237} 1238 1239struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc, 1240 enum ac_pc_gpu_block gpu_block) 1241{ 1242 for (unsigned i = 0; i < pc->num_blocks; i++) { 1243 struct ac_pc_block *block = &pc->blocks[i]; 1244 if (block->b->b->gpu_block == gpu_block) { 1245 return block; 1246 } 1247 } 1248 return NULL; 1249} 1250