1 /* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25#ifndef INTEL_DEVICE_INFO_H 26#define INTEL_DEVICE_INFO_H 27 28#include <stdbool.h> 29#include <stdint.h> 30 31#include "util/macros.h" 32#include "compiler/shader_enums.h" 33 34#ifdef __cplusplus 35extern "C" { 36#endif 37 38struct drm_i915_query_topology_info; 39 40#define INTEL_DEVICE_MAX_NAME_SIZE 64 41#define INTEL_DEVICE_MAX_SLICES 8 42#define INTEL_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */ 43#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */ 44#define INTEL_DEVICE_MAX_PIXEL_PIPES (16) /* Maximum on DG2 */ 45 46#define INTEL_PLATFORM_GROUP_START(group, new_enum) \ 47 new_enum, INTEL_PLATFORM_ ## group ## _START = new_enum 48#define INTEL_PLATFORM_GROUP_END(group, new_enum) \ 49 new_enum, INTEL_PLATFORM_ ## group ## _END = new_enum 50 51enum intel_platform { 52 INTEL_PLATFORM_GFX3 = 1, 53 INTEL_PLATFORM_I965, 54 INTEL_PLATFORM_ILK, 55 INTEL_PLATFORM_G4X, 56 INTEL_PLATFORM_SNB, 57 INTEL_PLATFORM_IVB, 58 INTEL_PLATFORM_BYT, 59 INTEL_PLATFORM_HSW, 60 INTEL_PLATFORM_BDW, 61 INTEL_PLATFORM_CHV, 62 INTEL_PLATFORM_SKL, 63 INTEL_PLATFORM_BXT, 64 INTEL_PLATFORM_KBL, 65 INTEL_PLATFORM_GLK, 66 INTEL_PLATFORM_CFL, 67 INTEL_PLATFORM_ICL, 68 INTEL_PLATFORM_EHL, 69 INTEL_PLATFORM_TGL, 70 INTEL_PLATFORM_RKL, 71 INTEL_PLATFORM_DG1, 72 INTEL_PLATFORM_ADL, 73 INTEL_PLATFORM_RPL, 74 INTEL_PLATFORM_GROUP_START(DG2, INTEL_PLATFORM_DG2_G10), 75 INTEL_PLATFORM_DG2_G11, 76 INTEL_PLATFORM_GROUP_END(DG2, INTEL_PLATFORM_DG2_G12), 77}; 78 79#undef INTEL_PLATFORM_GROUP_START 80#undef INTEL_PLATFORM_GROUP_END 81 82#define intel_platform_in_range(platform, platform_range) \ 83 (((platform) >= INTEL_PLATFORM_ ## platform_range ## _START) && \ 84 ((platform) <= INTEL_PLATFORM_ ## platform_range ## _END)) 85 86#define intel_device_info_is_dg2(devinfo) \ 87 intel_platform_in_range((devinfo)->platform, DG2) 88 89/** 90 * Intel hardware information and quirks 91 */ 92struct intel_device_info 93{ 94 /* Driver internal numbers used to differentiate platforms. */ 95 int ver; 96 int verx10; 97 int display_ver; 98 99 /** 100 * This revision is from ioctl (I915_PARAM_REVISION) unlike 101 * pci_revision_id from drm device. Its value is not always 102 * same as the pci_revision_id. 103 */ 104 int revision; 105 int gt; 106 107 /* PCI info */ 108 uint16_t pci_domain; 109 uint8_t pci_bus; 110 uint8_t pci_dev; 111 uint8_t pci_func; 112 uint16_t pci_device_id; 113 uint8_t pci_revision_id; 114 115 enum intel_platform platform; 116 117 bool has_hiz_and_separate_stencil; 118 bool must_use_separate_stencil; 119 bool has_sample_with_hiz; 120 bool has_bit6_swizzle; 121 bool has_llc; 122 123 bool has_pln; 124 bool has_64bit_float; 125 bool has_64bit_int; 126 bool has_integer_dword_mul; 127 bool has_compr4; 128 bool has_surface_tile_offset; 129 bool supports_simd16_3src; 130 bool disable_ccs_repack; 131 132 /** 133 * True if CCS uses a flat virtual address translation to a memory 134 * carve-out, rather than aux map translations, or additional surfaces. 135 */ 136 bool has_flat_ccs; 137 bool has_aux_map; 138 bool has_tiling_uapi; 139 bool has_ray_tracing; 140 bool has_ray_query; 141 bool has_local_mem; 142 bool has_lsc; 143 bool has_mesh_shading; 144 145 /** 146 * \name Intel hardware quirks 147 * @{ 148 */ 149 bool has_negative_rhw_bug; 150 151 /** 152 * Whether this platform supports fragment shading rate controlled by a 153 * primitive in geometry shaders and by a control buffer. 154 */ 155 bool has_coarse_pixel_primitive_and_cb; 156 157 /** 158 * Some versions of Gen hardware don't do centroid interpolation correctly 159 * on unlit pixels, causing incorrect values for derivatives near triangle 160 * edges. Enabling this flag causes the fragment shader to use 161 * non-centroid interpolation for unlit pixels, at the expense of two extra 162 * fragment shader instructions. 163 */ 164 bool needs_unlit_centroid_workaround; 165 /** @} */ 166 167 /** 168 * \name GPU hardware limits 169 * 170 * In general, you can find shader thread maximums by looking at the "Maximum 171 * Number of Threads" field in the Intel PRM description of the 3DSTATE_VS, 172 * 3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry 173 * limits come from the "Number of URB Entries" field in the 174 * 3DSTATE_URB_VS command and friends. 175 * 176 * These fields are used to calculate the scratch space to allocate. The 177 * amount of scratch space can be larger without being harmful on modern 178 * GPUs, however, prior to Haswell, programming the maximum number of threads 179 * to greater than the hardware maximum would cause GPU performance to tank. 180 * 181 * @{ 182 */ 183 /** 184 * Total number of slices present on the device whether or not they've been 185 * fused off. 186 * 187 * XXX: CS thread counts are limited by the inability to do cross subslice 188 * communication. It is the effectively the number of logical threads which 189 * can be executed in a subslice. Fuse configurations may cause this number 190 * to change, so we program @max_cs_threads as the lower maximum. 191 */ 192 unsigned num_slices; 193 194 /** 195 * Maximum number of slices present on this device (can be more than 196 * num_slices if some slices are fused). 197 */ 198 unsigned max_slices; 199 200 /** 201 * Number of subslices for each slice (used to be uniform until CNL). 202 */ 203 unsigned num_subslices[INTEL_DEVICE_MAX_SLICES]; 204 205 /** 206 * Maximum number of subslices per slice present on this device (can be 207 * more than the maximum value in the num_subslices[] array if some 208 * subslices are fused). 209 */ 210 unsigned max_subslices_per_slice; 211 212 /** 213 * Maximum number of subslices per slice present on this device (can be 214 * more than the maximum value in the num_subslices[] array if some 215 * subslices are fused). 216 */ 217 unsigned max_subslices_per_slice; 218 219 /** 220 * Number of subslices on each pixel pipe (ICL). 221 */ 222 unsigned ppipe_subslices[INTEL_DEVICE_MAX_PIXEL_PIPES]; 223 224 /** 225 * Maximum number of EUs per subslice (some EUs can be fused off). 226 */ 227 unsigned max_eus_per_subslice; 228 229 /** 230 * Maximum number of EUs per subslice (can be more than num_eu_per_subslice 231 * if some EUs are fused off). 232 */ 233 unsigned max_eu_per_subslice; 234 235 /** 236 * Number of threads per eu, varies between 4 and 8 between generations. 237 */ 238 unsigned num_thread_per_eu; 239 240 /** 241 * A bit mask of the slices available. 242 */ 243 uint8_t slice_masks; 244 245 /** 246 * An array of bit mask of the subslices available, use subslice_slice_stride 247 * to access this array. 248 */ 249 uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES * 250 DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)]; 251 252 /** 253 * The number of enabled subslices (considering fusing). For exactly which 254 * subslices are enabled, see subslice_masks[]. 255 */ 256 unsigned subslice_total; 257 258 /** 259 * An array of bit mask of EUs available, use eu_slice_stride & 260 * eu_subslice_stride to access this array. 261 */ 262 uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES * 263 INTEL_DEVICE_MAX_SUBSLICES * 264 DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)]; 265 266 /** 267 * Stride to access subslice_masks[]. 268 */ 269 uint16_t subslice_slice_stride; 270 271 /** 272 * Strides to access eu_masks[]. 273 */ 274 uint16_t eu_slice_stride; 275 uint16_t eu_subslice_stride; 276 277 unsigned l3_banks; 278 unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ 279 unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ 280 unsigned max_tes_threads; /**< Maximum Domain Shader threads */ 281 unsigned max_gs_threads; /**< Maximum Geometry Shader threads. */ 282 /** 283 * Theoretical maximum number of Pixel Shader threads. 284 * 285 * PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will 286 * automatically scale pixel shader thread count, based on a single value 287 * programmed into 3DSTATE_PS. 288 * 289 * To calculate the maximum number of threads for Gfx8 beyond (which have 290 * multiple Pixel Shader Dispatchers): 291 * 292 * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD" 293 * - Usually there's only one PSD per subslice, so use the number of 294 * subslices for number of PSDs. 295 * - For max_wm_threads, the total should be PSD threads * #PSDs. 296 */ 297 unsigned max_wm_threads; 298 299 unsigned max_threads_per_psd; 300 301 /** 302 * Maximum Compute Shader threads. 303 * 304 * Thread count * number of EUs per subslice 305 */ 306 unsigned max_cs_threads; 307 308 /** 309 * Maximum number of threads per workgroup supported by the GPGPU_WALKER or 310 * COMPUTE_WALKER command. 311 * 312 * This may be smaller than max_cs_threads as it takes into account added 313 * restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads 314 * expresses the total parallelism of the GPU, this expresses the maximum 315 * number of threads we can dispatch in a single workgroup. 316 */ 317 unsigned max_cs_workgroup_threads; 318 319 /** 320 * The maximum number of potential scratch ids. Due to hardware 321 * implementation details, the range of scratch ids may be larger than the 322 * number of subslices. 323 */ 324 unsigned max_scratch_ids[MESA_SHADER_STAGES]; 325 326 struct { 327 /** 328 * Fixed size of the URB. 329 * 330 * On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure 331 * this in 512b blocks, as that's more convenient there. 332 * 333 * On most Gfx7+ platforms, the URB is a section of the L3 cache, 334 * and can be resized based on the L3 programming. For those platforms, 335 * simply leave this field blank (zero) - it isn't used. 336 */ 337 unsigned size; 338 339 /** 340 * The minimum number of URB entries. See the 3DSTATE_URB_<XS> docs. 341 */ 342 unsigned min_entries[4]; 343 344 /** 345 * The maximum number of URB entries. See the 3DSTATE_URB_<XS> docs. 346 */ 347 unsigned max_entries[4]; 348 } urb; 349 350 /* Maximum size in Kb that can be allocated to constants in the URB, this 351 * is usually divided among the stages for implementing push constants. 352 * See 3DSTATE_PUSH_CONSTANT_ALLOC_*. 353 */ 354 unsigned max_constant_urb_size_kb; 355 356 /** 357 * Size of the command streamer prefetch. This is important to know for 358 * self modifying batches. 359 */ 360 unsigned cs_prefetch_size; 361 362 /** 363 * For the longest time the timestamp frequency for Gen's timestamp counter 364 * could be assumed to be 12.5MHz, where the least significant bit neatly 365 * corresponded to 80 nanoseconds. 366 * 367 * Since Gfx9 the numbers aren't so round, with a a frequency of 12MHz for 368 * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for 369 * BXT. 370 * 371 * For simplicity to fit with the current code scaling by a single constant 372 * to map from raw timestamps to nanoseconds we now do the conversion in 373 * floating point instead of integer arithmetic. 374 * 375 * In general it's probably worth noting that the documented constants we 376 * have for the per-platform timestamp frequencies aren't perfect and 377 * shouldn't be trusted for scaling and comparing timestamps with a large 378 * delta. 379 * 380 * E.g. with crude testing on my system using the 'correct' scale factor I'm 381 * seeing a drift of ~2 milliseconds per second. 382 */ 383 uint64_t timestamp_frequency; 384 385 uint64_t aperture_bytes; 386 uint64_t gtt_size; 387 388 /** 389 * ID to put into the .aub files. 390 */ 391 int simulator_id; 392 393 /** 394 * holds the name of the device 395 */ 396 char name[INTEL_DEVICE_MAX_NAME_SIZE]; 397 398 /** 399 * no_hw is true when the pci_device_id has been overridden 400 */ 401 bool no_hw; 402 403 /** 404 * apply_hwconfig is true when the platform should apply hwconfig values 405 */ 406 bool apply_hwconfig; 407 408 struct { 409 bool use_class_instance; 410 struct { 411 uint16_t mem_class; 412 uint16_t mem_instance; 413 struct { 414 uint64_t size; 415 uint64_t free; 416 } mappable, unmappable; 417 } sram, vram; 418 } mem; 419 /** @} */ 420}; 421 422#ifdef GFX_VER 423 424#define intel_device_info_is_9lp(devinfo) \ 425 (GFX_VER == 9 && ((devinfo)->platform == INTEL_PLATFORM_BXT || \ 426 (devinfo)->platform == INTEL_PLATFORM_GLK)) 427 428#else 429 430#define intel_device_info_is_9lp(devinfo) \ 431 ((devinfo)->platform == INTEL_PLATFORM_BXT || \ 432 (devinfo)->platform == INTEL_PLATFORM_GLK) 433 434#endif 435 436static inline bool 437intel_device_info_slice_available(const struct intel_device_info *devinfo, 438 int slice) 439{ 440 assert(slice < INTEL_DEVICE_MAX_SLICES); 441 return (devinfo->slice_masks & (1U << slice)) != 0; 442} 443 444static inline bool 445intel_device_info_subslice_available(const struct intel_device_info *devinfo, 446 int slice, int subslice) 447{ 448 return (devinfo->subslice_masks[slice * devinfo->subslice_slice_stride + 449 subslice / 8] & (1U << (subslice % 8))) != 0; 450} 451 452static inline bool 453intel_device_info_eu_available(const struct intel_device_info *devinfo, 454 int slice, int subslice, int eu) 455{ 456 unsigned subslice_offset = slice * devinfo->eu_slice_stride + 457 subslice * devinfo->eu_subslice_stride; 458 459 return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0; 460} 461 462static inline uint32_t 463intel_device_info_subslice_total(const struct intel_device_info *devinfo) 464{ 465 uint32_t total = 0; 466 467 for (size_t i = 0; i < ARRAY_SIZE(devinfo->subslice_masks); i++) { 468 total += __builtin_popcount(devinfo->subslice_masks[i]); 469 } 470 471 return total; 472} 473 474static inline uint32_t 475intel_device_info_eu_total(const struct intel_device_info *devinfo) 476{ 477 uint32_t total = 0; 478 479 for (size_t i = 0; i < ARRAY_SIZE(devinfo->eu_masks); i++) 480 total += __builtin_popcount(devinfo->eu_masks[i]); 481 482 return total; 483} 484 485static inline unsigned 486intel_device_info_num_dual_subslices(UNUSED 487 const struct intel_device_info *devinfo) 488{ 489 unreachable("TODO"); 490} 491 492int intel_device_name_to_pci_device_id(const char *name); 493 494static inline uint64_t 495intel_device_info_timebase_scale(const struct intel_device_info *devinfo, 496 uint64_t gpu_timestamp) 497{ 498 /* Try to avoid going over the 64bits when doing the scaling */ 499 uint64_t upper_ts = gpu_timestamp >> 32; 500 uint64_t lower_ts = gpu_timestamp & 0xffffffff; 501 uint64_t upper_scaled_ts = upper_ts * 1000000000ull / devinfo->timestamp_frequency; 502 uint64_t lower_scaled_ts = lower_ts * 1000000000ull / devinfo->timestamp_frequency; 503 return (upper_scaled_ts << 32) + lower_scaled_ts; 504} 505 506static inline bool 507intel_vram_all_mappable(const struct intel_device_info *devinfo) 508{ 509 return devinfo->mem.vram.unmappable.size == 0; 510} 511 512bool intel_get_device_info_from_fd(int fh, struct intel_device_info *devinfo); 513bool intel_get_device_info_from_pci_id(int pci_id, 514 struct intel_device_info *devinfo); 515 516/* Only updates intel_device_info::regions::...::free fields. The 517 * class/instance/size should remain the same over time. 518 */ 519bool intel_device_info_update_memory_info(struct intel_device_info *devinfo, 520 int fd); 521 522#ifdef __cplusplus 523} 524#endif 525 526#endif /* INTEL_DEVICE_INFO_H */ 527