1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2013 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <assert.h> 25bf215546Sopenharmony_ci#include <stdbool.h> 26bf215546Sopenharmony_ci#include <stdio.h> 27bf215546Sopenharmony_ci#include <stdlib.h> 28bf215546Sopenharmony_ci#include <string.h> 29bf215546Sopenharmony_ci#include <unistd.h> 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include <xf86drm.h> 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "intel_device_info.h" 34bf215546Sopenharmony_ci#include "intel_hwconfig.h" 35bf215546Sopenharmony_ci#include "intel/common/intel_gem.h" 36bf215546Sopenharmony_ci#include "util/bitscan.h" 37bf215546Sopenharmony_ci#include "util/debug.h" 38bf215546Sopenharmony_ci#include "util/log.h" 39bf215546Sopenharmony_ci#include "util/macros.h" 40bf215546Sopenharmony_ci#include "util/os_misc.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "drm-uapi/i915_drm.h" 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cistatic const struct { 45bf215546Sopenharmony_ci const char *name; 46bf215546Sopenharmony_ci int pci_id; 47bf215546Sopenharmony_ci} name_map[] = { 48bf215546Sopenharmony_ci { "lpt", 0x27a2 }, 49bf215546Sopenharmony_ci { "brw", 0x2a02 }, 50bf215546Sopenharmony_ci { "g4x", 0x2a42 }, 51bf215546Sopenharmony_ci { "ilk", 0x0042 }, 52bf215546Sopenharmony_ci { "snb", 0x0126 }, 53bf215546Sopenharmony_ci { "ivb", 0x016a }, 54bf215546Sopenharmony_ci { "hsw", 0x0d2e }, 55bf215546Sopenharmony_ci { "byt", 0x0f33 }, 56bf215546Sopenharmony_ci { "bdw", 0x162e }, 57bf215546Sopenharmony_ci { "chv", 0x22B3 }, 58bf215546Sopenharmony_ci { "skl", 0x1912 }, 59bf215546Sopenharmony_ci { "bxt", 0x5A85 }, 60bf215546Sopenharmony_ci { "kbl", 0x5912 }, 61bf215546Sopenharmony_ci { "aml", 0x591C }, 62bf215546Sopenharmony_ci { "glk", 0x3185 }, 63bf215546Sopenharmony_ci { "cfl", 0x3E9B }, 64bf215546Sopenharmony_ci { "whl", 0x3EA1 }, 65bf215546Sopenharmony_ci { "cml", 0x9b41 }, 66bf215546Sopenharmony_ci { "icl", 0x8a52 }, 67bf215546Sopenharmony_ci { "ehl", 0x4500 }, 68bf215546Sopenharmony_ci { "jsl", 0x4E71 }, 69bf215546Sopenharmony_ci { "tgl", 0x9a49 }, 70bf215546Sopenharmony_ci { "rkl", 0x4c8a }, 71bf215546Sopenharmony_ci { "dg1", 0x4905 }, 72bf215546Sopenharmony_ci { "adl", 0x4680 }, 73bf215546Sopenharmony_ci { "sg1", 0x4907 }, 74bf215546Sopenharmony_ci { "rpl", 0xa780 }, 75bf215546Sopenharmony_ci { "dg2", 0x5690 }, 76bf215546Sopenharmony_ci}; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci/** 79bf215546Sopenharmony_ci * Get the PCI ID for the device name. 80bf215546Sopenharmony_ci * 81bf215546Sopenharmony_ci * Returns -1 if the device is not known. 82bf215546Sopenharmony_ci */ 83bf215546Sopenharmony_ciint 84bf215546Sopenharmony_ciintel_device_name_to_pci_device_id(const char *name) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { 87bf215546Sopenharmony_ci if (!strcmp(name_map[i].name, name)) 88bf215546Sopenharmony_ci return name_map[i].pci_id; 89bf215546Sopenharmony_ci } 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci return -1; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_gfx3 = { 95bf215546Sopenharmony_ci .ver = 3, 96bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_GFX3, 97bf215546Sopenharmony_ci .simulator_id = -1, 98bf215546Sopenharmony_ci .num_slices = 1, 99bf215546Sopenharmony_ci .num_subslices = { 1, }, 100bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 101bf215546Sopenharmony_ci .num_thread_per_eu = 4, 102bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 103bf215546Sopenharmony_ci .cs_prefetch_size = 512, 104bf215546Sopenharmony_ci}; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_i965 = { 107bf215546Sopenharmony_ci .ver = 4, 108bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_I965, 109bf215546Sopenharmony_ci .has_negative_rhw_bug = true, 110bf215546Sopenharmony_ci .num_slices = 1, 111bf215546Sopenharmony_ci .num_subslices = { 1, }, 112bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 113bf215546Sopenharmony_ci .num_thread_per_eu = 4, 114bf215546Sopenharmony_ci .max_vs_threads = 16, 115bf215546Sopenharmony_ci .max_gs_threads = 2, 116bf215546Sopenharmony_ci .max_wm_threads = 8 * 4, 117bf215546Sopenharmony_ci .urb = { 118bf215546Sopenharmony_ci .size = 256, 119bf215546Sopenharmony_ci }, 120bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 121bf215546Sopenharmony_ci .simulator_id = -1, 122bf215546Sopenharmony_ci .cs_prefetch_size = 512, 123bf215546Sopenharmony_ci}; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_g4x = { 126bf215546Sopenharmony_ci .ver = 4, 127bf215546Sopenharmony_ci .verx10 = 45, 128bf215546Sopenharmony_ci .has_pln = true, 129bf215546Sopenharmony_ci .has_compr4 = true, 130bf215546Sopenharmony_ci .has_surface_tile_offset = true, 131bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_G4X, 132bf215546Sopenharmony_ci .num_slices = 1, 133bf215546Sopenharmony_ci .num_subslices = { 1, }, 134bf215546Sopenharmony_ci .max_eus_per_subslice = 10, 135bf215546Sopenharmony_ci .num_thread_per_eu = 5, 136bf215546Sopenharmony_ci .max_vs_threads = 32, 137bf215546Sopenharmony_ci .max_gs_threads = 2, 138bf215546Sopenharmony_ci .max_wm_threads = 10 * 5, 139bf215546Sopenharmony_ci .urb = { 140bf215546Sopenharmony_ci .size = 384, 141bf215546Sopenharmony_ci }, 142bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 143bf215546Sopenharmony_ci .simulator_id = -1, 144bf215546Sopenharmony_ci .cs_prefetch_size = 512, 145bf215546Sopenharmony_ci}; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ilk = { 148bf215546Sopenharmony_ci .ver = 5, 149bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_ILK, 150bf215546Sopenharmony_ci .has_pln = true, 151bf215546Sopenharmony_ci .has_compr4 = true, 152bf215546Sopenharmony_ci .has_surface_tile_offset = true, 153bf215546Sopenharmony_ci .num_slices = 1, 154bf215546Sopenharmony_ci .num_subslices = { 1, }, 155bf215546Sopenharmony_ci .max_eus_per_subslice = 12, 156bf215546Sopenharmony_ci .num_thread_per_eu = 6, 157bf215546Sopenharmony_ci .max_vs_threads = 72, 158bf215546Sopenharmony_ci .max_gs_threads = 32, 159bf215546Sopenharmony_ci .max_wm_threads = 12 * 6, 160bf215546Sopenharmony_ci .urb = { 161bf215546Sopenharmony_ci .size = 1024, 162bf215546Sopenharmony_ci }, 163bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 164bf215546Sopenharmony_ci .simulator_id = -1, 165bf215546Sopenharmony_ci .cs_prefetch_size = 512, 166bf215546Sopenharmony_ci}; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_snb_gt1 = { 169bf215546Sopenharmony_ci .ver = 6, 170bf215546Sopenharmony_ci .gt = 1, 171bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SNB, 172bf215546Sopenharmony_ci .has_hiz_and_separate_stencil = true, 173bf215546Sopenharmony_ci .has_llc = true, 174bf215546Sopenharmony_ci .has_pln = true, 175bf215546Sopenharmony_ci .has_surface_tile_offset = true, 176bf215546Sopenharmony_ci .needs_unlit_centroid_workaround = true, 177bf215546Sopenharmony_ci .num_slices = 1, 178bf215546Sopenharmony_ci .num_subslices = { 1, }, 179bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 180bf215546Sopenharmony_ci .num_thread_per_eu = 6, /* Not confirmed */ 181bf215546Sopenharmony_ci .max_vs_threads = 24, 182bf215546Sopenharmony_ci .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */ 183bf215546Sopenharmony_ci .max_wm_threads = 40, 184bf215546Sopenharmony_ci .urb = { 185bf215546Sopenharmony_ci .size = 32, 186bf215546Sopenharmony_ci .min_entries = { 187bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 24, 188bf215546Sopenharmony_ci }, 189bf215546Sopenharmony_ci .max_entries = { 190bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 256, 191bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 256, 192bf215546Sopenharmony_ci }, 193bf215546Sopenharmony_ci }, 194bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 195bf215546Sopenharmony_ci .simulator_id = -1, 196bf215546Sopenharmony_ci .cs_prefetch_size = 512, 197bf215546Sopenharmony_ci}; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_snb_gt2 = { 200bf215546Sopenharmony_ci .ver = 6, 201bf215546Sopenharmony_ci .gt = 2, 202bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SNB, 203bf215546Sopenharmony_ci .has_hiz_and_separate_stencil = true, 204bf215546Sopenharmony_ci .has_llc = true, 205bf215546Sopenharmony_ci .has_pln = true, 206bf215546Sopenharmony_ci .has_surface_tile_offset = true, 207bf215546Sopenharmony_ci .needs_unlit_centroid_workaround = true, 208bf215546Sopenharmony_ci .num_slices = 1, 209bf215546Sopenharmony_ci .num_subslices = { 1, }, 210bf215546Sopenharmony_ci .max_eus_per_subslice = 12, 211bf215546Sopenharmony_ci .num_thread_per_eu = 6, /* Not confirmed */ 212bf215546Sopenharmony_ci .max_vs_threads = 60, 213bf215546Sopenharmony_ci .max_gs_threads = 60, 214bf215546Sopenharmony_ci .max_wm_threads = 80, 215bf215546Sopenharmony_ci .urb = { 216bf215546Sopenharmony_ci .size = 64, 217bf215546Sopenharmony_ci .min_entries = { 218bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 24, 219bf215546Sopenharmony_ci }, 220bf215546Sopenharmony_ci .max_entries = { 221bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 256, 222bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 256, 223bf215546Sopenharmony_ci }, 224bf215546Sopenharmony_ci }, 225bf215546Sopenharmony_ci .timestamp_frequency = 12500000, 226bf215546Sopenharmony_ci .simulator_id = -1, 227bf215546Sopenharmony_ci .cs_prefetch_size = 512, 228bf215546Sopenharmony_ci}; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci#define GFX7_FEATURES \ 231bf215546Sopenharmony_ci .ver = 7, \ 232bf215546Sopenharmony_ci .has_hiz_and_separate_stencil = true, \ 233bf215546Sopenharmony_ci .must_use_separate_stencil = true, \ 234bf215546Sopenharmony_ci .has_llc = true, \ 235bf215546Sopenharmony_ci .has_pln = true, \ 236bf215546Sopenharmony_ci .has_64bit_float = true, \ 237bf215546Sopenharmony_ci .has_surface_tile_offset = true, \ 238bf215546Sopenharmony_ci .timestamp_frequency = 12500000, \ 239bf215546Sopenharmony_ci .max_constant_urb_size_kb = 16, \ 240bf215546Sopenharmony_ci .cs_prefetch_size = 512 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ivb_gt1 = { 243bf215546Sopenharmony_ci GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1, 244bf215546Sopenharmony_ci .num_slices = 1, 245bf215546Sopenharmony_ci .num_subslices = { 1, }, 246bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 247bf215546Sopenharmony_ci .num_thread_per_eu = 6, 248bf215546Sopenharmony_ci .l3_banks = 2, 249bf215546Sopenharmony_ci .max_vs_threads = 36, 250bf215546Sopenharmony_ci .max_tcs_threads = 36, 251bf215546Sopenharmony_ci .max_tes_threads = 36, 252bf215546Sopenharmony_ci .max_gs_threads = 36, 253bf215546Sopenharmony_ci .max_wm_threads = 48, 254bf215546Sopenharmony_ci .max_cs_threads = 36, 255bf215546Sopenharmony_ci .urb = { 256bf215546Sopenharmony_ci .min_entries = { 257bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 32, 258bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 259bf215546Sopenharmony_ci }, 260bf215546Sopenharmony_ci .max_entries = { 261bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 512, 262bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 32, 263bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 288, 264bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 192, 265bf215546Sopenharmony_ci }, 266bf215546Sopenharmony_ci }, 267bf215546Sopenharmony_ci .simulator_id = 7, 268bf215546Sopenharmony_ci}; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ivb_gt2 = { 271bf215546Sopenharmony_ci GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 2, 272bf215546Sopenharmony_ci .num_slices = 1, 273bf215546Sopenharmony_ci .num_subslices = { 1, }, 274bf215546Sopenharmony_ci .max_eus_per_subslice = 12, 275bf215546Sopenharmony_ci .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of 276bf215546Sopenharmony_ci * @max_wm_threads ... */ 277bf215546Sopenharmony_ci .l3_banks = 4, 278bf215546Sopenharmony_ci .max_vs_threads = 128, 279bf215546Sopenharmony_ci .max_tcs_threads = 128, 280bf215546Sopenharmony_ci .max_tes_threads = 128, 281bf215546Sopenharmony_ci .max_gs_threads = 128, 282bf215546Sopenharmony_ci .max_wm_threads = 172, 283bf215546Sopenharmony_ci .max_cs_threads = 64, 284bf215546Sopenharmony_ci .urb = { 285bf215546Sopenharmony_ci .min_entries = { 286bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 32, 287bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 288bf215546Sopenharmony_ci }, 289bf215546Sopenharmony_ci .max_entries = { 290bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 704, 291bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 64, 292bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 448, 293bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 320, 294bf215546Sopenharmony_ci }, 295bf215546Sopenharmony_ci }, 296bf215546Sopenharmony_ci .simulator_id = 7, 297bf215546Sopenharmony_ci}; 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_byt = { 300bf215546Sopenharmony_ci GFX7_FEATURES, .platform = INTEL_PLATFORM_BYT, .gt = 1, 301bf215546Sopenharmony_ci .num_slices = 1, 302bf215546Sopenharmony_ci .num_subslices = { 1, }, 303bf215546Sopenharmony_ci .max_eus_per_subslice = 4, 304bf215546Sopenharmony_ci .num_thread_per_eu = 8, 305bf215546Sopenharmony_ci .l3_banks = 1, 306bf215546Sopenharmony_ci .has_llc = false, 307bf215546Sopenharmony_ci .max_vs_threads = 36, 308bf215546Sopenharmony_ci .max_tcs_threads = 36, 309bf215546Sopenharmony_ci .max_tes_threads = 36, 310bf215546Sopenharmony_ci .max_gs_threads = 36, 311bf215546Sopenharmony_ci .max_wm_threads = 48, 312bf215546Sopenharmony_ci .max_cs_threads = 32, 313bf215546Sopenharmony_ci .urb = { 314bf215546Sopenharmony_ci .min_entries = { 315bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 32, 316bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 317bf215546Sopenharmony_ci }, 318bf215546Sopenharmony_ci .max_entries = { 319bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 512, 320bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 32, 321bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 288, 322bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 192, 323bf215546Sopenharmony_ci }, 324bf215546Sopenharmony_ci }, 325bf215546Sopenharmony_ci .simulator_id = 10, 326bf215546Sopenharmony_ci}; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci#define HSW_FEATURES \ 329bf215546Sopenharmony_ci GFX7_FEATURES, \ 330bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_HSW, \ 331bf215546Sopenharmony_ci .verx10 = 75, \ 332bf215546Sopenharmony_ci .supports_simd16_3src = true 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_hsw_gt1 = { 335bf215546Sopenharmony_ci HSW_FEATURES, .gt = 1, 336bf215546Sopenharmony_ci .num_slices = 1, 337bf215546Sopenharmony_ci .num_subslices = { 1, }, 338bf215546Sopenharmony_ci .max_eus_per_subslice = 10, 339bf215546Sopenharmony_ci .num_thread_per_eu = 7, 340bf215546Sopenharmony_ci .l3_banks = 2, 341bf215546Sopenharmony_ci .max_vs_threads = 70, 342bf215546Sopenharmony_ci .max_tcs_threads = 70, 343bf215546Sopenharmony_ci .max_tes_threads = 70, 344bf215546Sopenharmony_ci .max_gs_threads = 70, 345bf215546Sopenharmony_ci .max_wm_threads = 102, 346bf215546Sopenharmony_ci .max_cs_threads = 70, 347bf215546Sopenharmony_ci .urb = { 348bf215546Sopenharmony_ci .min_entries = { 349bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 32, 350bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 351bf215546Sopenharmony_ci }, 352bf215546Sopenharmony_ci .max_entries = { 353bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 640, 354bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 64, 355bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 384, 356bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 256, 357bf215546Sopenharmony_ci }, 358bf215546Sopenharmony_ci }, 359bf215546Sopenharmony_ci .simulator_id = 9, 360bf215546Sopenharmony_ci}; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_hsw_gt2 = { 363bf215546Sopenharmony_ci HSW_FEATURES, .gt = 2, 364bf215546Sopenharmony_ci .num_slices = 1, 365bf215546Sopenharmony_ci .num_subslices = { 2, }, 366bf215546Sopenharmony_ci .max_eus_per_subslice = 10, 367bf215546Sopenharmony_ci .num_thread_per_eu = 7, 368bf215546Sopenharmony_ci .l3_banks = 4, 369bf215546Sopenharmony_ci .max_vs_threads = 280, 370bf215546Sopenharmony_ci .max_tcs_threads = 256, 371bf215546Sopenharmony_ci .max_tes_threads = 280, 372bf215546Sopenharmony_ci .max_gs_threads = 256, 373bf215546Sopenharmony_ci .max_wm_threads = 204, 374bf215546Sopenharmony_ci .max_cs_threads = 70, 375bf215546Sopenharmony_ci .urb = { 376bf215546Sopenharmony_ci .min_entries = { 377bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, 378bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 379bf215546Sopenharmony_ci }, 380bf215546Sopenharmony_ci .max_entries = { 381bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 1664, 382bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 128, 383bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 960, 384bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 640, 385bf215546Sopenharmony_ci }, 386bf215546Sopenharmony_ci }, 387bf215546Sopenharmony_ci .simulator_id = 9, 388bf215546Sopenharmony_ci}; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_hsw_gt3 = { 391bf215546Sopenharmony_ci HSW_FEATURES, .gt = 3, 392bf215546Sopenharmony_ci .num_slices = 2, 393bf215546Sopenharmony_ci .num_subslices = { 2, 2, }, 394bf215546Sopenharmony_ci .max_eus_per_subslice = 10, 395bf215546Sopenharmony_ci .num_thread_per_eu = 7, 396bf215546Sopenharmony_ci .l3_banks = 8, 397bf215546Sopenharmony_ci .max_vs_threads = 280, 398bf215546Sopenharmony_ci .max_tcs_threads = 256, 399bf215546Sopenharmony_ci .max_tes_threads = 280, 400bf215546Sopenharmony_ci .max_gs_threads = 256, 401bf215546Sopenharmony_ci .max_wm_threads = 408, 402bf215546Sopenharmony_ci .max_cs_threads = 70, 403bf215546Sopenharmony_ci .urb = { 404bf215546Sopenharmony_ci .min_entries = { 405bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, 406bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 10, 407bf215546Sopenharmony_ci }, 408bf215546Sopenharmony_ci .max_entries = { 409bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 1664, 410bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 128, 411bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 960, 412bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 640, 413bf215546Sopenharmony_ci }, 414bf215546Sopenharmony_ci }, 415bf215546Sopenharmony_ci .max_constant_urb_size_kb = 32, 416bf215546Sopenharmony_ci .simulator_id = 9, 417bf215546Sopenharmony_ci}; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci/* It's unclear how well supported sampling from the hiz buffer is on GFX8, 420bf215546Sopenharmony_ci * so keep things conservative for now and set has_sample_with_hiz = false. 421bf215546Sopenharmony_ci */ 422bf215546Sopenharmony_ci#define GFX8_FEATURES \ 423bf215546Sopenharmony_ci .ver = 8, \ 424bf215546Sopenharmony_ci .has_hiz_and_separate_stencil = true, \ 425bf215546Sopenharmony_ci .must_use_separate_stencil = true, \ 426bf215546Sopenharmony_ci .has_llc = true, \ 427bf215546Sopenharmony_ci .has_sample_with_hiz = false, \ 428bf215546Sopenharmony_ci .has_pln = true, \ 429bf215546Sopenharmony_ci .has_integer_dword_mul = true, \ 430bf215546Sopenharmony_ci .has_64bit_float = true, \ 431bf215546Sopenharmony_ci .has_64bit_int = true, \ 432bf215546Sopenharmony_ci .supports_simd16_3src = true, \ 433bf215546Sopenharmony_ci .has_surface_tile_offset = true, \ 434bf215546Sopenharmony_ci .num_thread_per_eu = 7, \ 435bf215546Sopenharmony_ci .max_vs_threads = 504, \ 436bf215546Sopenharmony_ci .max_tcs_threads = 504, \ 437bf215546Sopenharmony_ci .max_tes_threads = 504, \ 438bf215546Sopenharmony_ci .max_gs_threads = 504, \ 439bf215546Sopenharmony_ci .max_wm_threads = 384, \ 440bf215546Sopenharmony_ci .max_threads_per_psd = 64, \ 441bf215546Sopenharmony_ci .timestamp_frequency = 12500000, \ 442bf215546Sopenharmony_ci .max_constant_urb_size_kb = 32, \ 443bf215546Sopenharmony_ci .cs_prefetch_size = 512 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_bdw_gt1 = { 446bf215546Sopenharmony_ci GFX8_FEATURES, .gt = 1, 447bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_BDW, 448bf215546Sopenharmony_ci .num_slices = 1, 449bf215546Sopenharmony_ci .num_subslices = { 2, }, 450bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 451bf215546Sopenharmony_ci .l3_banks = 2, 452bf215546Sopenharmony_ci .max_cs_threads = 42, 453bf215546Sopenharmony_ci .urb = { 454bf215546Sopenharmony_ci .min_entries = { 455bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, 456bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, 457bf215546Sopenharmony_ci }, 458bf215546Sopenharmony_ci .max_entries = { 459bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 2560, 460bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 504, 461bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 1536, 462bf215546Sopenharmony_ci /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */ 463bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 690, 464bf215546Sopenharmony_ci }, 465bf215546Sopenharmony_ci }, 466bf215546Sopenharmony_ci .simulator_id = 11, 467bf215546Sopenharmony_ci}; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_bdw_gt2 = { 470bf215546Sopenharmony_ci GFX8_FEATURES, .gt = 2, 471bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_BDW, 472bf215546Sopenharmony_ci .num_slices = 1, 473bf215546Sopenharmony_ci .num_subslices = { 3, }, 474bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 475bf215546Sopenharmony_ci .l3_banks = 4, 476bf215546Sopenharmony_ci .max_cs_threads = 56, 477bf215546Sopenharmony_ci .urb = { 478bf215546Sopenharmony_ci .min_entries = { 479bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, 480bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, 481bf215546Sopenharmony_ci }, 482bf215546Sopenharmony_ci .max_entries = { 483bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 2560, 484bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 504, 485bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 1536, 486bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 960, 487bf215546Sopenharmony_ci }, 488bf215546Sopenharmony_ci }, 489bf215546Sopenharmony_ci .simulator_id = 11, 490bf215546Sopenharmony_ci}; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_bdw_gt3 = { 493bf215546Sopenharmony_ci GFX8_FEATURES, .gt = 3, 494bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_BDW, 495bf215546Sopenharmony_ci .num_slices = 2, 496bf215546Sopenharmony_ci .num_subslices = { 3, 3, }, 497bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 498bf215546Sopenharmony_ci .l3_banks = 8, 499bf215546Sopenharmony_ci .max_cs_threads = 56, 500bf215546Sopenharmony_ci .urb = { 501bf215546Sopenharmony_ci .min_entries = { 502bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, 503bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, 504bf215546Sopenharmony_ci }, 505bf215546Sopenharmony_ci .max_entries = { 506bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 2560, 507bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 504, 508bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 1536, 509bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 960, 510bf215546Sopenharmony_ci }, 511bf215546Sopenharmony_ci }, 512bf215546Sopenharmony_ci .simulator_id = 11, 513bf215546Sopenharmony_ci}; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_chv = { 516bf215546Sopenharmony_ci GFX8_FEATURES, .platform = INTEL_PLATFORM_CHV, .gt = 1, 517bf215546Sopenharmony_ci .has_llc = false, 518bf215546Sopenharmony_ci .has_integer_dword_mul = false, 519bf215546Sopenharmony_ci .num_slices = 1, 520bf215546Sopenharmony_ci .num_subslices = { 2, }, 521bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 522bf215546Sopenharmony_ci .l3_banks = 2, 523bf215546Sopenharmony_ci .max_vs_threads = 80, 524bf215546Sopenharmony_ci .max_tcs_threads = 80, 525bf215546Sopenharmony_ci .max_tes_threads = 80, 526bf215546Sopenharmony_ci .max_gs_threads = 80, 527bf215546Sopenharmony_ci .max_wm_threads = 128, 528bf215546Sopenharmony_ci .max_cs_threads = 6 * 7, 529bf215546Sopenharmony_ci .urb = { 530bf215546Sopenharmony_ci .min_entries = { 531bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 34, 532bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, 533bf215546Sopenharmony_ci }, 534bf215546Sopenharmony_ci .max_entries = { 535bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 640, 536bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 80, 537bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 384, 538bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 256, 539bf215546Sopenharmony_ci }, 540bf215546Sopenharmony_ci }, 541bf215546Sopenharmony_ci .simulator_id = 13, 542bf215546Sopenharmony_ci}; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci#define GFX9_HW_INFO \ 545bf215546Sopenharmony_ci .ver = 9, \ 546bf215546Sopenharmony_ci .max_vs_threads = 336, \ 547bf215546Sopenharmony_ci .max_gs_threads = 336, \ 548bf215546Sopenharmony_ci .max_tcs_threads = 336, \ 549bf215546Sopenharmony_ci .max_tes_threads = 336, \ 550bf215546Sopenharmony_ci .max_threads_per_psd = 64, \ 551bf215546Sopenharmony_ci .max_cs_threads = 56, \ 552bf215546Sopenharmony_ci .timestamp_frequency = 12000000, \ 553bf215546Sopenharmony_ci .cs_prefetch_size = 512, \ 554bf215546Sopenharmony_ci .urb = { \ 555bf215546Sopenharmony_ci .min_entries = { \ 556bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, \ 557bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, \ 558bf215546Sopenharmony_ci }, \ 559bf215546Sopenharmony_ci .max_entries = { \ 560bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 1856, \ 561bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 672, \ 562bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 1120, \ 563bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 640, \ 564bf215546Sopenharmony_ci }, \ 565bf215546Sopenharmony_ci } 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci#define GFX9_LP_FEATURES \ 568bf215546Sopenharmony_ci GFX8_FEATURES, \ 569bf215546Sopenharmony_ci GFX9_HW_INFO, \ 570bf215546Sopenharmony_ci .has_integer_dword_mul = false, \ 571bf215546Sopenharmony_ci .gt = 1, \ 572bf215546Sopenharmony_ci .has_llc = false, \ 573bf215546Sopenharmony_ci .has_sample_with_hiz = true, \ 574bf215546Sopenharmony_ci .num_slices = 1, \ 575bf215546Sopenharmony_ci .num_thread_per_eu = 6, \ 576bf215546Sopenharmony_ci .max_vs_threads = 112, \ 577bf215546Sopenharmony_ci .max_tcs_threads = 112, \ 578bf215546Sopenharmony_ci .max_tes_threads = 112, \ 579bf215546Sopenharmony_ci .max_gs_threads = 112, \ 580bf215546Sopenharmony_ci .max_cs_threads = 6 * 6, \ 581bf215546Sopenharmony_ci .timestamp_frequency = 19200000, \ 582bf215546Sopenharmony_ci .urb = { \ 583bf215546Sopenharmony_ci .min_entries = { \ 584bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 34, \ 585bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, \ 586bf215546Sopenharmony_ci }, \ 587bf215546Sopenharmony_ci .max_entries = { \ 588bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 704, \ 589bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 256, \ 590bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 416, \ 591bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 256, \ 592bf215546Sopenharmony_ci }, \ 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci#define GFX9_LP_FEATURES_3X6 \ 596bf215546Sopenharmony_ci GFX9_LP_FEATURES, \ 597bf215546Sopenharmony_ci .num_subslices = { 3, }, \ 598bf215546Sopenharmony_ci .max_eus_per_subslice = 6 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci#define GFX9_LP_FEATURES_2X6 \ 601bf215546Sopenharmony_ci GFX9_LP_FEATURES, \ 602bf215546Sopenharmony_ci .num_subslices = { 2, }, \ 603bf215546Sopenharmony_ci .max_eus_per_subslice = 6, \ 604bf215546Sopenharmony_ci .max_vs_threads = 56, \ 605bf215546Sopenharmony_ci .max_tcs_threads = 56, \ 606bf215546Sopenharmony_ci .max_tes_threads = 56, \ 607bf215546Sopenharmony_ci .max_gs_threads = 56, \ 608bf215546Sopenharmony_ci .max_cs_threads = 6 * 6, \ 609bf215546Sopenharmony_ci .urb = { \ 610bf215546Sopenharmony_ci .min_entries = { \ 611bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 34, \ 612bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, \ 613bf215546Sopenharmony_ci }, \ 614bf215546Sopenharmony_ci .max_entries = { \ 615bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 352, \ 616bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 128, \ 617bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 208, \ 618bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 128, \ 619bf215546Sopenharmony_ci }, \ 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci#define GFX9_FEATURES \ 623bf215546Sopenharmony_ci GFX8_FEATURES, \ 624bf215546Sopenharmony_ci GFX9_HW_INFO, \ 625bf215546Sopenharmony_ci .has_sample_with_hiz = true 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_skl_gt1 = { 628bf215546Sopenharmony_ci GFX9_FEATURES, .gt = 1, 629bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SKL, 630bf215546Sopenharmony_ci .num_slices = 1, 631bf215546Sopenharmony_ci .num_subslices = { 2, }, 632bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 633bf215546Sopenharmony_ci .l3_banks = 2, 634bf215546Sopenharmony_ci /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 635bf215546Sopenharmony_ci * leading to some vertices to go missing if we use too much URB. 636bf215546Sopenharmony_ci */ 637bf215546Sopenharmony_ci .urb.max_entries[MESA_SHADER_VERTEX] = 928, 638bf215546Sopenharmony_ci .simulator_id = 12, 639bf215546Sopenharmony_ci}; 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_skl_gt2 = { 642bf215546Sopenharmony_ci GFX9_FEATURES, .gt = 2, 643bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SKL, 644bf215546Sopenharmony_ci .num_slices = 1, 645bf215546Sopenharmony_ci .num_subslices = { 3, }, 646bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 647bf215546Sopenharmony_ci .l3_banks = 4, 648bf215546Sopenharmony_ci .simulator_id = 12, 649bf215546Sopenharmony_ci}; 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_skl_gt3 = { 652bf215546Sopenharmony_ci GFX9_FEATURES, .gt = 3, 653bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SKL, 654bf215546Sopenharmony_ci .num_slices = 2, 655bf215546Sopenharmony_ci .num_subslices = { 3, 3, }, 656bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 657bf215546Sopenharmony_ci .l3_banks = 8, 658bf215546Sopenharmony_ci .simulator_id = 12, 659bf215546Sopenharmony_ci}; 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_skl_gt4 = { 662bf215546Sopenharmony_ci GFX9_FEATURES, .gt = 4, 663bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_SKL, 664bf215546Sopenharmony_ci .num_slices = 3, 665bf215546Sopenharmony_ci .num_subslices = { 3, 3, 3, }, 666bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 667bf215546Sopenharmony_ci .l3_banks = 12, 668bf215546Sopenharmony_ci /* From the "L3 Allocation and Programming" documentation: 669bf215546Sopenharmony_ci * 670bf215546Sopenharmony_ci * "URB is limited to 1008KB due to programming restrictions. This is not a 671bf215546Sopenharmony_ci * restriction of the L3 implementation, but of the FF and other clients. 672bf215546Sopenharmony_ci * Therefore, in a GT4 implementation it is possible for the programmed 673bf215546Sopenharmony_ci * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but 674bf215546Sopenharmony_ci * only 1008KB of this will be used." 675bf215546Sopenharmony_ci */ 676bf215546Sopenharmony_ci .simulator_id = 12, 677bf215546Sopenharmony_ci}; 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_bxt = { 680bf215546Sopenharmony_ci GFX9_LP_FEATURES_3X6, 681bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_BXT, 682bf215546Sopenharmony_ci .l3_banks = 2, 683bf215546Sopenharmony_ci .simulator_id = 14, 684bf215546Sopenharmony_ci}; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_bxt_2x6 = { 687bf215546Sopenharmony_ci GFX9_LP_FEATURES_2X6, 688bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_BXT, 689bf215546Sopenharmony_ci .l3_banks = 1, 690bf215546Sopenharmony_ci .simulator_id = 14, 691bf215546Sopenharmony_ci}; 692bf215546Sopenharmony_ci/* 693bf215546Sopenharmony_ci * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. 694bf215546Sopenharmony_ci * There's no KBL entry. Using the default SKL (GFX9) GS entries value. 695bf215546Sopenharmony_ci */ 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_kbl_gt1 = { 698bf215546Sopenharmony_ci GFX9_FEATURES, 699bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_KBL, 700bf215546Sopenharmony_ci .gt = 1, 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci .max_cs_threads = 7 * 6, 703bf215546Sopenharmony_ci .num_slices = 1, 704bf215546Sopenharmony_ci .num_subslices = { 2, }, 705bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 706bf215546Sopenharmony_ci .l3_banks = 2, 707bf215546Sopenharmony_ci /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 708bf215546Sopenharmony_ci * leading to some vertices to go missing if we use too much URB. 709bf215546Sopenharmony_ci */ 710bf215546Sopenharmony_ci .urb.max_entries[MESA_SHADER_VERTEX] = 928, 711bf215546Sopenharmony_ci .urb.max_entries[MESA_SHADER_GEOMETRY] = 256, 712bf215546Sopenharmony_ci .simulator_id = 16, 713bf215546Sopenharmony_ci}; 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_kbl_gt1_5 = { 716bf215546Sopenharmony_ci GFX9_FEATURES, 717bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_KBL, 718bf215546Sopenharmony_ci .gt = 1, 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci .max_cs_threads = 7 * 6, 721bf215546Sopenharmony_ci .num_slices = 1, 722bf215546Sopenharmony_ci .num_subslices = { 3, }, 723bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 724bf215546Sopenharmony_ci .l3_banks = 4, 725bf215546Sopenharmony_ci .simulator_id = 16, 726bf215546Sopenharmony_ci}; 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_kbl_gt2 = { 729bf215546Sopenharmony_ci GFX9_FEATURES, 730bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_KBL, 731bf215546Sopenharmony_ci .gt = 2, 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci .num_slices = 1, 734bf215546Sopenharmony_ci .num_subslices = { 3, }, 735bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 736bf215546Sopenharmony_ci .l3_banks = 4, 737bf215546Sopenharmony_ci .simulator_id = 16, 738bf215546Sopenharmony_ci}; 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_kbl_gt3 = { 741bf215546Sopenharmony_ci GFX9_FEATURES, 742bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_KBL, 743bf215546Sopenharmony_ci .gt = 3, 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci .num_slices = 2, 746bf215546Sopenharmony_ci .num_subslices = { 3, 3, }, 747bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 748bf215546Sopenharmony_ci .l3_banks = 8, 749bf215546Sopenharmony_ci .simulator_id = 16, 750bf215546Sopenharmony_ci}; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_kbl_gt4 = { 753bf215546Sopenharmony_ci GFX9_FEATURES, 754bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_KBL, 755bf215546Sopenharmony_ci .gt = 4, 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci /* 758bf215546Sopenharmony_ci * From the "L3 Allocation and Programming" documentation: 759bf215546Sopenharmony_ci * 760bf215546Sopenharmony_ci * "URB is limited to 1008KB due to programming restrictions. This 761bf215546Sopenharmony_ci * is not a restriction of the L3 implementation, but of the FF and 762bf215546Sopenharmony_ci * other clients. Therefore, in a GT4 implementation it is 763bf215546Sopenharmony_ci * possible for the programmed allocation of the L3 data array to 764bf215546Sopenharmony_ci * provide 3*384KB=1152KB for URB, but only 1008KB of this 765bf215546Sopenharmony_ci * will be used." 766bf215546Sopenharmony_ci */ 767bf215546Sopenharmony_ci .num_slices = 3, 768bf215546Sopenharmony_ci .num_subslices = { 3, 3, 3, }, 769bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 770bf215546Sopenharmony_ci .l3_banks = 12, 771bf215546Sopenharmony_ci .simulator_id = 16, 772bf215546Sopenharmony_ci}; 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_glk = { 775bf215546Sopenharmony_ci GFX9_LP_FEATURES_3X6, 776bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_GLK, 777bf215546Sopenharmony_ci .l3_banks = 2, 778bf215546Sopenharmony_ci .simulator_id = 17, 779bf215546Sopenharmony_ci}; 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_glk_2x6 = { 782bf215546Sopenharmony_ci GFX9_LP_FEATURES_2X6, 783bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_GLK, 784bf215546Sopenharmony_ci .l3_banks = 2, 785bf215546Sopenharmony_ci .simulator_id = 17, 786bf215546Sopenharmony_ci}; 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_cfl_gt1 = { 789bf215546Sopenharmony_ci GFX9_FEATURES, 790bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_CFL, 791bf215546Sopenharmony_ci .gt = 1, 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci .num_slices = 1, 794bf215546Sopenharmony_ci .num_subslices = { 2, }, 795bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 796bf215546Sopenharmony_ci .l3_banks = 2, 797bf215546Sopenharmony_ci /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 798bf215546Sopenharmony_ci * leading to some vertices to go missing if we use too much URB. 799bf215546Sopenharmony_ci */ 800bf215546Sopenharmony_ci .urb.max_entries[MESA_SHADER_VERTEX] = 928, 801bf215546Sopenharmony_ci .urb.max_entries[MESA_SHADER_GEOMETRY] = 256, 802bf215546Sopenharmony_ci .simulator_id = 24, 803bf215546Sopenharmony_ci}; 804bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_cfl_gt2 = { 805bf215546Sopenharmony_ci GFX9_FEATURES, 806bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_CFL, 807bf215546Sopenharmony_ci .gt = 2, 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci .num_slices = 1, 810bf215546Sopenharmony_ci .num_subslices = { 3, }, 811bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 812bf215546Sopenharmony_ci .l3_banks = 4, 813bf215546Sopenharmony_ci .simulator_id = 24, 814bf215546Sopenharmony_ci}; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_cfl_gt3 = { 817bf215546Sopenharmony_ci GFX9_FEATURES, 818bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_CFL, 819bf215546Sopenharmony_ci .gt = 3, 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci .num_slices = 2, 822bf215546Sopenharmony_ci .num_subslices = { 3, 3, }, 823bf215546Sopenharmony_ci .max_eus_per_subslice = 8, 824bf215546Sopenharmony_ci .l3_banks = 8, 825bf215546Sopenharmony_ci .simulator_id = 24, 826bf215546Sopenharmony_ci}; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci#define subslices(args...) { args, } 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci#define GFX11_HW_INFO \ 831bf215546Sopenharmony_ci .ver = 11, \ 832bf215546Sopenharmony_ci .has_pln = false, \ 833bf215546Sopenharmony_ci .max_vs_threads = 364, \ 834bf215546Sopenharmony_ci .max_gs_threads = 224, \ 835bf215546Sopenharmony_ci .max_tcs_threads = 224, \ 836bf215546Sopenharmony_ci .max_tes_threads = 364, \ 837bf215546Sopenharmony_ci .max_threads_per_psd = 64, \ 838bf215546Sopenharmony_ci .max_cs_threads = 56, \ 839bf215546Sopenharmony_ci .cs_prefetch_size = 512 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci#define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform) \ 842bf215546Sopenharmony_ci GFX8_FEATURES, \ 843bf215546Sopenharmony_ci GFX11_HW_INFO, \ 844bf215546Sopenharmony_ci .platform = _platform, \ 845bf215546Sopenharmony_ci .has_64bit_float = false, \ 846bf215546Sopenharmony_ci .has_64bit_int = false, \ 847bf215546Sopenharmony_ci .has_integer_dword_mul = false, \ 848bf215546Sopenharmony_ci .has_sample_with_hiz = false, \ 849bf215546Sopenharmony_ci .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ 850bf215546Sopenharmony_ci .num_subslices = _subslices, \ 851bf215546Sopenharmony_ci .max_eus_per_subslice = 8 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci#define GFX11_URB_MIN_MAX_ENTRIES \ 854bf215546Sopenharmony_ci .min_entries = { \ 855bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, \ 856bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, \ 857bf215546Sopenharmony_ci }, \ 858bf215546Sopenharmony_ci .max_entries = { \ 859bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 2384, \ 860bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 1032, \ 861bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 2384, \ 862bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 1032, \ 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_icl_gt2 = { 866bf215546Sopenharmony_ci GFX11_FEATURES(2, 1, subslices(8), 8, INTEL_PLATFORM_ICL), 867bf215546Sopenharmony_ci .urb = { 868bf215546Sopenharmony_ci GFX11_URB_MIN_MAX_ENTRIES, 869bf215546Sopenharmony_ci }, 870bf215546Sopenharmony_ci .simulator_id = 19, 871bf215546Sopenharmony_ci}; 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_icl_gt1_5 = { 874bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(6), 6, INTEL_PLATFORM_ICL), 875bf215546Sopenharmony_ci .urb = { 876bf215546Sopenharmony_ci GFX11_URB_MIN_MAX_ENTRIES, 877bf215546Sopenharmony_ci }, 878bf215546Sopenharmony_ci .simulator_id = 19, 879bf215546Sopenharmony_ci}; 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_icl_gt1 = { 882bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(4), 6, INTEL_PLATFORM_ICL), 883bf215546Sopenharmony_ci .urb = { 884bf215546Sopenharmony_ci GFX11_URB_MIN_MAX_ENTRIES, 885bf215546Sopenharmony_ci }, 886bf215546Sopenharmony_ci .simulator_id = 19, 887bf215546Sopenharmony_ci}; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_icl_gt0_5 = { 890bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(1), 6, INTEL_PLATFORM_ICL), 891bf215546Sopenharmony_ci .urb = { 892bf215546Sopenharmony_ci GFX11_URB_MIN_MAX_ENTRIES, 893bf215546Sopenharmony_ci }, 894bf215546Sopenharmony_ci .simulator_id = 19, 895bf215546Sopenharmony_ci}; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci#define GFX11_LP_FEATURES \ 898bf215546Sopenharmony_ci .urb = { \ 899bf215546Sopenharmony_ci GFX11_URB_MIN_MAX_ENTRIES, \ 900bf215546Sopenharmony_ci }, \ 901bf215546Sopenharmony_ci .disable_ccs_repack = true, \ 902bf215546Sopenharmony_ci .simulator_id = 28 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_4x8 = { 905bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL), 906bf215546Sopenharmony_ci GFX11_LP_FEATURES, 907bf215546Sopenharmony_ci}; 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_4x6 = { 910bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL), 911bf215546Sopenharmony_ci GFX11_LP_FEATURES, 912bf215546Sopenharmony_ci .max_eus_per_subslice = 6, 913bf215546Sopenharmony_ci}; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_4x5 = { 916bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL), 917bf215546Sopenharmony_ci GFX11_LP_FEATURES, 918bf215546Sopenharmony_ci .max_eus_per_subslice = 5, 919bf215546Sopenharmony_ci}; 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_4x4 = { 922bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL), 923bf215546Sopenharmony_ci GFX11_LP_FEATURES, 924bf215546Sopenharmony_ci .max_eus_per_subslice = 4, 925bf215546Sopenharmony_ci}; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_2x8 = { 928bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL), 929bf215546Sopenharmony_ci GFX11_LP_FEATURES, 930bf215546Sopenharmony_ci}; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_ehl_2x4 = { 933bf215546Sopenharmony_ci GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL), 934bf215546Sopenharmony_ci GFX11_LP_FEATURES, 935bf215546Sopenharmony_ci .max_eus_per_subslice = 4, 936bf215546Sopenharmony_ci}; 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci#define GFX12_URB_MIN_MAX_ENTRIES \ 939bf215546Sopenharmony_ci .min_entries = { \ 940bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 64, \ 941bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 34, \ 942bf215546Sopenharmony_ci }, \ 943bf215546Sopenharmony_ci .max_entries = { \ 944bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 3576, \ 945bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 1548, \ 946bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 3576, \ 947bf215546Sopenharmony_ci /* Wa_14013840143 */ \ 948bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 1536, \ 949bf215546Sopenharmony_ci } 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci#define GFX12_HW_INFO \ 952bf215546Sopenharmony_ci .ver = 12, \ 953bf215546Sopenharmony_ci .has_pln = false, \ 954bf215546Sopenharmony_ci .has_sample_with_hiz = false, \ 955bf215546Sopenharmony_ci .has_aux_map = true, \ 956bf215546Sopenharmony_ci .max_vs_threads = 546, \ 957bf215546Sopenharmony_ci .max_gs_threads = 336, \ 958bf215546Sopenharmony_ci .max_tcs_threads = 336, \ 959bf215546Sopenharmony_ci .max_tes_threads = 546, \ 960bf215546Sopenharmony_ci .max_threads_per_psd = 64, \ 961bf215546Sopenharmony_ci .max_cs_threads = 112, /* threads per DSS */ \ 962bf215546Sopenharmony_ci .urb = { \ 963bf215546Sopenharmony_ci GFX12_URB_MIN_MAX_ENTRIES, \ 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci#define GFX12_FEATURES(_gt, _slices, _l3) \ 967bf215546Sopenharmony_ci GFX8_FEATURES, \ 968bf215546Sopenharmony_ci GFX12_HW_INFO, \ 969bf215546Sopenharmony_ci .has_64bit_float = false, \ 970bf215546Sopenharmony_ci .has_64bit_int = false, \ 971bf215546Sopenharmony_ci .has_integer_dword_mul = false, \ 972bf215546Sopenharmony_ci .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ 973bf215546Sopenharmony_ci .simulator_id = 22, \ 974bf215546Sopenharmony_ci .max_eus_per_subslice = 16, \ 975bf215546Sopenharmony_ci .cs_prefetch_size = 512 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci#define dual_subslices(args...) { args, } 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci#define GFX12_GT05_FEATURES \ 980bf215546Sopenharmony_ci GFX12_FEATURES(1, 1, 4), \ 981bf215546Sopenharmony_ci .num_subslices = dual_subslices(1) 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci#define GFX12_GT_FEATURES(_gt) \ 984bf215546Sopenharmony_ci GFX12_FEATURES(_gt, 1, _gt == 1 ? 4 : 8), \ 985bf215546Sopenharmony_ci .num_subslices = dual_subslices(_gt == 1 ? 2 : 6) 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_tgl_gt1 = { 988bf215546Sopenharmony_ci GFX12_GT_FEATURES(1), 989bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_TGL, 990bf215546Sopenharmony_ci}; 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_tgl_gt2 = { 993bf215546Sopenharmony_ci GFX12_GT_FEATURES(2), 994bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_TGL, 995bf215546Sopenharmony_ci}; 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_rkl_gt05 = { 998bf215546Sopenharmony_ci GFX12_GT05_FEATURES, 999bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_RKL, 1000bf215546Sopenharmony_ci}; 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_rkl_gt1 = { 1003bf215546Sopenharmony_ci GFX12_GT_FEATURES(1), 1004bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_RKL, 1005bf215546Sopenharmony_ci}; 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_adl_gt05 = { 1008bf215546Sopenharmony_ci GFX12_GT05_FEATURES, 1009bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_ADL, 1010bf215546Sopenharmony_ci .display_ver = 13, 1011bf215546Sopenharmony_ci}; 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_adl_gt1 = { 1014bf215546Sopenharmony_ci GFX12_GT_FEATURES(1), 1015bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_ADL, 1016bf215546Sopenharmony_ci .display_ver = 13, 1017bf215546Sopenharmony_ci}; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_adl_n = { 1020bf215546Sopenharmony_ci GFX12_GT_FEATURES(1), 1021bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_ADL, 1022bf215546Sopenharmony_ci .display_ver = 13, 1023bf215546Sopenharmony_ci}; 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_adl_gt2 = { 1026bf215546Sopenharmony_ci GFX12_GT_FEATURES(2), 1027bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_ADL, 1028bf215546Sopenharmony_ci .display_ver = 13, 1029bf215546Sopenharmony_ci}; 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_rpl = { 1032bf215546Sopenharmony_ci GFX12_FEATURES(1, 1, 4), 1033bf215546Sopenharmony_ci .num_subslices = dual_subslices(2), 1034bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_RPL, 1035bf215546Sopenharmony_ci .display_ver = 13, 1036bf215546Sopenharmony_ci}; 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_rpl_p = { 1039bf215546Sopenharmony_ci GFX12_GT_FEATURES(2), 1040bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_RPL, 1041bf215546Sopenharmony_ci .display_ver = 13, 1042bf215546Sopenharmony_ci}; 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci#define GFX12_DG1_SG1_FEATURES \ 1045bf215546Sopenharmony_ci GFX12_GT_FEATURES(2), \ 1046bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_DG1, \ 1047bf215546Sopenharmony_ci .has_llc = false, \ 1048bf215546Sopenharmony_ci .has_local_mem = true, \ 1049bf215546Sopenharmony_ci .urb.size = 768, \ 1050bf215546Sopenharmony_ci .simulator_id = 30 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_dg1 = { 1053bf215546Sopenharmony_ci GFX12_DG1_SG1_FEATURES, 1054bf215546Sopenharmony_ci}; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_sg1 = { 1057bf215546Sopenharmony_ci GFX12_DG1_SG1_FEATURES, 1058bf215546Sopenharmony_ci}; 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci#define XEHP_FEATURES(_gt, _slices, _l3) \ 1061bf215546Sopenharmony_ci GFX12_FEATURES(_gt, _slices, _l3), \ 1062bf215546Sopenharmony_ci .num_thread_per_eu = 8 /* BSpec 44472 */, \ 1063bf215546Sopenharmony_ci .verx10 = 125, \ 1064bf215546Sopenharmony_ci .has_llc = false, \ 1065bf215546Sopenharmony_ci .has_local_mem = true, \ 1066bf215546Sopenharmony_ci .has_aux_map = false, \ 1067bf215546Sopenharmony_ci .simulator_id = 29, \ 1068bf215546Sopenharmony_ci .cs_prefetch_size = 1024 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci#define DG2_FEATURES \ 1071bf215546Sopenharmony_ci /* (Sub)slice info comes from the kernel topology info */ \ 1072bf215546Sopenharmony_ci XEHP_FEATURES(0, 1, 0), \ 1073bf215546Sopenharmony_ci .display_ver = 13, \ 1074bf215546Sopenharmony_ci .revision = 4, /* For offline compiler */ \ 1075bf215546Sopenharmony_ci .num_subslices = dual_subslices(1), \ 1076bf215546Sopenharmony_ci .has_lsc = true, \ 1077bf215546Sopenharmony_ci .apply_hwconfig = true, \ 1078bf215546Sopenharmony_ci .has_coarse_pixel_primitive_and_cb = true, \ 1079bf215546Sopenharmony_ci .has_mesh_shading = true 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_dg2_g10 = { 1082bf215546Sopenharmony_ci DG2_FEATURES, 1083bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_DG2_G10, 1084bf215546Sopenharmony_ci}; 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_dg2_g11 = { 1087bf215546Sopenharmony_ci DG2_FEATURES, 1088bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_DG2_G11, 1089bf215546Sopenharmony_ci}; 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_cistatic const struct intel_device_info intel_device_info_dg2_g12 = { 1092bf215546Sopenharmony_ci DG2_FEATURES, 1093bf215546Sopenharmony_ci .platform = INTEL_PLATFORM_DG2_G12, 1094bf215546Sopenharmony_ci}; 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_cistatic void 1097bf215546Sopenharmony_cireset_masks(struct intel_device_info *devinfo) 1098bf215546Sopenharmony_ci{ 1099bf215546Sopenharmony_ci devinfo->subslice_slice_stride = 0; 1100bf215546Sopenharmony_ci devinfo->eu_subslice_stride = 0; 1101bf215546Sopenharmony_ci devinfo->eu_slice_stride = 0; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci devinfo->num_slices = 0; 1104bf215546Sopenharmony_ci memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); 1107bf215546Sopenharmony_ci memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); 1108bf215546Sopenharmony_ci memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); 1109bf215546Sopenharmony_ci memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices)); 1110bf215546Sopenharmony_ci} 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_cistatic void 1113bf215546Sopenharmony_ciupdate_slice_subslice_counts(struct intel_device_info *devinfo) 1114bf215546Sopenharmony_ci{ 1115bf215546Sopenharmony_ci devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); 1116bf215546Sopenharmony_ci devinfo->subslice_total = 0; 1117bf215546Sopenharmony_ci for (int s = 0; s < devinfo->max_slices; s++) { 1118bf215546Sopenharmony_ci if (!intel_device_info_slice_available(devinfo, s)) 1119bf215546Sopenharmony_ci continue; 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci for (int b = 0; b < devinfo->subslice_slice_stride; b++) { 1122bf215546Sopenharmony_ci devinfo->num_subslices[s] += 1123bf215546Sopenharmony_ci __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]); 1124bf215546Sopenharmony_ci } 1125bf215546Sopenharmony_ci devinfo->subslice_total += devinfo->num_subslices[s]; 1126bf215546Sopenharmony_ci } 1127bf215546Sopenharmony_ci assert(devinfo->num_slices > 0); 1128bf215546Sopenharmony_ci assert(devinfo->subslice_total > 0); 1129bf215546Sopenharmony_ci} 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_cistatic void 1132bf215546Sopenharmony_ciupdate_pixel_pipes(struct intel_device_info *devinfo, uint8_t *subslice_masks) 1133bf215546Sopenharmony_ci{ 1134bf215546Sopenharmony_ci if (devinfo->ver < 11) 1135bf215546Sopenharmony_ci return; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci /* The kernel only reports one slice on all existing ICL+ platforms, even 1138bf215546Sopenharmony_ci * if multiple slices are present. The slice mask is allowed to have the 1139bf215546Sopenharmony_ci * accurate value greater than 1 on gfx12.5+ platforms though, in order to 1140bf215546Sopenharmony_ci * be tolerant with the behavior of our simulation environment. 1141bf215546Sopenharmony_ci */ 1142bf215546Sopenharmony_ci assert(devinfo->slice_masks == 1 || devinfo->verx10 >= 125); 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci /* Count the number of subslices on each pixel pipe. Assume that every 1145bf215546Sopenharmony_ci * contiguous group of 4 subslices in the mask belong to the same pixel 1146bf215546Sopenharmony_ci * pipe. However note that on TGL+ the kernel returns a mask of enabled 1147bf215546Sopenharmony_ci * *dual* subslices instead of actual subslices somewhat confusingly, so 1148bf215546Sopenharmony_ci * each pixel pipe only takes 2 bits in the mask even though it's still 4 1149bf215546Sopenharmony_ci * subslices. 1150bf215546Sopenharmony_ci */ 1151bf215546Sopenharmony_ci const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4; 1152bf215546Sopenharmony_ci for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) { 1153bf215546Sopenharmony_ci const unsigned offset = p * ppipe_bits; 1154bf215546Sopenharmony_ci const unsigned subslice_idx = offset / 1155bf215546Sopenharmony_ci devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride; 1156bf215546Sopenharmony_ci const unsigned ppipe_mask = 1157bf215546Sopenharmony_ci BITFIELD_RANGE(offset % devinfo->max_subslices_per_slice, ppipe_bits); 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks)) 1160bf215546Sopenharmony_ci devinfo->ppipe_subslices[p] = 1161bf215546Sopenharmony_ci __builtin_popcount(subslice_masks[subslice_idx] & ppipe_mask); 1162bf215546Sopenharmony_ci else 1163bf215546Sopenharmony_ci devinfo->ppipe_subslices[p] = 0; 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci} 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_cistatic void 1168bf215546Sopenharmony_ciupdate_l3_banks(struct intel_device_info *devinfo) 1169bf215546Sopenharmony_ci{ 1170bf215546Sopenharmony_ci if (devinfo->ver != 12) 1171bf215546Sopenharmony_ci return; 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci if (devinfo->verx10 >= 125) { 1174bf215546Sopenharmony_ci if (devinfo->subslice_total > 16) { 1175bf215546Sopenharmony_ci assert(devinfo->subslice_total <= 32); 1176bf215546Sopenharmony_ci devinfo->l3_banks = 32; 1177bf215546Sopenharmony_ci } else if (devinfo->subslice_total > 8) { 1178bf215546Sopenharmony_ci devinfo->l3_banks = 16; 1179bf215546Sopenharmony_ci } else { 1180bf215546Sopenharmony_ci devinfo->l3_banks = 8; 1181bf215546Sopenharmony_ci } 1182bf215546Sopenharmony_ci } else { 1183bf215546Sopenharmony_ci assert(devinfo->num_slices == 1); 1184bf215546Sopenharmony_ci if (devinfo->subslice_total >= 6) { 1185bf215546Sopenharmony_ci assert(devinfo->subslice_total == 6); 1186bf215546Sopenharmony_ci devinfo->l3_banks = 8; 1187bf215546Sopenharmony_ci } else if (devinfo->subslice_total > 2) { 1188bf215546Sopenharmony_ci devinfo->l3_banks = 6; 1189bf215546Sopenharmony_ci } else { 1190bf215546Sopenharmony_ci devinfo->l3_banks = 4; 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci} 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci/* At some point in time, some people decided to redefine what topology means, 1196bf215546Sopenharmony_ci * from useful HW related information (slice, subslice, etc...), to much less 1197bf215546Sopenharmony_ci * useful generic stuff that no one cares about (a single slice with lots of 1198bf215546Sopenharmony_ci * subslices). Of course all of this was done without asking the people who 1199bf215546Sopenharmony_ci * defined the topology query in the first place, to solve a lack of 1200bf215546Sopenharmony_ci * information Gfx10+. This function is here to workaround the fact it's not 1201bf215546Sopenharmony_ci * possible to change people's mind even before this stuff goes upstream. Sad 1202bf215546Sopenharmony_ci * times... 1203bf215546Sopenharmony_ci */ 1204bf215546Sopenharmony_cistatic void 1205bf215546Sopenharmony_ciupdate_from_single_slice_topology(struct intel_device_info *devinfo, 1206bf215546Sopenharmony_ci const struct drm_i915_query_topology_info *topology, 1207bf215546Sopenharmony_ci const struct drm_i915_query_topology_info *geom_topology) 1208bf215546Sopenharmony_ci{ 1209bf215546Sopenharmony_ci /* An array of bit masks of the subslices available for 3D 1210bf215546Sopenharmony_ci * workloads, analogous to intel_device_info::subslice_masks. This 1211bf215546Sopenharmony_ci * may differ from the set of enabled subslices on XeHP+ platforms 1212bf215546Sopenharmony_ci * with compute-only subslices. 1213bf215546Sopenharmony_ci */ 1214bf215546Sopenharmony_ci uint8_t geom_subslice_masks[ARRAY_SIZE(devinfo->subslice_masks)] = { 0 }; 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci assert(devinfo->verx10 >= 125); 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci reset_masks(devinfo); 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci assert(topology->max_slices == 1); 1221bf215546Sopenharmony_ci assert(topology->max_subslices > 0); 1222bf215546Sopenharmony_ci assert(topology->max_eus_per_subslice > 0); 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci /* i915 gives us only one slice so we have to rebuild that out of groups of 1225bf215546Sopenharmony_ci * 4 dualsubslices. 1226bf215546Sopenharmony_ci */ 1227bf215546Sopenharmony_ci devinfo->max_subslices_per_slice = 4; 1228bf215546Sopenharmony_ci devinfo->max_eus_per_subslice = 16; 1229bf215546Sopenharmony_ci devinfo->subslice_slice_stride = 1; 1230bf215546Sopenharmony_ci devinfo->eu_slice_stride = DIV_ROUND_UP(16 * 4, 8); 1231bf215546Sopenharmony_ci devinfo->eu_subslice_stride = DIV_ROUND_UP(16, 8); 1232bf215546Sopenharmony_ci 1233bf215546Sopenharmony_ci for (uint32_t ss_idx = 0; ss_idx < topology->max_subslices; ss_idx++) { 1234bf215546Sopenharmony_ci const uint32_t s = ss_idx / 4; 1235bf215546Sopenharmony_ci const uint32_t ss = ss_idx % 4; 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci /* Determine whether ss_idx is enabled (ss_idx_available) and 1238bf215546Sopenharmony_ci * available for 3D workloads (geom_ss_idx_available), which may 1239bf215546Sopenharmony_ci * differ on XeHP+ if ss_idx is a compute-only DSS. 1240bf215546Sopenharmony_ci */ 1241bf215546Sopenharmony_ci const bool ss_idx_available = 1242bf215546Sopenharmony_ci (topology->data[topology->subslice_offset + ss_idx / 8] >> 1243bf215546Sopenharmony_ci (ss_idx % 8)) & 1; 1244bf215546Sopenharmony_ci const bool geom_ss_idx_available = 1245bf215546Sopenharmony_ci (geom_topology->data[geom_topology->subslice_offset + ss_idx / 8] >> 1246bf215546Sopenharmony_ci (ss_idx % 8)) & 1; 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci if (geom_ss_idx_available) { 1249bf215546Sopenharmony_ci assert(ss_idx_available); 1250bf215546Sopenharmony_ci geom_subslice_masks[s * devinfo->subslice_slice_stride + 1251bf215546Sopenharmony_ci ss / 8] |= 1u << (ss % 8); 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci if (!ss_idx_available) 1255bf215546Sopenharmony_ci continue; 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci devinfo->max_slices = MAX2(devinfo->max_slices, s + 1); 1258bf215546Sopenharmony_ci devinfo->slice_masks |= 1u << s; 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci devinfo->subslice_masks[s * devinfo->subslice_slice_stride + 1261bf215546Sopenharmony_ci ss / 8] |= 1u << (ss % 8); 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci for (uint32_t eu = 0; eu < devinfo->max_eus_per_subslice; eu++) { 1264bf215546Sopenharmony_ci const bool eu_available = 1265bf215546Sopenharmony_ci (topology->data[topology->eu_offset + 1266bf215546Sopenharmony_ci ss_idx * topology->eu_stride + 1267bf215546Sopenharmony_ci eu / 8] >> (eu % 8)) & 1; 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci if (!eu_available) 1270bf215546Sopenharmony_ci continue; 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci devinfo->eu_masks[s * devinfo->eu_slice_stride + 1273bf215546Sopenharmony_ci ss * devinfo->eu_subslice_stride + 1274bf215546Sopenharmony_ci eu / 8] |= 1u << (eu % 8); 1275bf215546Sopenharmony_ci } 1276bf215546Sopenharmony_ci } 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci update_slice_subslice_counts(devinfo); 1279bf215546Sopenharmony_ci update_pixel_pipes(devinfo, geom_subslice_masks); 1280bf215546Sopenharmony_ci update_l3_banks(devinfo); 1281bf215546Sopenharmony_ci} 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_cistatic void 1284bf215546Sopenharmony_ciupdate_from_topology(struct intel_device_info *devinfo, 1285bf215546Sopenharmony_ci const struct drm_i915_query_topology_info *topology) 1286bf215546Sopenharmony_ci{ 1287bf215546Sopenharmony_ci reset_masks(devinfo); 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci assert(topology->max_slices > 0); 1290bf215546Sopenharmony_ci assert(topology->max_subslices > 0); 1291bf215546Sopenharmony_ci assert(topology->max_eus_per_subslice > 0); 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci devinfo->subslice_slice_stride = topology->subslice_stride; 1294bf215546Sopenharmony_ci 1295bf215546Sopenharmony_ci devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8); 1296bf215546Sopenharmony_ci devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride; 1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_ci assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); 1299bf215546Sopenharmony_ci memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); 1300bf215546Sopenharmony_ci devinfo->max_slices = topology->max_slices; 1301bf215546Sopenharmony_ci devinfo->max_subslices_per_slice = topology->max_subslices; 1302bf215546Sopenharmony_ci devinfo->max_eus_per_subslice = topology->max_eus_per_subslice; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci uint32_t subslice_mask_len = 1305bf215546Sopenharmony_ci topology->max_slices * topology->subslice_stride; 1306bf215546Sopenharmony_ci assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); 1307bf215546Sopenharmony_ci memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset], 1308bf215546Sopenharmony_ci subslice_mask_len); 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci uint32_t eu_mask_len = 1311bf215546Sopenharmony_ci topology->eu_stride * topology->max_subslices * topology->max_slices; 1312bf215546Sopenharmony_ci assert(sizeof(devinfo->eu_masks) >= eu_mask_len); 1313bf215546Sopenharmony_ci memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len); 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci /* Now that all the masks are in place, update the counts. */ 1316bf215546Sopenharmony_ci update_slice_subslice_counts(devinfo); 1317bf215546Sopenharmony_ci update_pixel_pipes(devinfo, devinfo->subslice_masks); 1318bf215546Sopenharmony_ci update_l3_banks(devinfo); 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_ci/* Generate detailed mask from the I915_PARAM_SLICE_MASK, 1322bf215546Sopenharmony_ci * I915_PARAM_SUBSLICE_MASK & I915_PARAM_EU_TOTAL getparam. 1323bf215546Sopenharmony_ci */ 1324bf215546Sopenharmony_cistatic bool 1325bf215546Sopenharmony_ciupdate_from_masks(struct intel_device_info *devinfo, uint32_t slice_mask, 1326bf215546Sopenharmony_ci uint32_t subslice_mask, uint32_t n_eus) 1327bf215546Sopenharmony_ci{ 1328bf215546Sopenharmony_ci struct drm_i915_query_topology_info *topology; 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci assert((slice_mask & 0xff) == slice_mask); 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci size_t data_length = 100; 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci topology = calloc(1, sizeof(*topology) + data_length); 1335bf215546Sopenharmony_ci if (!topology) 1336bf215546Sopenharmony_ci return false; 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci topology->max_slices = util_last_bit(slice_mask); 1339bf215546Sopenharmony_ci topology->max_subslices = util_last_bit(subslice_mask); 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8); 1342bf215546Sopenharmony_ci topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8); 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci uint32_t n_subslices = __builtin_popcount(slice_mask) * 1345bf215546Sopenharmony_ci __builtin_popcount(subslice_mask); 1346bf215546Sopenharmony_ci uint32_t max_eus_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 1347bf215546Sopenharmony_ci uint32_t eu_mask = (1U << max_eus_per_subslice) - 1; 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci topology->max_eus_per_subslice = max_eus_per_subslice; 1350bf215546Sopenharmony_ci topology->eu_offset = topology->subslice_offset + 1351bf215546Sopenharmony_ci topology->max_slices * DIV_ROUND_UP(topology->max_subslices, 8); 1352bf215546Sopenharmony_ci topology->eu_stride = DIV_ROUND_UP(max_eus_per_subslice, 8); 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci /* Set slice mask in topology */ 1355bf215546Sopenharmony_ci for (int b = 0; b < topology->subslice_offset; b++) 1356bf215546Sopenharmony_ci topology->data[b] = (slice_mask >> (b * 8)) & 0xff; 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_ci for (int s = 0; s < topology->max_slices; s++) { 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci /* Set subslice mask in topology */ 1361bf215546Sopenharmony_ci for (int b = 0; b < topology->subslice_stride; b++) { 1362bf215546Sopenharmony_ci int subslice_offset = topology->subslice_offset + 1363bf215546Sopenharmony_ci s * topology->subslice_stride + b; 1364bf215546Sopenharmony_ci 1365bf215546Sopenharmony_ci topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; 1366bf215546Sopenharmony_ci } 1367bf215546Sopenharmony_ci 1368bf215546Sopenharmony_ci /* Set eu mask in topology */ 1369bf215546Sopenharmony_ci for (int ss = 0; ss < topology->max_subslices; ss++) { 1370bf215546Sopenharmony_ci for (int b = 0; b < topology->eu_stride; b++) { 1371bf215546Sopenharmony_ci int eu_offset = topology->eu_offset + 1372bf215546Sopenharmony_ci (s * topology->max_subslices + ss) * topology->eu_stride + b; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; 1375bf215546Sopenharmony_ci } 1376bf215546Sopenharmony_ci } 1377bf215546Sopenharmony_ci } 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci update_from_topology(devinfo, topology); 1380bf215546Sopenharmony_ci free(topology); 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci return true; 1383bf215546Sopenharmony_ci} 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci/* Generate mask from the device data. */ 1386bf215546Sopenharmony_cistatic void 1387bf215546Sopenharmony_cifill_masks(struct intel_device_info *devinfo) 1388bf215546Sopenharmony_ci{ 1389bf215546Sopenharmony_ci /* All of our internal device descriptions assign the same number of 1390bf215546Sopenharmony_ci * subslices for each slice. Just verify that this is true. 1391bf215546Sopenharmony_ci */ 1392bf215546Sopenharmony_ci for (int s = 1; s < devinfo->num_slices; s++) 1393bf215546Sopenharmony_ci assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]); 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci update_from_masks(devinfo, 1396bf215546Sopenharmony_ci (1U << devinfo->num_slices) - 1, 1397bf215546Sopenharmony_ci (1U << devinfo->num_subslices[0]) - 1, 1398bf215546Sopenharmony_ci devinfo->num_slices * devinfo->num_subslices[0] * 1399bf215546Sopenharmony_ci devinfo->max_eus_per_subslice); 1400bf215546Sopenharmony_ci} 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_cistatic bool 1403bf215546Sopenharmony_cigetparam(int fd, uint32_t param, int *value) 1404bf215546Sopenharmony_ci{ 1405bf215546Sopenharmony_ci int tmp; 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci struct drm_i915_getparam gp = { 1408bf215546Sopenharmony_ci .param = param, 1409bf215546Sopenharmony_ci .value = &tmp, 1410bf215546Sopenharmony_ci }; 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 1413bf215546Sopenharmony_ci if (ret != 0) 1414bf215546Sopenharmony_ci return false; 1415bf215546Sopenharmony_ci 1416bf215546Sopenharmony_ci *value = tmp; 1417bf215546Sopenharmony_ci return true; 1418bf215546Sopenharmony_ci} 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_cistatic bool 1421bf215546Sopenharmony_ciget_context_param(int fd, uint32_t context, uint32_t param, uint64_t *value) 1422bf215546Sopenharmony_ci{ 1423bf215546Sopenharmony_ci struct drm_i915_gem_context_param gp = { 1424bf215546Sopenharmony_ci .ctx_id = context, 1425bf215546Sopenharmony_ci .param = param, 1426bf215546Sopenharmony_ci }; 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci int ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp); 1429bf215546Sopenharmony_ci if (ret != 0) 1430bf215546Sopenharmony_ci return false; 1431bf215546Sopenharmony_ci 1432bf215546Sopenharmony_ci *value = gp.value; 1433bf215546Sopenharmony_ci return true; 1434bf215546Sopenharmony_ci} 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_cistatic void 1437bf215546Sopenharmony_ciupdate_cs_workgroup_threads(struct intel_device_info *devinfo) 1438bf215546Sopenharmony_ci{ 1439bf215546Sopenharmony_ci /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we 1440bf215546Sopenharmony_ci * can program is 64 without going up to a rectangular group. This only 1441bf215546Sopenharmony_ci * impacts Haswell and TGL which have higher thread counts. 1442bf215546Sopenharmony_ci * 1443bf215546Sopenharmony_ci * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+ 1444bf215546Sopenharmony_ci * is 10 bits so we have no such restrictions. 1445bf215546Sopenharmony_ci */ 1446bf215546Sopenharmony_ci devinfo->max_cs_workgroup_threads = 1447bf215546Sopenharmony_ci devinfo->verx10 >= 125 ? devinfo->max_cs_threads : 1448bf215546Sopenharmony_ci MIN2(devinfo->max_cs_threads, 64); 1449bf215546Sopenharmony_ci} 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_cibool 1452bf215546Sopenharmony_ciintel_get_device_info_from_pci_id(int pci_id, 1453bf215546Sopenharmony_ci struct intel_device_info *devinfo) 1454bf215546Sopenharmony_ci{ 1455bf215546Sopenharmony_ci switch (pci_id) { 1456bf215546Sopenharmony_ci#undef CHIPSET 1457bf215546Sopenharmony_ci#define CHIPSET(id, family, fam_str, name) \ 1458bf215546Sopenharmony_ci case id: *devinfo = intel_device_info_##family; break; 1459bf215546Sopenharmony_ci#include "pci_ids/crocus_pci_ids.h" 1460bf215546Sopenharmony_ci#include "pci_ids/iris_pci_ids.h" 1461bf215546Sopenharmony_ci 1462bf215546Sopenharmony_ci#undef CHIPSET 1463bf215546Sopenharmony_ci#define CHIPSET(id, fam_str, name) \ 1464bf215546Sopenharmony_ci case id: *devinfo = intel_device_info_gfx3; break; 1465bf215546Sopenharmony_ci#include "pci_ids/i915_pci_ids.h" 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci default: 1468bf215546Sopenharmony_ci mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id); 1469bf215546Sopenharmony_ci return false; 1470bf215546Sopenharmony_ci } 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci switch (pci_id) { 1473bf215546Sopenharmony_ci#undef CHIPSET 1474bf215546Sopenharmony_ci#define CHIPSET(_id, _family, _fam_str, _name) \ 1475bf215546Sopenharmony_ci case _id: \ 1476bf215546Sopenharmony_ci /* sizeof(str_literal) includes the null */ \ 1477bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \ 1478bf215546Sopenharmony_ci sizeof(devinfo->name)); \ 1479bf215546Sopenharmony_ci strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \ 1480bf215546Sopenharmony_ci break; 1481bf215546Sopenharmony_ci#include "pci_ids/crocus_pci_ids.h" 1482bf215546Sopenharmony_ci#include "pci_ids/iris_pci_ids.h" 1483bf215546Sopenharmony_ci default: 1484bf215546Sopenharmony_ci strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name)); 1485bf215546Sopenharmony_ci } 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci fill_masks(devinfo); 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: 1490bf215546Sopenharmony_ci * 1491bf215546Sopenharmony_ci * "Scratch Space per slice is computed based on 4 sub-slices. SW must 1492bf215546Sopenharmony_ci * allocate scratch space enough so that each slice has 4 slices allowed." 1493bf215546Sopenharmony_ci * 1494bf215546Sopenharmony_ci * The equivalent internal documentation says that this programming note 1495bf215546Sopenharmony_ci * applies to all Gfx9+ platforms. 1496bf215546Sopenharmony_ci * 1497bf215546Sopenharmony_ci * The hardware typically calculates the scratch space pointer by taking 1498bf215546Sopenharmony_ci * the base address, and adding per-thread-scratch-space * thread ID. 1499bf215546Sopenharmony_ci * Extra padding can be necessary depending how the thread IDs are 1500bf215546Sopenharmony_ci * calculated for a particular shader stage. 1501bf215546Sopenharmony_ci */ 1502bf215546Sopenharmony_ci 1503bf215546Sopenharmony_ci switch(devinfo->ver) { 1504bf215546Sopenharmony_ci case 9: 1505bf215546Sopenharmony_ci devinfo->max_wm_threads = 64 /* threads-per-PSD */ 1506bf215546Sopenharmony_ci * devinfo->num_slices 1507bf215546Sopenharmony_ci * 4; /* effective subslices per slice */ 1508bf215546Sopenharmony_ci break; 1509bf215546Sopenharmony_ci case 11: 1510bf215546Sopenharmony_ci case 12: 1511bf215546Sopenharmony_ci devinfo->max_wm_threads = 128 /* threads-per-PSD */ 1512bf215546Sopenharmony_ci * devinfo->num_slices 1513bf215546Sopenharmony_ci * 8; /* subslices per slice */ 1514bf215546Sopenharmony_ci break; 1515bf215546Sopenharmony_ci default: 1516bf215546Sopenharmony_ci assert(devinfo->ver < 9); 1517bf215546Sopenharmony_ci break; 1518bf215546Sopenharmony_ci } 1519bf215546Sopenharmony_ci 1520bf215546Sopenharmony_ci assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_ci if (devinfo->verx10 == 0) 1523bf215546Sopenharmony_ci devinfo->verx10 = devinfo->ver * 10; 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci if (devinfo->display_ver == 0) 1526bf215546Sopenharmony_ci devinfo->display_ver = devinfo->ver; 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci update_cs_workgroup_threads(devinfo); 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci return true; 1531bf215546Sopenharmony_ci} 1532bf215546Sopenharmony_ci 1533bf215546Sopenharmony_ci/** 1534bf215546Sopenharmony_ci * for gfx8/gfx9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology 1535bf215546Sopenharmony_ci * (kernel 4.13+) 1536bf215546Sopenharmony_ci */ 1537bf215546Sopenharmony_cistatic bool 1538bf215546Sopenharmony_cigetparam_topology(struct intel_device_info *devinfo, int fd) 1539bf215546Sopenharmony_ci{ 1540bf215546Sopenharmony_ci int slice_mask = 0; 1541bf215546Sopenharmony_ci if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask)) 1542bf215546Sopenharmony_ci goto maybe_warn; 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci int n_eus; 1545bf215546Sopenharmony_ci if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus)) 1546bf215546Sopenharmony_ci goto maybe_warn; 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci int subslice_mask = 0; 1549bf215546Sopenharmony_ci if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask)) 1550bf215546Sopenharmony_ci goto maybe_warn; 1551bf215546Sopenharmony_ci 1552bf215546Sopenharmony_ci return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus); 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci maybe_warn: 1555bf215546Sopenharmony_ci /* Only with Gfx8+ are we starting to see devices with fusing that can only 1556bf215546Sopenharmony_ci * be detected at runtime. 1557bf215546Sopenharmony_ci */ 1558bf215546Sopenharmony_ci if (devinfo->ver >= 8) 1559bf215546Sopenharmony_ci mesa_logw("Kernel 4.1 required to properly query GPU properties."); 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci return false; 1562bf215546Sopenharmony_ci} 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci/** 1565bf215546Sopenharmony_ci * preferred API for updating the topology in devinfo (kernel 4.17+) 1566bf215546Sopenharmony_ci */ 1567bf215546Sopenharmony_cistatic bool 1568bf215546Sopenharmony_ciquery_topology(struct intel_device_info *devinfo, int fd) 1569bf215546Sopenharmony_ci{ 1570bf215546Sopenharmony_ci struct drm_i915_query_topology_info *topo_info = 1571bf215546Sopenharmony_ci intel_i915_query_alloc(fd, DRM_I915_QUERY_TOPOLOGY_INFO, NULL); 1572bf215546Sopenharmony_ci if (topo_info == NULL) 1573bf215546Sopenharmony_ci return false; 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci if (devinfo->verx10 >= 125) { 1576bf215546Sopenharmony_ci struct drm_i915_query_topology_info *geom_topo_info = 1577bf215546Sopenharmony_ci intel_i915_query_alloc(fd, DRM_I915_QUERY_GEOMETRY_SUBSLICES, NULL); 1578bf215546Sopenharmony_ci if (geom_topo_info == NULL) { 1579bf215546Sopenharmony_ci free(topo_info); 1580bf215546Sopenharmony_ci return false; 1581bf215546Sopenharmony_ci } 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci update_from_single_slice_topology(devinfo, topo_info, geom_topo_info); 1584bf215546Sopenharmony_ci free(geom_topo_info); 1585bf215546Sopenharmony_ci } else { 1586bf215546Sopenharmony_ci update_from_topology(devinfo, topo_info); 1587bf215546Sopenharmony_ci } 1588bf215546Sopenharmony_ci 1589bf215546Sopenharmony_ci free(topo_info); 1590bf215546Sopenharmony_ci 1591bf215546Sopenharmony_ci return true; 1592bf215546Sopenharmony_ci 1593bf215546Sopenharmony_ci} 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci/** 1596bf215546Sopenharmony_ci * Reports memory region info, and allows buffers to target system-memory, 1597bf215546Sopenharmony_ci * and/or device local memory. 1598bf215546Sopenharmony_ci */ 1599bf215546Sopenharmony_cistatic bool 1600bf215546Sopenharmony_ciquery_regions(struct intel_device_info *devinfo, int fd, bool update) 1601bf215546Sopenharmony_ci{ 1602bf215546Sopenharmony_ci struct drm_i915_query_memory_regions *meminfo = 1603bf215546Sopenharmony_ci intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS, NULL); 1604bf215546Sopenharmony_ci if (meminfo == NULL) 1605bf215546Sopenharmony_ci return false; 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_ci for (int i = 0; i < meminfo->num_regions; i++) { 1608bf215546Sopenharmony_ci const struct drm_i915_memory_region_info *mem = &meminfo->regions[i]; 1609bf215546Sopenharmony_ci switch (mem->region.memory_class) { 1610bf215546Sopenharmony_ci case I915_MEMORY_CLASS_SYSTEM: { 1611bf215546Sopenharmony_ci if (!update) { 1612bf215546Sopenharmony_ci devinfo->mem.sram.mem_class = mem->region.memory_class; 1613bf215546Sopenharmony_ci devinfo->mem.sram.mem_instance = mem->region.memory_instance; 1614bf215546Sopenharmony_ci devinfo->mem.sram.mappable.size = mem->probed_size; 1615bf215546Sopenharmony_ci } else { 1616bf215546Sopenharmony_ci assert(devinfo->mem.sram.mem_class == mem->region.memory_class); 1617bf215546Sopenharmony_ci assert(devinfo->mem.sram.mem_instance == mem->region.memory_instance); 1618bf215546Sopenharmony_ci assert(devinfo->mem.sram.mappable.size == mem->probed_size); 1619bf215546Sopenharmony_ci } 1620bf215546Sopenharmony_ci /* The kernel uAPI only reports an accurate unallocated_size value 1621bf215546Sopenharmony_ci * for I915_MEMORY_CLASS_DEVICE. 1622bf215546Sopenharmony_ci */ 1623bf215546Sopenharmony_ci uint64_t available; 1624bf215546Sopenharmony_ci if (os_get_available_system_memory(&available)) 1625bf215546Sopenharmony_ci devinfo->mem.sram.mappable.free = MIN2(available, mem->probed_size); 1626bf215546Sopenharmony_ci break; 1627bf215546Sopenharmony_ci } 1628bf215546Sopenharmony_ci case I915_MEMORY_CLASS_DEVICE: 1629bf215546Sopenharmony_ci if (!update) { 1630bf215546Sopenharmony_ci devinfo->mem.vram.mem_class = mem->region.memory_class; 1631bf215546Sopenharmony_ci devinfo->mem.vram.mem_instance = mem->region.memory_instance; 1632bf215546Sopenharmony_ci if (mem->probed_cpu_visible_size > 0) { 1633bf215546Sopenharmony_ci devinfo->mem.vram.mappable.size = mem->probed_cpu_visible_size; 1634bf215546Sopenharmony_ci devinfo->mem.vram.unmappable.size = 1635bf215546Sopenharmony_ci mem->probed_size - mem->probed_cpu_visible_size; 1636bf215546Sopenharmony_ci } else { 1637bf215546Sopenharmony_ci /* We are running on an older kernel without support for the 1638bf215546Sopenharmony_ci * small-bar uapi. These kernels only support systems where the 1639bf215546Sopenharmony_ci * entire vram is mappable. 1640bf215546Sopenharmony_ci */ 1641bf215546Sopenharmony_ci devinfo->mem.vram.mappable.size = mem->probed_size; 1642bf215546Sopenharmony_ci devinfo->mem.vram.unmappable.size = 0; 1643bf215546Sopenharmony_ci } 1644bf215546Sopenharmony_ci } else { 1645bf215546Sopenharmony_ci assert(devinfo->mem.vram.mem_class == mem->region.memory_class); 1646bf215546Sopenharmony_ci assert(devinfo->mem.vram.mem_instance == mem->region.memory_instance); 1647bf215546Sopenharmony_ci assert((devinfo->mem.vram.mappable.size + 1648bf215546Sopenharmony_ci devinfo->mem.vram.unmappable.size) == mem->probed_size); 1649bf215546Sopenharmony_ci } 1650bf215546Sopenharmony_ci if (mem->unallocated_cpu_visible_size > 0) { 1651bf215546Sopenharmony_ci if (mem->unallocated_size != -1) { 1652bf215546Sopenharmony_ci devinfo->mem.vram.mappable.free = mem->unallocated_cpu_visible_size; 1653bf215546Sopenharmony_ci devinfo->mem.vram.unmappable.free = 1654bf215546Sopenharmony_ci mem->unallocated_size - mem->unallocated_cpu_visible_size; 1655bf215546Sopenharmony_ci } 1656bf215546Sopenharmony_ci } else { 1657bf215546Sopenharmony_ci /* We are running on an older kernel without support for the 1658bf215546Sopenharmony_ci * small-bar uapi. These kernels only support systems where the 1659bf215546Sopenharmony_ci * entire vram is mappable. 1660bf215546Sopenharmony_ci */ 1661bf215546Sopenharmony_ci if (mem->unallocated_size != -1) { 1662bf215546Sopenharmony_ci devinfo->mem.vram.mappable.free = mem->unallocated_size; 1663bf215546Sopenharmony_ci devinfo->mem.vram.unmappable.free = 0; 1664bf215546Sopenharmony_ci } 1665bf215546Sopenharmony_ci } 1666bf215546Sopenharmony_ci break; 1667bf215546Sopenharmony_ci default: 1668bf215546Sopenharmony_ci break; 1669bf215546Sopenharmony_ci } 1670bf215546Sopenharmony_ci } 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci free(meminfo); 1673bf215546Sopenharmony_ci devinfo->mem.use_class_instance = true; 1674bf215546Sopenharmony_ci return true; 1675bf215546Sopenharmony_ci} 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_cistatic bool 1678bf215546Sopenharmony_cicompute_system_memory(struct intel_device_info *devinfo, bool update) 1679bf215546Sopenharmony_ci{ 1680bf215546Sopenharmony_ci uint64_t total_phys; 1681bf215546Sopenharmony_ci if (!os_get_total_physical_memory(&total_phys)) 1682bf215546Sopenharmony_ci return false; 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_ci uint64_t available = 0; 1685bf215546Sopenharmony_ci os_get_available_system_memory(&available); 1686bf215546Sopenharmony_ci 1687bf215546Sopenharmony_ci if (!update) 1688bf215546Sopenharmony_ci devinfo->mem.sram.mappable.size = total_phys; 1689bf215546Sopenharmony_ci else 1690bf215546Sopenharmony_ci assert(devinfo->mem.sram.mappable.size == total_phys); 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_ci devinfo->mem.sram.mappable.free = available; 1693bf215546Sopenharmony_ci 1694bf215546Sopenharmony_ci return true; 1695bf215546Sopenharmony_ci} 1696bf215546Sopenharmony_ci 1697bf215546Sopenharmony_cistatic int 1698bf215546Sopenharmony_ciintel_get_aperture_size(int fd, uint64_t *size) 1699bf215546Sopenharmony_ci{ 1700bf215546Sopenharmony_ci struct drm_i915_gem_get_aperture aperture = { 0 }; 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_ci int ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); 1703bf215546Sopenharmony_ci if (ret == 0 && size) 1704bf215546Sopenharmony_ci *size = aperture.aper_size; 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci return ret; 1707bf215546Sopenharmony_ci} 1708bf215546Sopenharmony_ci 1709bf215546Sopenharmony_cistatic bool 1710bf215546Sopenharmony_cihas_bit6_swizzle(int fd) 1711bf215546Sopenharmony_ci{ 1712bf215546Sopenharmony_ci struct drm_gem_close close; 1713bf215546Sopenharmony_ci int ret; 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci struct drm_i915_gem_create gem_create = { 1716bf215546Sopenharmony_ci .size = 4096, 1717bf215546Sopenharmony_ci }; 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { 1720bf215546Sopenharmony_ci unreachable("Failed to create GEM BO"); 1721bf215546Sopenharmony_ci return false; 1722bf215546Sopenharmony_ci } 1723bf215546Sopenharmony_ci 1724bf215546Sopenharmony_ci bool swizzled = false; 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_ci /* set_tiling overwrites the input on the error path, so we have to open 1727bf215546Sopenharmony_ci * code intel_ioctl. 1728bf215546Sopenharmony_ci */ 1729bf215546Sopenharmony_ci do { 1730bf215546Sopenharmony_ci struct drm_i915_gem_set_tiling set_tiling = { 1731bf215546Sopenharmony_ci .handle = gem_create.handle, 1732bf215546Sopenharmony_ci .tiling_mode = I915_TILING_X, 1733bf215546Sopenharmony_ci .stride = 512, 1734bf215546Sopenharmony_ci }; 1735bf215546Sopenharmony_ci 1736bf215546Sopenharmony_ci ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); 1737bf215546Sopenharmony_ci } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 1738bf215546Sopenharmony_ci 1739bf215546Sopenharmony_ci if (ret != 0) { 1740bf215546Sopenharmony_ci unreachable("Failed to set BO tiling"); 1741bf215546Sopenharmony_ci goto close_and_return; 1742bf215546Sopenharmony_ci } 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci struct drm_i915_gem_get_tiling get_tiling = { 1745bf215546Sopenharmony_ci .handle = gem_create.handle, 1746bf215546Sopenharmony_ci }; 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { 1749bf215546Sopenharmony_ci unreachable("Failed to get BO tiling"); 1750bf215546Sopenharmony_ci goto close_and_return; 1751bf215546Sopenharmony_ci } 1752bf215546Sopenharmony_ci 1753bf215546Sopenharmony_ci assert(get_tiling.tiling_mode == I915_TILING_X); 1754bf215546Sopenharmony_ci swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; 1755bf215546Sopenharmony_ci 1756bf215546Sopenharmony_ciclose_and_return: 1757bf215546Sopenharmony_ci memset(&close, 0, sizeof(close)); 1758bf215546Sopenharmony_ci close.handle = gem_create.handle; 1759bf215546Sopenharmony_ci intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); 1760bf215546Sopenharmony_ci 1761bf215546Sopenharmony_ci return swizzled; 1762bf215546Sopenharmony_ci} 1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_cistatic bool 1765bf215546Sopenharmony_cihas_get_tiling(int fd) 1766bf215546Sopenharmony_ci{ 1767bf215546Sopenharmony_ci int ret; 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_ci struct drm_i915_gem_create gem_create = { 1770bf215546Sopenharmony_ci .size = 4096, 1771bf215546Sopenharmony_ci }; 1772bf215546Sopenharmony_ci 1773bf215546Sopenharmony_ci if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { 1774bf215546Sopenharmony_ci unreachable("Failed to create GEM BO"); 1775bf215546Sopenharmony_ci return false; 1776bf215546Sopenharmony_ci } 1777bf215546Sopenharmony_ci 1778bf215546Sopenharmony_ci struct drm_i915_gem_get_tiling get_tiling = { 1779bf215546Sopenharmony_ci .handle = gem_create.handle, 1780bf215546Sopenharmony_ci }; 1781bf215546Sopenharmony_ci ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &get_tiling); 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ci struct drm_gem_close close = { 1784bf215546Sopenharmony_ci .handle = gem_create.handle, 1785bf215546Sopenharmony_ci }; 1786bf215546Sopenharmony_ci intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); 1787bf215546Sopenharmony_ci 1788bf215546Sopenharmony_ci return ret == 0; 1789bf215546Sopenharmony_ci} 1790bf215546Sopenharmony_ci 1791bf215546Sopenharmony_cistatic void 1792bf215546Sopenharmony_cifixup_chv_device_info(struct intel_device_info *devinfo) 1793bf215546Sopenharmony_ci{ 1794bf215546Sopenharmony_ci assert(devinfo->platform == INTEL_PLATFORM_CHV); 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_ci /* Cherryview is annoying. The number of EUs is depending on fusing and 1797bf215546Sopenharmony_ci * isn't determinable from the PCI ID alone. We default to the minimum 1798bf215546Sopenharmony_ci * available for that PCI ID and then compute the real value from the 1799bf215546Sopenharmony_ci * subslice information we get from the kernel. 1800bf215546Sopenharmony_ci */ 1801bf215546Sopenharmony_ci const uint32_t subslice_total = intel_device_info_subslice_total(devinfo); 1802bf215546Sopenharmony_ci const uint32_t eu_total = intel_device_info_eu_total(devinfo); 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_ci /* Logical CS threads = EUs per subslice * num threads per EU */ 1805bf215546Sopenharmony_ci uint32_t max_cs_threads = 1806bf215546Sopenharmony_ci eu_total / subslice_total * devinfo->num_thread_per_eu; 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci /* Fuse configurations may give more threads than expected, never less. */ 1809bf215546Sopenharmony_ci if (max_cs_threads > devinfo->max_cs_threads) 1810bf215546Sopenharmony_ci devinfo->max_cs_threads = max_cs_threads; 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci update_cs_workgroup_threads(devinfo); 1813bf215546Sopenharmony_ci 1814bf215546Sopenharmony_ci /* Braswell is even more annoying. Its marketing name isn't determinable 1815bf215546Sopenharmony_ci * from the PCI ID and is also dependent on fusing. 1816bf215546Sopenharmony_ci */ 1817bf215546Sopenharmony_ci if (devinfo->pci_device_id != 0x22B1) 1818bf215546Sopenharmony_ci return; 1819bf215546Sopenharmony_ci 1820bf215546Sopenharmony_ci char *bsw_model; 1821bf215546Sopenharmony_ci switch (eu_total) { 1822bf215546Sopenharmony_ci case 16: bsw_model = "405"; break; 1823bf215546Sopenharmony_ci case 12: bsw_model = "400"; break; 1824bf215546Sopenharmony_ci default: bsw_model = " "; break; 1825bf215546Sopenharmony_ci } 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci char *needle = strstr(devinfo->name, "XXX"); 1828bf215546Sopenharmony_ci assert(needle); 1829bf215546Sopenharmony_ci if (needle) 1830bf215546Sopenharmony_ci memcpy(needle, bsw_model, 3); 1831bf215546Sopenharmony_ci} 1832bf215546Sopenharmony_ci 1833bf215546Sopenharmony_cistatic void 1834bf215546Sopenharmony_ciinit_max_scratch_ids(struct intel_device_info *devinfo) 1835bf215546Sopenharmony_ci{ 1836bf215546Sopenharmony_ci /* Determine the max number of subslices that potentially might be used in 1837bf215546Sopenharmony_ci * scratch space ids. 1838bf215546Sopenharmony_ci * 1839bf215546Sopenharmony_ci * For, Gfx11+, scratch space allocation is based on the number of threads 1840bf215546Sopenharmony_ci * in the base configuration. 1841bf215546Sopenharmony_ci * 1842bf215546Sopenharmony_ci * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and 1843bf215546Sopenharmony_ci * we wish to view that there are 4 subslices per slice instead of the 1844bf215546Sopenharmony_ci * actual number of subslices per slice. The documentation for 3DSTATE_PS 1845bf215546Sopenharmony_ci * "Scratch Space Base Pointer" says: 1846bf215546Sopenharmony_ci * 1847bf215546Sopenharmony_ci * "Scratch Space per slice is computed based on 4 sub-slices. SW 1848bf215546Sopenharmony_ci * must allocate scratch space enough so that each slice has 4 1849bf215546Sopenharmony_ci * slices allowed." 1850bf215546Sopenharmony_ci * 1851bf215546Sopenharmony_ci * According to the other driver team, this applies to compute shaders 1852bf215546Sopenharmony_ci * as well. This is not currently documented at all. 1853bf215546Sopenharmony_ci * 1854bf215546Sopenharmony_ci * For Gfx8 and older we user devinfo->subslice_total. 1855bf215546Sopenharmony_ci */ 1856bf215546Sopenharmony_ci unsigned subslices; 1857bf215546Sopenharmony_ci if (devinfo->verx10 == 125) 1858bf215546Sopenharmony_ci subslices = 32; 1859bf215546Sopenharmony_ci else if (devinfo->ver == 12) 1860bf215546Sopenharmony_ci subslices = (devinfo->platform == INTEL_PLATFORM_DG1 || devinfo->gt == 2 ? 6 : 2); 1861bf215546Sopenharmony_ci else if (devinfo->ver == 11) 1862bf215546Sopenharmony_ci subslices = 8; 1863bf215546Sopenharmony_ci else if (devinfo->ver >= 9 && devinfo->ver < 11) 1864bf215546Sopenharmony_ci subslices = 4 * devinfo->num_slices; 1865bf215546Sopenharmony_ci else 1866bf215546Sopenharmony_ci subslices = devinfo->subslice_total; 1867bf215546Sopenharmony_ci assert(subslices >= devinfo->subslice_total); 1868bf215546Sopenharmony_ci 1869bf215546Sopenharmony_ci unsigned scratch_ids_per_subslice; 1870bf215546Sopenharmony_ci if (devinfo->ver >= 12) { 1871bf215546Sopenharmony_ci /* Same as ICL below, but with 16 EUs. */ 1872bf215546Sopenharmony_ci scratch_ids_per_subslice = 16 * 8; 1873bf215546Sopenharmony_ci } else if (devinfo->ver >= 11) { 1874bf215546Sopenharmony_ci /* The MEDIA_VFE_STATE docs say: 1875bf215546Sopenharmony_ci * 1876bf215546Sopenharmony_ci * "Starting with this configuration, the Maximum Number of 1877bf215546Sopenharmony_ci * Threads must be set to (#EU * 8) for GPGPU dispatches. 1878bf215546Sopenharmony_ci * 1879bf215546Sopenharmony_ci * Although there are only 7 threads per EU in the configuration, 1880bf215546Sopenharmony_ci * the FFTID is calculated as if there are 8 threads per EU, 1881bf215546Sopenharmony_ci * which in turn requires a larger amount of Scratch Space to be 1882bf215546Sopenharmony_ci * allocated by the driver." 1883bf215546Sopenharmony_ci */ 1884bf215546Sopenharmony_ci scratch_ids_per_subslice = 8 * 8; 1885bf215546Sopenharmony_ci } else if (devinfo->platform == INTEL_PLATFORM_HSW) { 1886bf215546Sopenharmony_ci /* WaCSScratchSize:hsw 1887bf215546Sopenharmony_ci * 1888bf215546Sopenharmony_ci * Haswell's scratch space address calculation appears to be sparse 1889bf215546Sopenharmony_ci * rather than tightly packed. The Thread ID has bits indicating 1890bf215546Sopenharmony_ci * which subslice, EU within a subslice, and thread within an EU it 1891bf215546Sopenharmony_ci * is. There's a maximum of two slices and two subslices, so these 1892bf215546Sopenharmony_ci * can be stored with a single bit. Even though there are only 10 EUs 1893bf215546Sopenharmony_ci * per subslice, this is stored in 4 bits, so there's an effective 1894bf215546Sopenharmony_ci * maximum value of 16 EUs. Similarly, although there are only 7 1895bf215546Sopenharmony_ci * threads per EU, this is stored in a 3 bit number, giving an 1896bf215546Sopenharmony_ci * effective maximum value of 8 threads per EU. 1897bf215546Sopenharmony_ci * 1898bf215546Sopenharmony_ci * This means that we need to use 16 * 8 instead of 10 * 7 for the 1899bf215546Sopenharmony_ci * number of threads per subslice. 1900bf215546Sopenharmony_ci */ 1901bf215546Sopenharmony_ci scratch_ids_per_subslice = 16 * 8; 1902bf215546Sopenharmony_ci } else if (devinfo->platform == INTEL_PLATFORM_CHV) { 1903bf215546Sopenharmony_ci /* Cherryview devices have either 6 or 8 EUs per subslice, and each 1904bf215546Sopenharmony_ci * EU has 7 threads. The 6 EU devices appear to calculate thread IDs 1905bf215546Sopenharmony_ci * as if it had 8 EUs. 1906bf215546Sopenharmony_ci */ 1907bf215546Sopenharmony_ci scratch_ids_per_subslice = 8 * 7; 1908bf215546Sopenharmony_ci } else { 1909bf215546Sopenharmony_ci scratch_ids_per_subslice = devinfo->max_cs_threads; 1910bf215546Sopenharmony_ci } 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_ci unsigned max_thread_ids = scratch_ids_per_subslice * subslices; 1913bf215546Sopenharmony_ci 1914bf215546Sopenharmony_ci if (devinfo->verx10 >= 125) { 1915bf215546Sopenharmony_ci /* On GFX version 12.5, scratch access changed to a surface-based model. 1916bf215546Sopenharmony_ci * Instead of each shader type having its own layout based on IDs passed 1917bf215546Sopenharmony_ci * from the relevant fixed-function unit, all scratch access is based on 1918bf215546Sopenharmony_ci * thread IDs like it always has been for compute. 1919bf215546Sopenharmony_ci */ 1920bf215546Sopenharmony_ci for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) 1921bf215546Sopenharmony_ci devinfo->max_scratch_ids[i] = max_thread_ids; 1922bf215546Sopenharmony_ci } else { 1923bf215546Sopenharmony_ci unsigned max_scratch_ids[] = { 1924bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, 1925bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, 1926bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, 1927bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, 1928bf215546Sopenharmony_ci [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, 1929bf215546Sopenharmony_ci [MESA_SHADER_COMPUTE] = max_thread_ids, 1930bf215546Sopenharmony_ci }; 1931bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids)); 1932bf215546Sopenharmony_ci memcpy(devinfo->max_scratch_ids, max_scratch_ids, 1933bf215546Sopenharmony_ci sizeof(devinfo->max_scratch_ids)); 1934bf215546Sopenharmony_ci } 1935bf215546Sopenharmony_ci} 1936bf215546Sopenharmony_ci 1937bf215546Sopenharmony_cibool 1938bf215546Sopenharmony_ciintel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) 1939bf215546Sopenharmony_ci{ 1940bf215546Sopenharmony_ci /* Get PCI info. 1941bf215546Sopenharmony_ci * 1942bf215546Sopenharmony_ci * Some callers may already have a valid drm device which holds values of 1943bf215546Sopenharmony_ci * PCI fields queried here prior to calling this function. But making this 1944bf215546Sopenharmony_ci * query optional leads to a more cumbersome implementation. These callers 1945bf215546Sopenharmony_ci * still need to initialize the fields somewhere out of this function and 1946bf215546Sopenharmony_ci * rely on an ioctl to get PCI device id for the next step when skipping 1947bf215546Sopenharmony_ci * this drm query. 1948bf215546Sopenharmony_ci */ 1949bf215546Sopenharmony_ci drmDevicePtr drmdev = NULL; 1950bf215546Sopenharmony_ci if (drmGetDevice2(fd, DRM_DEVICE_GET_PCI_REVISION, &drmdev)) { 1951bf215546Sopenharmony_ci mesa_loge("Failed to query drm device."); 1952bf215546Sopenharmony_ci return false; 1953bf215546Sopenharmony_ci } 1954bf215546Sopenharmony_ci if (!intel_get_device_info_from_pci_id 1955bf215546Sopenharmony_ci (drmdev->deviceinfo.pci->device_id, devinfo)) { 1956bf215546Sopenharmony_ci drmFreeDevice(&drmdev); 1957bf215546Sopenharmony_ci return false; 1958bf215546Sopenharmony_ci } 1959bf215546Sopenharmony_ci devinfo->pci_domain = drmdev->businfo.pci->domain; 1960bf215546Sopenharmony_ci devinfo->pci_bus = drmdev->businfo.pci->bus; 1961bf215546Sopenharmony_ci devinfo->pci_dev = drmdev->businfo.pci->dev; 1962bf215546Sopenharmony_ci devinfo->pci_func = drmdev->businfo.pci->func; 1963bf215546Sopenharmony_ci devinfo->pci_device_id = drmdev->deviceinfo.pci->device_id; 1964bf215546Sopenharmony_ci devinfo->pci_revision_id = drmdev->deviceinfo.pci->revision_id; 1965bf215546Sopenharmony_ci drmFreeDevice(&drmdev); 1966bf215546Sopenharmony_ci devinfo->no_hw = env_var_as_boolean("INTEL_NO_HW", false); 1967bf215546Sopenharmony_ci 1968bf215546Sopenharmony_ci if (devinfo->ver == 10) { 1969bf215546Sopenharmony_ci mesa_loge("Gfx10 support is redacted."); 1970bf215546Sopenharmony_ci return false; 1971bf215546Sopenharmony_ci } 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_ci /* remaining initializion queries the kernel for device info */ 1974bf215546Sopenharmony_ci if (devinfo->no_hw) { 1975bf215546Sopenharmony_ci /* Provide some sensible values for NO_HW. */ 1976bf215546Sopenharmony_ci devinfo->gtt_size = 1977bf215546Sopenharmony_ci devinfo->ver >= 8 ? (1ull << 48) : 2ull * 1024 * 1024 * 1024; 1978bf215546Sopenharmony_ci compute_system_memory(devinfo, false); 1979bf215546Sopenharmony_ci return true; 1980bf215546Sopenharmony_ci } 1981bf215546Sopenharmony_ci 1982bf215546Sopenharmony_ci if (intel_get_and_process_hwconfig_table(fd, devinfo)) { 1983bf215546Sopenharmony_ci /* After applying hwconfig values, some items need to be recalculated. */ 1984bf215546Sopenharmony_ci devinfo->max_cs_threads = 1985bf215546Sopenharmony_ci devinfo->max_eus_per_subslice * devinfo->num_thread_per_eu; 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ci update_cs_workgroup_threads(devinfo); 1988bf215546Sopenharmony_ci } 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ci int timestamp_frequency; 1991bf215546Sopenharmony_ci if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY, 1992bf215546Sopenharmony_ci ×tamp_frequency)) 1993bf215546Sopenharmony_ci devinfo->timestamp_frequency = timestamp_frequency; 1994bf215546Sopenharmony_ci else if (devinfo->ver >= 10) { 1995bf215546Sopenharmony_ci mesa_loge("Kernel 4.15 required to read the CS timestamp frequency."); 1996bf215546Sopenharmony_ci return false; 1997bf215546Sopenharmony_ci } 1998bf215546Sopenharmony_ci 1999bf215546Sopenharmony_ci if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision)) 2000bf215546Sopenharmony_ci devinfo->revision = 0; 2001bf215546Sopenharmony_ci 2002bf215546Sopenharmony_ci if (!query_topology(devinfo, fd)) { 2003bf215546Sopenharmony_ci if (devinfo->ver >= 10) { 2004bf215546Sopenharmony_ci /* topology uAPI required for CNL+ (kernel 4.17+) */ 2005bf215546Sopenharmony_ci return false; 2006bf215546Sopenharmony_ci } 2007bf215546Sopenharmony_ci 2008bf215546Sopenharmony_ci /* else use the kernel 4.13+ api for gfx8+. For older kernels, topology 2009bf215546Sopenharmony_ci * will be wrong, affecting GPU metrics. In this case, fail silently. 2010bf215546Sopenharmony_ci */ 2011bf215546Sopenharmony_ci getparam_topology(devinfo, fd); 2012bf215546Sopenharmony_ci } 2013bf215546Sopenharmony_ci 2014bf215546Sopenharmony_ci /* If the memory region uAPI query is not available, try to generate some 2015bf215546Sopenharmony_ci * numbers out of os_* utils for sram only. 2016bf215546Sopenharmony_ci */ 2017bf215546Sopenharmony_ci if (!query_regions(devinfo, fd, false)) 2018bf215546Sopenharmony_ci compute_system_memory(devinfo, false); 2019bf215546Sopenharmony_ci 2020bf215546Sopenharmony_ci /* region info is required for lmem support */ 2021bf215546Sopenharmony_ci if (devinfo->has_local_mem && !devinfo->mem.use_class_instance) { 2022bf215546Sopenharmony_ci mesa_logw("Could not query local memory size."); 2023bf215546Sopenharmony_ci return false; 2024bf215546Sopenharmony_ci } 2025bf215546Sopenharmony_ci 2026bf215546Sopenharmony_ci if (devinfo->platform == INTEL_PLATFORM_CHV) 2027bf215546Sopenharmony_ci fixup_chv_device_info(devinfo); 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci /* Broadwell PRM says: 2030bf215546Sopenharmony_ci * 2031bf215546Sopenharmony_ci * "Before Gfx8, there was a historical configuration control field to 2032bf215546Sopenharmony_ci * swizzle address bit[6] for in X/Y tiling modes. This was set in three 2033bf215546Sopenharmony_ci * different places: TILECTL[1:0], ARB_MODE[5:4], and 2034bf215546Sopenharmony_ci * DISP_ARB_CTL[14:13]. 2035bf215546Sopenharmony_ci * 2036bf215546Sopenharmony_ci * For Gfx8 and subsequent generations, the swizzle fields are all 2037bf215546Sopenharmony_ci * reserved, and the CPU's memory controller performs all address 2038bf215546Sopenharmony_ci * swizzling modifications." 2039bf215546Sopenharmony_ci */ 2040bf215546Sopenharmony_ci devinfo->has_bit6_swizzle = devinfo->ver < 8 && has_bit6_swizzle(fd); 2041bf215546Sopenharmony_ci 2042bf215546Sopenharmony_ci intel_get_aperture_size(fd, &devinfo->aperture_bytes); 2043bf215546Sopenharmony_ci get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, &devinfo->gtt_size); 2044bf215546Sopenharmony_ci devinfo->has_tiling_uapi = has_get_tiling(fd); 2045bf215546Sopenharmony_ci 2046bf215546Sopenharmony_ci /* Gfx7 and older do not support EU/Subslice info */ 2047bf215546Sopenharmony_ci assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7); 2048bf215546Sopenharmony_ci devinfo->subslice_total = MAX2(devinfo->subslice_total, 1); 2049bf215546Sopenharmony_ci 2050bf215546Sopenharmony_ci init_max_scratch_ids(devinfo); 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_ci return true; 2053bf215546Sopenharmony_ci} 2054bf215546Sopenharmony_ci 2055bf215546Sopenharmony_cibool intel_device_info_update_memory_info(struct intel_device_info *devinfo, int fd) 2056bf215546Sopenharmony_ci{ 2057bf215546Sopenharmony_ci return query_regions(devinfo, fd, true) || compute_system_memory(devinfo, true); 2058bf215546Sopenharmony_ci} 2059