1/* 2 * Copyright 2010 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include <xf86drm.h> 24#include <nouveau_drm.h> 25#include <nvif/class.h> 26#include "util/format/u_format.h" 27#include "util/format/u_format_s3tc.h" 28#include "util/u_screen.h" 29#include "pipe/p_screen.h" 30 31#include "nouveau_vp3_video.h" 32 33#include "nv50_ir_driver.h" 34 35#include "nvc0/nvc0_context.h" 36#include "nvc0/nvc0_screen.h" 37 38#include "nvc0/mme/com9097.mme.h" 39#include "nvc0/mme/com90c0.mme.h" 40#include "nvc0/mme/comc597.mme.h" 41 42#include "nv50/g80_texture.xml.h" 43 44static bool 45nvc0_screen_is_format_supported(struct pipe_screen *pscreen, 46 enum pipe_format format, 47 enum pipe_texture_target target, 48 unsigned sample_count, 49 unsigned storage_sample_count, 50 unsigned bindings) 51{ 52 const struct util_format_description *desc = util_format_description(format); 53 54 if (sample_count > 8) 55 return false; 56 if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ 57 return false; 58 59 if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) 60 return false; 61 62 /* Short-circuit the rest of the logic -- this is used by the gallium frontend 63 * to determine valid MS levels in a no-attachments scenario. 64 */ 65 if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET) 66 return true; 67 68 if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER)) 69 if (util_format_get_blocksizebits(format) == 3 * 32) 70 return false; 71 72 if (bindings & PIPE_BIND_LINEAR) 73 if (util_format_is_depth_or_stencil(format) || 74 (target != PIPE_TEXTURE_1D && 75 target != PIPE_TEXTURE_2D && 76 target != PIPE_TEXTURE_RECT) || 77 sample_count > 1) 78 return false; 79 80 /* Restrict ETC2 and ASTC formats here. These are only supported on GK20A 81 * and GM20B. 82 */ 83 if ((desc->layout == UTIL_FORMAT_LAYOUT_ETC || 84 desc->layout == UTIL_FORMAT_LAYOUT_ASTC) && 85 nouveau_screen(pscreen)->device->chipset != 0x12b && 86 nouveau_screen(pscreen)->class_3d != NVEA_3D_CLASS) 87 return false; 88 89 /* shared is always supported */ 90 bindings &= ~(PIPE_BIND_LINEAR | 91 PIPE_BIND_SHARED); 92 93 if (bindings & PIPE_BIND_SHADER_IMAGE) { 94 if (format == PIPE_FORMAT_B8G8R8A8_UNORM && 95 nouveau_screen(pscreen)->class_3d < NVE4_3D_CLASS) { 96 /* This should work on Fermi, but for currently unknown reasons it 97 * does not and results in breaking reads from pbos. */ 98 return false; 99 } 100 } 101 102 if (bindings & PIPE_BIND_INDEX_BUFFER) { 103 if (format != PIPE_FORMAT_R8_UINT && 104 format != PIPE_FORMAT_R16_UINT && 105 format != PIPE_FORMAT_R32_UINT) 106 return false; 107 bindings &= ~PIPE_BIND_INDEX_BUFFER; 108 } 109 110 return (( nvc0_format_table[format].usage | 111 nvc0_vertex_format[format].usage) & bindings) == bindings; 112} 113 114static int 115nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) 116{ 117 const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; 118 const struct nouveau_screen *screen = nouveau_screen(pscreen); 119 struct nouveau_device *dev = screen->device; 120 static bool debug_cap_printed[PIPE_CAP_LAST] = {}; 121 122 switch (param) { 123 /* non-boolean caps */ 124 case PIPE_CAP_MAX_TEXTURE_2D_SIZE: 125 return 16384; 126 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: 127 return 15; 128 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: 129 return 12; 130 case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: 131 return 2048; 132 case PIPE_CAP_MIN_TEXEL_OFFSET: 133 return -8; 134 case PIPE_CAP_MAX_TEXEL_OFFSET: 135 return 7; 136 case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: 137 return -32; 138 case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: 139 return 31; 140 case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: 141 return 128 * 1024 * 1024; 142 case PIPE_CAP_GLSL_FEATURE_LEVEL: 143 return 430; 144 case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: 145 return 430; 146 case PIPE_CAP_MAX_RENDER_TARGETS: 147 return 8; 148 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 149 return 1; 150 case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: 151 case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS: 152 return 8; 153 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: 154 return 4; 155 case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: 156 case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: 157 return 128; 158 case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: 159 case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: 160 return 1024; 161 case PIPE_CAP_MAX_VERTEX_STREAMS: 162 return 4; 163 case PIPE_CAP_MAX_GS_INVOCATIONS: 164 return 32; 165 case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT: 166 return 1 << 27; 167 case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: 168 return 2048; 169 case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: 170 return 2047; 171 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 172 return 256; 173 case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: 174 if (class_3d < GM107_3D_CLASS) 175 return 256; /* IMAGE bindings require alignment to 256 */ 176 return 16; 177 case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: 178 return 16; 179 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: 180 return NOUVEAU_MIN_BUFFER_MAP_ALIGN; 181 case PIPE_CAP_MAX_VIEWPORTS: 182 return NVC0_MAX_VIEWPORTS; 183 case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: 184 return 4; 185 case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: 186 return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; 187 case PIPE_CAP_ENDIANNESS: 188 return PIPE_ENDIAN_LITTLE; 189 case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: 190 return 30; 191 case PIPE_CAP_MAX_WINDOW_RECTANGLES: 192 return NVC0_MAX_WINDOW_RECTANGLES; 193 case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: 194 return class_3d >= GM200_3D_CLASS ? 8 : 0; 195 case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: 196 return 64 * 1024 * 1024; 197 case PIPE_CAP_MAX_VARYINGS: 198 /* NOTE: These only count our slots for GENERIC varyings. 199 * The address space may be larger, but the actual hard limit seems to be 200 * less than what the address space layout permits, so don't add TEXCOORD, 201 * COLOR, etc. here. 202 */ 203 return 0x1f0 / 16; 204 case PIPE_CAP_MAX_VERTEX_BUFFERS: 205 return 16; 206 case PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE: 207 return 512 * 1024; /* TODO: Investigate tuning this */ 208 case PIPE_CAP_MAX_TEXTURE_MB: 209 return 0; /* TODO: use 1/2 of VRAM for this? */ 210 211 case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: 212 case PIPE_CAP_SUPPORTED_PRIM_MODES: 213 return BITFIELD_MASK(PIPE_PRIM_MAX); 214 215 /* supported caps */ 216 case PIPE_CAP_TEXTURE_MIRROR_CLAMP: 217 case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: 218 case PIPE_CAP_TEXTURE_SWIZZLE: 219 case PIPE_CAP_TEXTURE_SHADOW_MAP: 220 case PIPE_CAP_NPOT_TEXTURES: 221 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: 222 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: 223 case PIPE_CAP_ANISOTROPIC_FILTER: 224 case PIPE_CAP_SEAMLESS_CUBE_MAP: 225 case PIPE_CAP_CUBE_MAP_ARRAY: 226 case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: 227 case PIPE_CAP_TEXTURE_MULTISAMPLE: 228 case PIPE_CAP_DEPTH_CLIP_DISABLE: 229 case PIPE_CAP_POINT_SPRITE: 230 case PIPE_CAP_TGSI_TEXCOORD: 231 case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: 232 case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: 233 case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: 234 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: 235 case PIPE_CAP_VERTEX_COLOR_CLAMPED: 236 case PIPE_CAP_QUERY_TIMESTAMP: 237 case PIPE_CAP_QUERY_TIME_ELAPSED: 238 case PIPE_CAP_OCCLUSION_QUERY: 239 case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: 240 case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: 241 case PIPE_CAP_QUERY_PIPELINE_STATISTICS: 242 case PIPE_CAP_BLEND_EQUATION_SEPARATE: 243 case PIPE_CAP_INDEP_BLEND_ENABLE: 244 case PIPE_CAP_INDEP_BLEND_FUNC: 245 case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: 246 case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: 247 case PIPE_CAP_POINT_COORD_ORIGIN_UPPER_LEFT: 248 case PIPE_CAP_PRIMITIVE_RESTART: 249 case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: 250 case PIPE_CAP_VS_INSTANCEID: 251 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: 252 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: 253 case PIPE_CAP_CONDITIONAL_RENDER: 254 case PIPE_CAP_TEXTURE_BARRIER: 255 case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: 256 case PIPE_CAP_START_INSTANCE: 257 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: 258 case PIPE_CAP_DRAW_INDIRECT: 259 case PIPE_CAP_USER_VERTEX_BUFFERS: 260 case PIPE_CAP_TEXTURE_QUERY_LOD: 261 case PIPE_CAP_SAMPLE_SHADING: 262 case PIPE_CAP_TEXTURE_GATHER_OFFSETS: 263 case PIPE_CAP_TEXTURE_GATHER_SM5: 264 case PIPE_CAP_FS_FINE_DERIVATIVE: 265 case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: 266 case PIPE_CAP_SAMPLER_VIEW_TARGET: 267 case PIPE_CAP_CLIP_HALFZ: 268 case PIPE_CAP_POLYGON_OFFSET_CLAMP: 269 case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: 270 case PIPE_CAP_TEXTURE_FLOAT_LINEAR: 271 case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: 272 case PIPE_CAP_DEPTH_BOUNDS_TEST: 273 case PIPE_CAP_TEXTURE_QUERY_SAMPLES: 274 case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: 275 case PIPE_CAP_FORCE_PERSAMPLE_INTERP: 276 case PIPE_CAP_CLEAR_TEXTURE: 277 case PIPE_CAP_DRAW_PARAMETERS: 278 case PIPE_CAP_SHADER_PACK_HALF_FLOAT: 279 case PIPE_CAP_MULTI_DRAW_INDIRECT: 280 case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: 281 case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: 282 case PIPE_CAP_QUERY_BUFFER_OBJECT: 283 case PIPE_CAP_INVALIDATE_BUFFER: 284 case PIPE_CAP_STRING_MARKER: 285 case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: 286 case PIPE_CAP_CULL_DISTANCE: 287 case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: 288 case PIPE_CAP_SHADER_GROUP_VOTE: 289 case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: 290 case PIPE_CAP_SHADER_ARRAY_COMPONENTS: 291 case PIPE_CAP_LEGACY_MATH_RULES: 292 case PIPE_CAP_DOUBLES: 293 case PIPE_CAP_INT64: 294 case PIPE_CAP_TGSI_TEX_TXF_LZ: 295 case PIPE_CAP_SHADER_CLOCK: 296 case PIPE_CAP_COMPUTE: 297 case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: 298 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: 299 case PIPE_CAP_QUERY_SO_OVERFLOW: 300 case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: 301 case PIPE_CAP_TGSI_DIV: 302 case PIPE_CAP_IMAGE_ATOMIC_INC_WRAP: 303 case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION: 304 case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: 305 case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: 306 case PIPE_CAP_FLATSHADE: 307 case PIPE_CAP_ALPHA_TEST: 308 case PIPE_CAP_POINT_SIZE_FIXED: 309 case PIPE_CAP_TWO_SIDED_COLOR: 310 case PIPE_CAP_CLIP_PLANES: 311 case PIPE_CAP_TEXTURE_SHADOW_LOD: 312 case PIPE_CAP_PACKED_STREAM_OUTPUT: 313 case PIPE_CAP_CLEAR_SCISSORED: 314 case PIPE_CAP_GL_CLAMP: 315 case PIPE_CAP_IMAGE_STORE_FORMATTED: 316 case PIPE_CAP_TEXRECT: 317 case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH: 318 case PIPE_CAP_SHAREABLE_SHADERS: 319 case PIPE_CAP_PREFER_BACK_BUFFER_REUSE: 320 return 1; 321 case PIPE_CAP_TEXTURE_TRANSFER_MODES: 322 return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? PIPE_TEXTURE_TRANSFER_BLIT : 0; 323 case PIPE_CAP_FBFETCH: 324 return class_3d >= NVE4_3D_CLASS ? 1 : 0; /* needs testing on fermi */ 325 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 326 case PIPE_CAP_SHADER_BALLOT: 327 return class_3d >= NVE4_3D_CLASS; 328 case PIPE_CAP_BINDLESS_TEXTURE: 329 return class_3d >= NVE4_3D_CLASS; 330 case PIPE_CAP_IMAGE_ATOMIC_FLOAT_ADD: 331 return class_3d < GM107_3D_CLASS; /* needs additional lowering */ 332 case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: 333 case PIPE_CAP_VS_LAYER_VIEWPORT: 334 case PIPE_CAP_TES_LAYER_VIEWPORT: 335 case PIPE_CAP_POST_DEPTH_COVERAGE: 336 case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: 337 case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: 338 case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: 339 case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: 340 case PIPE_CAP_VIEWPORT_SWIZZLE: 341 case PIPE_CAP_VIEWPORT_MASK: 342 case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: 343 return class_3d >= GM200_3D_CLASS; 344 case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: 345 return class_3d >= GP100_3D_CLASS; 346 case PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY: 347 case PIPE_CAP_SYSTEM_SVM: 348 return screen->has_svm ? 1 : 0; 349 350 /* caps has to be turned on with nir */ 351 case PIPE_CAP_GL_SPIRV: 352 case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS: 353 case PIPE_CAP_INT64_DIVMOD: 354 return screen->prefer_nir ? 1 : 0; 355 356 /* nir related caps */ 357 case PIPE_CAP_NIR_IMAGES_AS_DEREF: 358 return 0; 359 360 /* unsupported caps */ 361 case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: 362 case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: 363 case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: 364 case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: 365 case PIPE_CAP_SHADER_STENCIL_EXPORT: 366 case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: 367 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: 368 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: 369 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: 370 case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: 371 case PIPE_CAP_FAKE_SW_MSAA: 372 case PIPE_CAP_VS_WINDOW_SPACE_POSITION: 373 case PIPE_CAP_VERTEXID_NOBASE: 374 case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: 375 case PIPE_CAP_FS_POSITION_IS_SYSVAL: 376 case PIPE_CAP_FS_POINT_IS_SYSVAL: 377 case PIPE_CAP_GENERATE_MIPMAP: 378 case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: 379 case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: 380 case PIPE_CAP_QUERY_MEMORY_INFO: 381 case PIPE_CAP_PCI_GROUP: 382 case PIPE_CAP_PCI_BUS: 383 case PIPE_CAP_PCI_DEVICE: 384 case PIPE_CAP_PCI_FUNCTION: 385 case PIPE_CAP_SHADER_CAN_READ_OUTPUTS: 386 case PIPE_CAP_NATIVE_FENCE_FD: 387 case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: 388 case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: 389 case PIPE_CAP_MEMOBJ: 390 case PIPE_CAP_LOAD_CONSTBUF: 391 case PIPE_CAP_TILE_RASTER_ORDER: 392 case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: 393 case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: 394 case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: 395 case PIPE_CAP_CONTEXT_PRIORITY_MASK: 396 case PIPE_CAP_FENCE_SIGNAL: 397 case PIPE_CAP_CONSTBUF0_FLAGS: 398 case PIPE_CAP_PACKED_UNIFORMS: 399 case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: 400 case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: 401 case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: 402 case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: 403 case PIPE_CAP_SURFACE_SAMPLE_COUNT: 404 case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: 405 case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: 406 case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: 407 case PIPE_CAP_NIR_COMPACT_ARRAYS: 408 case PIPE_CAP_IMAGE_LOAD_FORMATTED: 409 case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: 410 case PIPE_CAP_ATOMIC_FLOAT_MINMAX: 411 case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE: 412 case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK: 413 case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED: 414 case PIPE_CAP_FBFETCH_COHERENT: 415 case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE: 416 case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS: /* could be done */ 417 case PIPE_CAP_INTEGER_MULTIPLY_32X16: /* could be done */ 418 case PIPE_CAP_FRONTEND_NOOP: 419 case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: 420 case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: 421 case PIPE_CAP_PSIZ_CLAMPED: 422 case PIPE_CAP_TEXTURE_BUFFER_SAMPLER: 423 case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0: 424 case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: /* when we fix MT stuff */ 425 case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL: /* TODO */ 426 case PIPE_CAP_SHADER_ATOMIC_INT64: /* TODO */ 427 case PIPE_CAP_GLSL_ZERO_INIT: 428 case PIPE_CAP_BLEND_EQUATION_ADVANCED: 429 case PIPE_CAP_NO_CLIP_ON_COPY_TEX: 430 case PIPE_CAP_DEVICE_PROTECTED_CONTENT: 431 case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: 432 case PIPE_CAP_DRAW_VERTEX_STATE: 433 case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS: 434 case PIPE_CAP_MAX_SPARSE_TEXTURE_SIZE: 435 case PIPE_CAP_MAX_SPARSE_3D_TEXTURE_SIZE: 436 case PIPE_CAP_MAX_SPARSE_ARRAY_TEXTURE_LAYERS: 437 case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS: 438 case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY: 439 case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD: 440 case PIPE_CAP_HARDWARE_GL_SELECT: 441 return 0; 442 443 case PIPE_CAP_VENDOR_ID: 444 return 0x10de; 445 case PIPE_CAP_DEVICE_ID: { 446 uint64_t device_id; 447 if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) { 448 NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n"); 449 return -1; 450 } 451 return device_id; 452 } 453 case PIPE_CAP_ACCELERATED: 454 return 1; 455 case PIPE_CAP_VIDEO_MEMORY: 456 return dev->vram_size >> 20; 457 case PIPE_CAP_UMA: 458 return 0; 459 460 default: 461 if (!debug_cap_printed[param]) { 462 debug_printf("%s: unhandled cap %d\n", __func__, param); 463 debug_cap_printed[param] = true; 464 } 465 FALLTHROUGH; 466 /* caps where we want the default value */ 467 case PIPE_CAP_DMABUF: 468 case PIPE_CAP_ESSL_FEATURE_LEVEL: 469 case PIPE_CAP_THROTTLE: 470 return u_pipe_screen_get_param_defaults(pscreen, param); 471 } 472} 473 474static int 475nvc0_screen_get_shader_param(struct pipe_screen *pscreen, 476 enum pipe_shader_type shader, 477 enum pipe_shader_cap param) 478{ 479 const struct nouveau_screen *screen = nouveau_screen(pscreen); 480 const uint16_t class_3d = screen->class_3d; 481 482 switch (shader) { 483 case PIPE_SHADER_VERTEX: 484 case PIPE_SHADER_GEOMETRY: 485 case PIPE_SHADER_FRAGMENT: 486 case PIPE_SHADER_COMPUTE: 487 case PIPE_SHADER_TESS_CTRL: 488 case PIPE_SHADER_TESS_EVAL: 489 break; 490 default: 491 return 0; 492 } 493 494 switch (param) { 495 case PIPE_SHADER_CAP_PREFERRED_IR: 496 return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; 497 case PIPE_SHADER_CAP_SUPPORTED_IRS: { 498 uint32_t irs = 1 << PIPE_SHADER_IR_NIR | 499 ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI); 500 if (screen->force_enable_cl) 501 irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED; 502 return irs; 503 } 504 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: 505 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: 506 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: 507 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: 508 return 16384; 509 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 510 return 16; 511 case PIPE_SHADER_CAP_MAX_INPUTS: 512 return 0x200 / 16; 513 case PIPE_SHADER_CAP_MAX_OUTPUTS: 514 return 32; 515 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: 516 return NVC0_MAX_CONSTBUF_SIZE; 517 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: 518 return NVC0_MAX_PIPE_CONSTBUFS; 519 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: 520 return shader != PIPE_SHADER_FRAGMENT; 521 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: 522 /* HW doesn't support indirect addressing of fragment program inputs 523 * on Volta. The binary driver generates a function to handle every 524 * possible indirection, and indirectly calls the function to handle 525 * this instead. 526 */ 527 if (class_3d >= GV100_3D_CLASS) 528 return shader != PIPE_SHADER_FRAGMENT; 529 return 1; 530 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: 531 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 532 return 1; 533 case PIPE_SHADER_CAP_MAX_TEMPS: 534 return NVC0_CAP_MAX_PROGRAM_TEMPS; 535 case PIPE_SHADER_CAP_CONT_SUPPORTED: 536 return 1; 537 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: 538 return 1; 539 case PIPE_SHADER_CAP_SUBROUTINES: 540 return 1; 541 case PIPE_SHADER_CAP_INTEGERS: 542 return 1; 543 case PIPE_SHADER_CAP_DROUND_SUPPORTED: 544 return 1; 545 case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: 546 case PIPE_SHADER_CAP_LDEXP_SUPPORTED: 547 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: 548 case PIPE_SHADER_CAP_INT64_ATOMICS: 549 case PIPE_SHADER_CAP_FP16: 550 case PIPE_SHADER_CAP_FP16_DERIVATIVES: 551 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: 552 case PIPE_SHADER_CAP_INT16: 553 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: 554 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: 555 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: 556 return 0; 557 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 558 return NVC0_MAX_BUFFERS; 559 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: 560 return (class_3d >= NVE4_3D_CLASS) ? 32 : 16; 561 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: 562 return (class_3d >= NVE4_3D_CLASS) ? 32 : 16; 563 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: 564 if (class_3d >= NVE4_3D_CLASS) 565 return NVC0_MAX_IMAGES; 566 if (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE) 567 return NVC0_MAX_IMAGES; 568 return 0; 569 default: 570 NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); 571 return 0; 572 } 573} 574 575static float 576nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) 577{ 578 const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; 579 580 switch (param) { 581 case PIPE_CAPF_MIN_LINE_WIDTH: 582 case PIPE_CAPF_MIN_LINE_WIDTH_AA: 583 case PIPE_CAPF_MIN_POINT_SIZE: 584 case PIPE_CAPF_MIN_POINT_SIZE_AA: 585 return 1; 586 case PIPE_CAPF_POINT_SIZE_GRANULARITY: 587 case PIPE_CAPF_LINE_WIDTH_GRANULARITY: 588 return 0.1; 589 case PIPE_CAPF_MAX_LINE_WIDTH: 590 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 591 return 10.0f; 592 case PIPE_CAPF_MAX_POINT_SIZE: 593 return 63.0f; 594 case PIPE_CAPF_MAX_POINT_SIZE_AA: 595 return 63.375f; 596 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 597 return 16.0f; 598 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 599 return 15.0f; 600 case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: 601 return 0.0f; 602 case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: 603 return class_3d >= GM200_3D_CLASS ? 0.75f : 0.0f; 604 case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: 605 return class_3d >= GM200_3D_CLASS ? 0.25f : 0.0f; 606 } 607 608 NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param); 609 return 0.0f; 610} 611 612static int 613nvc0_screen_get_compute_param(struct pipe_screen *pscreen, 614 enum pipe_shader_ir ir_type, 615 enum pipe_compute_cap param, void *data) 616{ 617 struct nvc0_screen *screen = nvc0_screen(pscreen); 618 const uint16_t obj_class = screen->compute->oclass; 619 620#define RET(x) do { \ 621 if (data) \ 622 memcpy(data, x, sizeof(x)); \ 623 return sizeof(x); \ 624} while (0) 625 626 switch (param) { 627 case PIPE_COMPUTE_CAP_GRID_DIMENSION: 628 RET((uint64_t []) { 3 }); 629 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 630 if (obj_class >= NVE4_COMPUTE_CLASS) { 631 RET(((uint64_t []) { 0x7fffffff, 65535, 65535 })); 632 } else { 633 RET(((uint64_t []) { 65535, 65535, 65535 })); 634 } 635 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 636 RET(((uint64_t []) { 1024, 1024, 64 })); 637 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 638 RET((uint64_t []) { 1024 }); 639 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 640 if (obj_class >= NVE4_COMPUTE_CLASS) { 641 RET((uint64_t []) { 1024 }); 642 } else { 643 RET((uint64_t []) { 512 }); 644 } 645 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ 646 RET((uint64_t []) { 1ULL << 40 }); 647 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ 648 switch (obj_class) { 649 case GM200_COMPUTE_CLASS: 650 RET((uint64_t []) { 96 << 10 }); 651 case GM107_COMPUTE_CLASS: 652 RET((uint64_t []) { 64 << 10 }); 653 default: 654 RET((uint64_t []) { 48 << 10 }); 655 } 656 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ 657 RET((uint64_t []) { 512 << 10 }); 658 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ 659 RET((uint64_t []) { 4096 }); 660 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 661 RET((uint32_t []) { 32 }); 662 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 663 RET((uint64_t []) { 1ULL << 40 }); 664 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 665 RET((uint32_t []) { NVC0_MAX_IMAGES }); 666 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 667 RET((uint32_t []) { screen->mp_count_compute }); 668 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 669 RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ 670 case PIPE_COMPUTE_CAP_ADDRESS_BITS: 671 RET((uint32_t []) { 64 }); 672 default: 673 return 0; 674 } 675 676#undef RET 677} 678 679static void 680nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen, 681 unsigned sample_count, 682 unsigned *width, unsigned *height) 683{ 684 switch (sample_count) { 685 case 0: 686 case 1: 687 /* this could be 4x4, but the GL state tracker makes it difficult to 688 * create a 1x MSAA texture and smaller grids save CB space */ 689 *width = 2; 690 *height = 4; 691 break; 692 case 2: 693 *width = 2; 694 *height = 4; 695 break; 696 case 4: 697 *width = 2; 698 *height = 2; 699 break; 700 case 8: 701 *width = 1; 702 *height = 2; 703 break; 704 default: 705 assert(0); 706 } 707} 708 709static void 710nvc0_screen_destroy(struct pipe_screen *pscreen) 711{ 712 struct nvc0_screen *screen = nvc0_screen(pscreen); 713 714 if (!nouveau_drm_screen_unref(&screen->base)) 715 return; 716 717 nouveau_fence_cleanup(&screen->base); 718 719 if (screen->base.pushbuf) 720 screen->base.pushbuf->user_priv = NULL; 721 722 if (screen->blitter) 723 nvc0_blitter_destroy(screen); 724 if (screen->pm.prog) { 725 screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ 726 nvc0_program_destroy(NULL, screen->pm.prog); 727 FREE(screen->pm.prog); 728 } 729 730 nouveau_bo_ref(NULL, &screen->text); 731 nouveau_bo_ref(NULL, &screen->uniform_bo); 732 nouveau_bo_ref(NULL, &screen->tls); 733 nouveau_bo_ref(NULL, &screen->txc); 734 nouveau_bo_ref(NULL, &screen->fence.bo); 735 nouveau_bo_ref(NULL, &screen->poly_cache); 736 737 nouveau_heap_destroy(&screen->lib_code); 738 nouveau_heap_destroy(&screen->text_heap); 739 740 FREE(screen->tic.entries); 741 742 nouveau_object_del(&screen->eng3d); 743 nouveau_object_del(&screen->eng2d); 744 nouveau_object_del(&screen->m2mf); 745 nouveau_object_del(&screen->compute); 746 nouveau_object_del(&screen->nvsw); 747 748 nouveau_screen_fini(&screen->base); 749 750 FREE(screen); 751} 752 753static int 754nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, 755 unsigned size, const uint32_t *data) 756{ 757 struct nouveau_pushbuf *push = screen->base.pushbuf; 758 759 size /= 4; 760 761 assert((pos + size) <= 0x800); 762 763 BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); 764 PUSH_DATA (push, (m - 0x3800) / 8); 765 PUSH_DATA (push, pos); 766 BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); 767 PUSH_DATA (push, pos); 768 PUSH_DATAp(push, data, size); 769 770 return pos + size; 771} 772 773static int 774tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, 775 unsigned size, const uint32_t *data) 776{ 777 struct nouveau_pushbuf *push = screen->base.pushbuf; 778 779 size /= 4; 780 781 assert((pos + size) <= 0x800); 782 783 BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); 784 PUSH_DATA (push, (m - 0x3800) / 8); 785 PUSH_DATA (push, pos); 786 BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); 787 PUSH_DATA (push, pos); 788 PUSH_DATAp(push, data, size); 789 790 return pos + (size / 3); 791} 792 793static void 794nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) 795{ 796 BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); 797 PUSH_DATA (push, 0xff); 798 BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); 799 PUSH_DATA (push, 0xff); 800 PUSH_DATA (push, 0xff); 801 BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); 802 PUSH_DATA (push, 0xff); 803 PUSH_DATA (push, 0xff); 804 if (obj_class < GV100_3D_CLASS) { 805 BEGIN_NVC0(push, SUBC_3D(0x074c), 1); 806 PUSH_DATA (push, 0x3f); 807 } 808 809 BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); 810 PUSH_DATA (push, (3 << 16) | 3); 811 BEGIN_NVC0(push, SUBC_3D(0x1794), 1); 812 PUSH_DATA (push, (2 << 16) | 2); 813 814 if (obj_class < GM107_3D_CLASS) { 815 BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); 816 PUSH_DATA (push, 0); 817 } 818 BEGIN_NVC0(push, SUBC_3D(0x0218), 1); 819 PUSH_DATA (push, 0x10); 820 BEGIN_NVC0(push, SUBC_3D(0x10fc), 1); 821 PUSH_DATA (push, 0x10); 822 BEGIN_NVC0(push, SUBC_3D(0x1290), 1); 823 PUSH_DATA (push, 0x10); 824 BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); 825 PUSH_DATA (push, 0x10); 826 PUSH_DATA (push, 0x10); 827 BEGIN_NVC0(push, SUBC_3D(0x1140), 1); 828 PUSH_DATA (push, 0x10); 829 BEGIN_NVC0(push, SUBC_3D(0x1610), 1); 830 PUSH_DATA (push, 0xe); 831 832 BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_GEN_MODE), 1); 833 PUSH_DATA (push, NVC0_3D_VERTEX_ID_GEN_MODE_DRAW_ARRAYS_ADD_START); 834 BEGIN_NVC0(push, SUBC_3D(0x030c), 1); 835 PUSH_DATA (push, 0); 836 BEGIN_NVC0(push, SUBC_3D(0x0300), 1); 837 PUSH_DATA (push, 3); 838 839 if (obj_class < GV100_3D_CLASS) { 840 BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); 841 PUSH_DATA (push, 0x3fffff); 842 } 843 BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); 844 PUSH_DATA (push, 1); 845 BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); 846 PUSH_DATA (push, 1); 847 848 if (obj_class < GM107_3D_CLASS) { 849 BEGIN_NVC0(push, SUBC_3D(0x075c), 1); 850 PUSH_DATA (push, 3); 851 852 if (obj_class >= NVE4_3D_CLASS) { 853 BEGIN_NVC0(push, SUBC_3D(0x07fc), 1); 854 PUSH_DATA (push, 1); 855 } 856 } 857 858 /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc 859 * are supposed to do */ 860} 861 862static void 863nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) 864{ 865 struct nvc0_screen *screen = nvc0_screen(pscreen); 866 struct nouveau_pushbuf *push = screen->base.pushbuf; 867 868 /* we need to do it after possible flush in MARK_RING */ 869 *sequence = ++screen->base.fence.sequence; 870 871 assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5); 872 PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4)); 873 PUSH_DATAh(push, screen->fence.bo->offset); 874 PUSH_DATA (push, screen->fence.bo->offset); 875 PUSH_DATA (push, *sequence); 876 PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | 877 (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); 878} 879 880static u32 881nvc0_screen_fence_update(struct pipe_screen *pscreen) 882{ 883 struct nvc0_screen *screen = nvc0_screen(pscreen); 884 return screen->fence.map[0]; 885} 886 887static int 888nvc0_screen_init_compute(struct nvc0_screen *screen) 889{ 890 screen->base.base.get_compute_param = nvc0_screen_get_compute_param; 891 892 switch (screen->base.device->chipset & ~0xf) { 893 case 0xc0: 894 case 0xd0: 895 return nvc0_screen_compute_setup(screen, screen->base.pushbuf); 896 case 0xe0: 897 case 0xf0: 898 case 0x100: 899 case 0x110: 900 case 0x120: 901 case 0x130: 902 case 0x140: 903 case 0x160: 904 return nve4_screen_compute_setup(screen, screen->base.pushbuf); 905 default: 906 return -1; 907 } 908} 909 910static int 911nvc0_screen_resize_tls_area(struct nvc0_screen *screen, 912 uint32_t lpos, uint32_t lneg, uint32_t cstack) 913{ 914 struct nouveau_bo *bo = NULL; 915 int ret; 916 uint64_t size = (lpos + lneg) * 32 + cstack; 917 918 if (size >= (1 << 20)) { 919 NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size); 920 return -1; 921 } 922 923 size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */ 924 size = align(size, 0x8000); 925 size *= screen->mp_count; 926 927 size = align(size, 1 << 17); 928 929 ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size, 930 NULL, &bo); 931 if (ret) 932 return ret; 933 934 /* Make sure that the pushbuf has acquired a reference to the old tls 935 * segment, as it may have commands that will reference it. 936 */ 937 if (screen->tls) 938 PUSH_REFN(screen->base.pushbuf, screen->tls, 939 NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR); 940 nouveau_bo_ref(NULL, &screen->tls); 941 screen->tls = bo; 942 return 0; 943} 944 945int 946nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) 947{ 948 struct nouveau_pushbuf *push = screen->base.pushbuf; 949 struct nouveau_bo *bo; 950 int ret; 951 952 ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 953 1 << 17, size, NULL, &bo); 954 if (ret) 955 return ret; 956 957 /* Make sure that the pushbuf has acquired a reference to the old text 958 * segment, as it may have commands that will reference it. 959 */ 960 if (screen->text) 961 PUSH_REFN(push, screen->text, 962 NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); 963 nouveau_bo_ref(NULL, &screen->text); 964 screen->text = bo; 965 966 nouveau_heap_destroy(&screen->lib_code); 967 nouveau_heap_destroy(&screen->text_heap); 968 969 /* XXX: getting a page fault at the end of the code buffer every few 970 * launches, don't use the last 256 bytes to work around them - prefetch ? 971 */ 972 nouveau_heap_init(&screen->text_heap, 0, size - 0x100); 973 974 /* update the code segment setup */ 975 if (screen->eng3d->oclass < GV100_3D_CLASS) { 976 BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); 977 PUSH_DATAh(push, screen->text->offset); 978 PUSH_DATA (push, screen->text->offset); 979 if (screen->compute) { 980 BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); 981 PUSH_DATAh(push, screen->text->offset); 982 PUSH_DATA (push, screen->text->offset); 983 } 984 } 985 986 return 0; 987} 988 989void 990nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, 991 int stage, int index, int size, uint64_t addr) 992{ 993 assert(stage != 5); 994 995 struct nouveau_pushbuf *push = screen->base.pushbuf; 996 997 if (screen->base.class_3d >= GM107_3D_CLASS) { 998 struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index]; 999 1000 // TODO: Better figure out the conditions in which this is needed 1001 bool serialize = binding->addr == addr && binding->size != size; 1002 if (can_serialize) 1003 serialize = serialize && *can_serialize; 1004 if (serialize) { 1005 IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); 1006 if (can_serialize) 1007 *can_serialize = false; 1008 } 1009 1010 binding->addr = addr; 1011 binding->size = size; 1012 } 1013 1014 if (size >= 0) { 1015 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 1016 PUSH_DATA (push, size); 1017 PUSH_DATAh(push, addr); 1018 PUSH_DATA (push, addr); 1019 } 1020 IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); 1021} 1022 1023static const void * 1024nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, 1025 enum pipe_shader_ir ir, 1026 enum pipe_shader_type shader) 1027{ 1028 struct nvc0_screen *screen = nvc0_screen(pscreen); 1029 if (ir == PIPE_SHADER_IR_NIR) 1030 return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset, 1031 shader); 1032 return NULL; 1033} 1034 1035#define FAIL_SCREEN_INIT(str, err) \ 1036 do { \ 1037 NOUVEAU_ERR(str, err); \ 1038 goto fail; \ 1039 } while(0) 1040 1041struct nouveau_screen * 1042nvc0_screen_create(struct nouveau_device *dev) 1043{ 1044 struct nvc0_screen *screen; 1045 struct pipe_screen *pscreen; 1046 struct nouveau_object *chan; 1047 struct nouveau_pushbuf *push; 1048 uint64_t value; 1049 uint32_t obj_class; 1050 uint32_t flags; 1051 int ret; 1052 unsigned i; 1053 1054 switch (dev->chipset & ~0xf) { 1055 case 0xc0: 1056 case 0xd0: 1057 case 0xe0: 1058 case 0xf0: 1059 case 0x100: 1060 case 0x110: 1061 case 0x120: 1062 case 0x130: 1063 case 0x140: 1064 case 0x160: 1065 break; 1066 default: 1067 return NULL; 1068 } 1069 1070 screen = CALLOC_STRUCT(nvc0_screen); 1071 if (!screen) 1072 return NULL; 1073 pscreen = &screen->base.base; 1074 pscreen->destroy = nvc0_screen_destroy; 1075 1076 ret = nouveau_screen_init(&screen->base, dev); 1077 if (ret) 1078 FAIL_SCREEN_INIT("Base screen init failed: %d\n", ret); 1079 chan = screen->base.channel; 1080 push = screen->base.pushbuf; 1081 push->user_priv = screen; 1082 push->rsvd_kick = 5; 1083 1084 /* TODO: could this be higher on Kepler+? how does reclocking vs no 1085 * reclocking affect performance? 1086 * TODO: could this be higher on Fermi? 1087 */ 1088 if (dev->chipset >= 0xe0) 1089 screen->base.transfer_pushbuf_threshold = 1024; 1090 1091 screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | 1092 PIPE_BIND_SHADER_BUFFER | 1093 PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | 1094 PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER; 1095 screen->base.sysmem_bindings |= 1096 PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; 1097 1098 if (screen->base.vram_domain & NOUVEAU_BO_GART) { 1099 screen->base.sysmem_bindings |= screen->base.vidmem_bindings; 1100 screen->base.vidmem_bindings = 0; 1101 } 1102 1103 pscreen->context_create = nvc0_create; 1104 pscreen->is_format_supported = nvc0_screen_is_format_supported; 1105 pscreen->get_param = nvc0_screen_get_param; 1106 pscreen->get_shader_param = nvc0_screen_get_shader_param; 1107 pscreen->get_paramf = nvc0_screen_get_paramf; 1108 pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid; 1109 pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; 1110 pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info; 1111 /* nir stuff */ 1112 pscreen->get_compiler_options = nvc0_screen_get_compiler_options; 1113 1114 nvc0_screen_init_resource_functions(pscreen); 1115 1116 screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; 1117 screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; 1118 1119 flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP; 1120 if (screen->base.drm->version >= 0x01000202) 1121 flags |= NOUVEAU_BO_COHERENT; 1122 1123 ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo); 1124 if (ret) 1125 FAIL_SCREEN_INIT("Error allocating fence BO: %d\n", ret); 1126 nouveau_bo_map(screen->fence.bo, 0, NULL); 1127 screen->fence.map = screen->fence.bo->map; 1128 screen->base.fence.emit = nvc0_screen_fence_emit; 1129 screen->base.fence.update = nvc0_screen_fence_update; 1130 1131 if (dev->chipset < 0x140) { 1132 ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, 1133 NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); 1134 if (ret) 1135 FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); 1136 1137 BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); 1138 PUSH_DATA (push, screen->nvsw->handle); 1139 } 1140 1141 switch (dev->chipset & ~0xf) { 1142 case 0x160: 1143 case 0x140: 1144 case 0x130: 1145 case 0x120: 1146 case 0x110: 1147 case 0x100: 1148 case 0xf0: 1149 obj_class = NVF0_P2MF_CLASS; 1150 break; 1151 case 0xe0: 1152 obj_class = NVE4_P2MF_CLASS; 1153 break; 1154 default: 1155 obj_class = NVC0_M2MF_CLASS; 1156 break; 1157 } 1158 ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, 1159 &screen->m2mf); 1160 if (ret) 1161 FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); 1162 1163 BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); 1164 PUSH_DATA (push, screen->m2mf->oclass); 1165 if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { 1166 BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); 1167 PUSH_DATA (push, NVE4_COPY_CLASS); 1168 } 1169 1170 ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, 1171 &screen->eng2d); 1172 if (ret) 1173 FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); 1174 1175 BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); 1176 PUSH_DATA (push, screen->eng2d->oclass); 1177 BEGIN_NVC0(push, SUBC_2D(NVC0_2D_SINGLE_GPC), 1); 1178 PUSH_DATA (push, 0); 1179 BEGIN_NVC0(push, NVC0_2D(OPERATION), 1); 1180 PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); 1181 BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1); 1182 PUSH_DATA (push, 0); 1183 BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1); 1184 PUSH_DATA (push, 0); 1185 BEGIN_NVC0(push, NVC0_2D(SET_PIXELS_FROM_MEMORY_CORRAL_SIZE), 1); 1186 PUSH_DATA (push, 0x3f); 1187 BEGIN_NVC0(push, NVC0_2D(SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP), 1); 1188 PUSH_DATA (push, 1); 1189 BEGIN_NVC0(push, NVC0_2D(COND_MODE), 1); 1190 PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS); 1191 1192 BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2); 1193 PUSH_DATAh(push, screen->fence.bo->offset + 16); 1194 PUSH_DATA (push, screen->fence.bo->offset + 16); 1195 1196 switch (dev->chipset & ~0xf) { 1197 case 0x160: 1198 obj_class = TU102_3D_CLASS; 1199 break; 1200 case 0x140: 1201 obj_class = GV100_3D_CLASS; 1202 break; 1203 case 0x130: 1204 switch (dev->chipset) { 1205 case 0x130: 1206 case 0x13b: 1207 obj_class = GP100_3D_CLASS; 1208 break; 1209 default: 1210 obj_class = GP102_3D_CLASS; 1211 break; 1212 } 1213 break; 1214 case 0x120: 1215 obj_class = GM200_3D_CLASS; 1216 break; 1217 case 0x110: 1218 obj_class = GM107_3D_CLASS; 1219 break; 1220 case 0x100: 1221 case 0xf0: 1222 obj_class = NVF0_3D_CLASS; 1223 break; 1224 case 0xe0: 1225 switch (dev->chipset) { 1226 case 0xea: 1227 obj_class = NVEA_3D_CLASS; 1228 break; 1229 default: 1230 obj_class = NVE4_3D_CLASS; 1231 break; 1232 } 1233 break; 1234 case 0xd0: 1235 obj_class = NVC8_3D_CLASS; 1236 break; 1237 case 0xc0: 1238 default: 1239 switch (dev->chipset) { 1240 case 0xc8: 1241 obj_class = NVC8_3D_CLASS; 1242 break; 1243 case 0xc1: 1244 obj_class = NVC1_3D_CLASS; 1245 break; 1246 default: 1247 obj_class = NVC0_3D_CLASS; 1248 break; 1249 } 1250 break; 1251 } 1252 ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, 1253 &screen->eng3d); 1254 if (ret) 1255 FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); 1256 screen->base.class_3d = obj_class; 1257 1258 BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); 1259 PUSH_DATA (push, screen->eng3d->oclass); 1260 1261 BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); 1262 PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); 1263 1264 if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) { 1265 /* kill shaders after about 1 second (at 100 MHz) */ 1266 BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1); 1267 PUSH_DATA (push, 0x17); 1268 } 1269 1270 IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), 1271 screen->base.drm->version >= 0x01000101); 1272 BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8); 1273 for (i = 0; i < 8; ++i) 1274 PUSH_DATA(push, screen->base.drm->version >= 0x01000101); 1275 1276 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); 1277 PUSH_DATA (push, 1); 1278 1279 BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1); 1280 PUSH_DATA (push, 0); 1281 BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1); 1282 PUSH_DATA (push, 0); 1283 BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1); 1284 PUSH_DATA (push, NVC0_3D_MULTISAMPLE_MODE_MS1); 1285 BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1); 1286 PUSH_DATA (push, 0); 1287 BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1); 1288 PUSH_DATA (push, 1); 1289 BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1); 1290 PUSH_DATA (push, 1); 1291 BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1); 1292 PUSH_DATA (push, 1); 1293 BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); 1294 PUSH_DATA (push, 0); 1295 BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); 1296 PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); 1297 if (screen->eng3d->oclass < NVE4_3D_CLASS) { 1298 IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0); 1299 } else { 1300 BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); 1301 PUSH_DATA (push, 15); 1302 } 1303 BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1); 1304 PUSH_DATA (push, 8); /* 128 */ 1305 BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1); 1306 PUSH_DATA (push, 1); 1307 if (screen->eng3d->oclass >= NVC1_3D_CLASS) { 1308 BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1); 1309 PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1); 1310 } 1311 1312 nvc0_magic_3d_init(push, screen->eng3d->oclass); 1313 1314 ret = nvc0_screen_resize_text_area(screen, 1 << 19); 1315 if (ret) 1316 FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret); 1317 1318 /* 6 user uniform areas, 6 driver areas, and 1 for the runout */ 1319 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL, 1320 &screen->uniform_bo); 1321 if (ret) 1322 FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret); 1323 1324 PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR); 1325 1326 /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ 1327 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 1328 PUSH_DATA (push, 256); 1329 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); 1330 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); 1331 BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); 1332 PUSH_DATA (push, 0); 1333 PUSH_DATAf(push, 0.0f); 1334 PUSH_DATAf(push, 0.0f); 1335 PUSH_DATAf(push, 0.0f); 1336 PUSH_DATAf(push, 0.0f); 1337 BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); 1338 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); 1339 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); 1340 1341 if (screen->base.drm->version >= 0x01000101) { 1342 ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); 1343 if (ret) 1344 FAIL_SCREEN_INIT("NOUVEAU_GETPARAM_GRAPH_UNITS failed: %d\n", ret); 1345 } else { 1346 if (dev->chipset >= 0xe0 && dev->chipset < 0xf0) 1347 value = (8 << 8) | 4; 1348 else 1349 value = (16 << 8) | 4; 1350 } 1351 screen->gpc_count = value & 0x000000ff; 1352 screen->mp_count = value >> 8; 1353 screen->mp_count_compute = screen->mp_count; 1354 1355 ret = nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200); 1356 if (ret) 1357 FAIL_SCREEN_INIT("Error allocating TLS area: %d\n", ret); 1358 1359 BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4); 1360 PUSH_DATAh(push, screen->tls->offset); 1361 PUSH_DATA (push, screen->tls->offset); 1362 PUSH_DATA (push, screen->tls->size >> 32); 1363 PUSH_DATA (push, screen->tls->size); 1364 BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1); 1365 PUSH_DATA (push, 0); 1366 /* Reduce likelihood of collision with real buffers by placing the hole at 1367 * the top of the 4G area. This will have to be dealt with for real 1368 * eventually by blocking off that area from the VM. 1369 */ 1370 BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); 1371 PUSH_DATA (push, 0xff << 24); 1372 1373 if (screen->eng3d->oclass < GM107_3D_CLASS) { 1374 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL, 1375 &screen->poly_cache); 1376 if (ret) 1377 FAIL_SCREEN_INIT("Error allocating poly cache BO: %d\n", ret); 1378 1379 BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); 1380 PUSH_DATAh(push, screen->poly_cache->offset); 1381 PUSH_DATA (push, screen->poly_cache->offset); 1382 PUSH_DATA (push, 3); 1383 } 1384 1385 ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL, 1386 &screen->txc); 1387 if (ret) 1388 FAIL_SCREEN_INIT("Error allocating txc BO: %d\n", ret); 1389 1390 BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3); 1391 PUSH_DATAh(push, screen->txc->offset); 1392 PUSH_DATA (push, screen->txc->offset); 1393 PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); 1394 if (screen->eng3d->oclass >= GM107_3D_CLASS) { 1395 screen->tic.maxwell = true; 1396 if (screen->eng3d->oclass == GM107_3D_CLASS) { 1397 screen->tic.maxwell = 1398 debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true); 1399 IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell); 1400 } 1401 } 1402 1403 BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3); 1404 PUSH_DATAh(push, screen->txc->offset + 65536); 1405 PUSH_DATA (push, screen->txc->offset + 65536); 1406 PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); 1407 1408 BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1); 1409 PUSH_DATA (push, 0); 1410 BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2); 1411 PUSH_DATA (push, 0); 1412 PUSH_DATA (push, 0); 1413 BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */ 1414 PUSH_DATA (push, 0x3f); 1415 1416 BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1); 1417 PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); 1418 BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2); 1419 for (i = 0; i < 8 * 2; ++i) 1420 PUSH_DATA(push, 0); 1421 BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1); 1422 PUSH_DATA (push, 0); 1423 BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1); 1424 PUSH_DATA (push, 0); 1425 1426 /* neither scissors, viewport nor stencil mask should affect clears */ 1427 BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1); 1428 PUSH_DATA (push, 0); 1429 1430 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); 1431 PUSH_DATA (push, 1); 1432 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { 1433 BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2); 1434 PUSH_DATAf(push, 0.0f); 1435 PUSH_DATAf(push, 1.0f); 1436 } 1437 BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1); 1438 PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); 1439 1440 /* We use scissors instead of exact view volume clipping, 1441 * so they're always enabled. 1442 */ 1443 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { 1444 BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3); 1445 PUSH_DATA (push, 1); 1446 PUSH_DATA (push, 16384 << 16); 1447 PUSH_DATA (push, 16384 << 16); 1448 } 1449 1450 if (screen->eng3d->oclass < TU102_3D_CLASS) { 1451#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); 1452 1453 i = 0; 1454 MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); 1455 MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); 1456 MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); 1457 MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); 1458 MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); 1459 MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); 1460 MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); 1461 MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); 1462 MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); 1463 MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); 1464 MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); 1465 MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); 1466 MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); 1467 MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); 1468 MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); 1469 MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); 1470 } else { 1471#undef MK_MACRO 1472#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n); 1473 1474 i = 0; 1475 MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf); 1476 MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables); 1477 MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select); 1478 MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select); 1479 MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select); 1480 MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front); 1481 MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back); 1482 MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect); 1483 MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect); 1484 MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count); 1485 MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count); 1486 MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write); 1487 MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state); 1488 MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter); 1489 MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query); 1490 } 1491 1492 BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); 1493 PUSH_DATA (push, 1); 1494 BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1); 1495 PUSH_DATA (push, 1); 1496 BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); 1497 PUSH_DATA (push, 0x40); 1498 BEGIN_NVC0(push, NVC0_3D(LAYER), 1); 1499 PUSH_DATA (push, 0); 1500 BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); 1501 PUSH_DATA (push, 0x30); 1502 BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1); 1503 PUSH_DATA (push, 3); 1504 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); 1505 PUSH_DATA (push, 0x20); 1506 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1); 1507 PUSH_DATA (push, 0x00); 1508 screen->save_state.patch_vertices = 3; 1509 1510 BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1); 1511 PUSH_DATA (push, 0); 1512 BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1); 1513 PUSH_DATA (push, NVC0_3D_POINT_RASTER_RULES_OGL); 1514 1515 IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1); 1516 1517 if (nvc0_screen_init_compute(screen)) 1518 goto fail; 1519 1520 /* XXX: Compute and 3D are somehow aliased on Fermi. */ 1521 for (i = 0; i < 5; ++i) { 1522 unsigned j = 0; 1523 for (j = 0; j < 16; j++) 1524 screen->cb_bindings[i][j].size = -1; 1525 1526 /* TIC and TSC entries for each unit (nve4+ only) */ 1527 /* auxiliary constants (6 user clip planes, base instance id) */ 1528 nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, 1529 screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); 1530 if (screen->eng3d->oclass >= NVE4_3D_CLASS) { 1531 unsigned j; 1532 BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); 1533 PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO); 1534 for (j = 0; j < 8; ++j) 1535 PUSH_DATA(push, j); 1536 } else { 1537 BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); 1538 PUSH_DATA (push, 0x54); 1539 } 1540 1541 /* MS sample coordinate offsets: these do not work with _ALT modes ! */ 1542 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8); 1543 PUSH_DATA (push, NVC0_CB_AUX_MS_INFO); 1544 PUSH_DATA (push, 0); /* 0 */ 1545 PUSH_DATA (push, 0); 1546 PUSH_DATA (push, 1); /* 1 */ 1547 PUSH_DATA (push, 0); 1548 PUSH_DATA (push, 0); /* 2 */ 1549 PUSH_DATA (push, 1); 1550 PUSH_DATA (push, 1); /* 3 */ 1551 PUSH_DATA (push, 1); 1552 PUSH_DATA (push, 2); /* 4 */ 1553 PUSH_DATA (push, 0); 1554 PUSH_DATA (push, 3); /* 5 */ 1555 PUSH_DATA (push, 0); 1556 PUSH_DATA (push, 2); /* 6 */ 1557 PUSH_DATA (push, 1); 1558 PUSH_DATA (push, 3); /* 7 */ 1559 PUSH_DATA (push, 1); 1560 } 1561 BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); 1562 PUSH_DATA (push, 0); 1563 1564 PUSH_KICK (push); 1565 1566 screen->tic.entries = CALLOC( 1567 NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES, 1568 sizeof(void *)); 1569 screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES; 1570 screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES); 1571 1572 if (!nvc0_blitter_create(screen)) 1573 goto fail; 1574 1575 nouveau_fence_new(&screen->base, &screen->base.fence.current); 1576 1577 return &screen->base; 1578 1579fail: 1580 screen->base.base.context_create = NULL; 1581 return &screen->base; 1582} 1583 1584int 1585nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) 1586{ 1587 int i = screen->tic.next; 1588 1589 while (screen->tic.lock[i / 32] & (1 << (i % 32))) 1590 i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); 1591 1592 screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); 1593 1594 if (screen->tic.entries[i]) 1595 nv50_tic_entry(screen->tic.entries[i])->id = -1; 1596 1597 screen->tic.entries[i] = entry; 1598 return i; 1599} 1600 1601int 1602nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) 1603{ 1604 int i = screen->tsc.next; 1605 1606 while (screen->tsc.lock[i / 32] & (1 << (i % 32))) 1607 i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); 1608 1609 screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); 1610 1611 if (screen->tsc.entries[i]) 1612 nv50_tsc_entry(screen->tsc.entries[i])->id = -1; 1613 1614 screen->tsc.entries[i] = entry; 1615 return i; 1616} 1617